In [1]:
pip install --upgrade transformers



In [2]:
pip install --upgrade keras



In [3]:
import random
import pprint
import sys
import time
import numpy as np
from optparse import OptionParser
import pickle
import math
import cv2
import copy
from matplotlib import pyplot as plt
import tensorflow as tf
import pandas as pd
import os

from sklearn.metrics import average_precision_score

from keras import backend as K
from keras.optimizers import Adam, SGD, RMSprop
from keras.layers import Flatten, Dense, Input, Conv2D, MaxPooling2D, Dropout
from keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D, TimeDistributed
from keras.utils import get_source_inputs
#from keras.utils import layer_utils
from tensorflow.python.keras.utils import layer_utils


from tensorflow.python.keras.utils import generic_utils
from keras.utils import get_file
from keras.metrics import categorical_crossentropy

from keras.models import Model
from tensorflow.python.keras.utils import generic_utils
from keras.layers import Layer,InputSpec
from keras import initializers, regularizers

In [4]:
def partial_vgg(input_tensor=None):
    """Build the convolutional trunk of VGG-16 used as the feature extractor.

    The network consists of the five VGG convolution blocks. Blocks 1-4 end
    with a 2x2 / stride-2 max-pool; block 5 has no pooling and there are no
    fully connected layers, because the caller needs the feature map itself.

    Args:
        input_tensor: Optional Keras tensor to build the network on. When it
            is ``None`` a new ``Input`` of shape ``(None, None, 3)`` is
            created. (Previously this argument was silently ignored.)

    Returns:
        The output tensor of ``block5_conv3`` (512 channels).
    """
    if input_tensor is None:
        img_input = Input(shape=(None, None, 3))
    else:
        img_input = input_tensor

    # (filters, number of 3x3 convolutions) for VGG blocks 1..5.
    vgg_blocks = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    x = img_input
    for block_idx, (filters, n_convs) in enumerate(vgg_blocks, start=1):
        for conv_idx in range(1, n_convs + 1):
            x = Conv2D(filters, (3, 3), activation='relu', padding='same',
                       name='block{}_conv{}'.format(block_idx, conv_idx))(x)
        # The last block deliberately has no pooling layer.
        if block_idx < len(vgg_blocks):
            x = MaxPooling2D((2, 2), strides=(2, 2),
                             name='block{}_pool'.format(block_idx))(x)

    return x
#RPN layer

def rpn_layer(base_layers, num_anchors):
    """Attach the Region Proposal Network head to a feature map.

    Args:
        base_layers: Feature-map tensor produced by the backbone network.
        num_anchors: Number of anchors evaluated at every feature-map position.

    Returns:
        A list ``[x_class, x_regr, base_layers]``: the per-anchor sigmoid
        objectness map (``num_anchors`` channels), the per-anchor linear box
        regression map (``num_anchors * 4`` channels), and the untouched
        ``base_layers`` tensor.
    """
    # Shared 3x3 convolution feeding both output branches.
    shared = Conv2D(512, (3, 3), padding='same', activation='relu')(base_layers)

    # Object / not-object score for each anchor.
    objectness = Conv2D(num_anchors, (1, 1), activation='sigmoid')(shared)

    # Four regression values for each anchor.
    box_deltas = Conv2D(num_anchors * 4, (1, 1), activation='linear')(shared)

    return [objectness, box_deltas, base_layers]


In [5]:
class RoiPoolingConv(Layer):
    """ROI pooling layer: crops each region of interest and resizes it to a square.

    The layer is called on a list ``[img, rois]``:

    * ``img``  - feature map, indexed as ``(batch, rows, cols, channels)``.
    * ``rois`` - regions indexed as ``rois[0, i, :] = (x, y, w, h)``; only the
      first batch entry is read.

    The output has shape ``(1, num_rois, pool_size, pool_size, channels)``.

    Args:
        pool_size: Side length of the square each region is resized to.
        num_rois: Number of regions processed per call.
    """

    def __init__(self, pool_size, num_rois, **kwargs):

        # K.image_dim_ordering() is gone from Keras 2+; image_data_format()
        # is its replacement.
        self.dim_ordering = K.image_data_format()
        self.pool_size = pool_size
        self.num_rois = num_rois

        super(RoiPoolingConv, self).__init__(**kwargs)

    def build(self, input_shape):
        # input_shape[0] is the feature-map shape; its last axis is channels.
        self.nb_channels = input_shape[0][3]

    def compute_output_shape(self, input_shape):
        return None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels

    def call(self, x, mask=None):

        assert(len(x) == 2)

        # x[0] is the feature map, x[1] the rois with ordering (x, y, w, h).
        img = x[0]
        rois = x[1]

        outputs = []

        # tf.* ops are used directly for the tensor manipulation.
        # NOTE(review): recent Keras releases appear to no longer export the
        # K.cast / K.concatenate / K.reshape helpers - confirm for the
        # installed version.
        for roi_idx in range(self.num_rois):

            roi_x = tf.cast(rois[0, roi_idx, 0], 'int32')
            roi_y = tf.cast(rois[0, roi_idx, 1], 'int32')
            roi_w = tf.cast(rois[0, roi_idx, 2], 'int32')
            roi_h = tf.cast(rois[0, roi_idx, 3], 'int32')

            # Crop the region and resize it to (pool_size, pool_size).
            # tf.image.resize replaces the TF1-only tf.image.resize_images.
            crop = img[:, roi_y:roi_y + roi_h, roi_x:roi_x + roi_w, :]
            outputs.append(tf.image.resize(crop, (self.pool_size, self.pool_size)))

        final_output = tf.concat(outputs, axis=0)
        # Reshape to (1, num_rois, pool_size, pool_size, nb_channels).
        final_output = tf.reshape(
            final_output,
            (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))

        return final_output

    def get_config(self):
        config = {'pool_size': self.pool_size,
                  'num_rois': self.num_rois}
        base_config = super(RoiPoolingConv, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [6]:
def classifier_layer(base_layers, input_rois, num_rois, nb_classes = 4):
    """Build the detection head: ROI pooling, two FC layers, class and box outputs.

    Args:
        base_layers: Feature-map tensor produced by the backbone network.
        input_rois: Tensor of rois, ``(1, num_rois, 4)`` with ordering (x, y, w, h).
        num_rois: Number of rois processed at one time.
        nb_classes: Number of output classes of the softmax layer.

    Returns:
        A list ``[out_class, out_regr]``: softmax class scores with
        ``nb_classes`` units and linear box regression with
        ``4 * (nb_classes - 1)`` units.
    """
    pooling_regions = 7

    # out_roi_pool.shape = (1, num_rois, pool_size, pool_size, channels)
    out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois])

    # Flatten the pooled features and feed them through 2 FC + 2 dropout layers.
    # NOTE(review): a plain Flatten merges all num_rois regions into one
    # vector, so the outputs are per batch rather than per roi - confirm this
    # is intended (a TimeDistributed wrapper would keep them separate).
    out = Flatten(name='flatten')(out_roi_pool)
    out = Dense(4096, activation='relu', name='fc1')(out)
    out = Dropout(0.5)(out)
    out = Dense(4096, activation='relu', name='fc2')(out)
    out = Dropout(0.5)(out)

    # Two output layers. `name` must be a keyword argument of Dense; the
    # previous code closed the call too early and compared an undefined
    # variable with `==`, which raised NameError.
    out_class = Dense(nb_classes, activation='softmax', kernel_initializer='zeros',
                      name='dense_class_{}'.format(nb_classes))(out)

    # Bounding-box coordinate regression.
    out_regr = Dense(4 * (nb_classes-1), activation='linear', kernel_initializer='zeros',
                     name='dense_regress_{}'.format(nb_classes))(out)

    return [out_class, out_regr]


In [7]:
import numpy as np
import pandas as pd
import os
import gzip

In [8]:
import random
from PIL import Image, ImageDraw
from collections import Counter
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import torch
import torchvision
from torchvision import transforms as T
from torchvision.models.detection. faster_rcnn import FastRCNNPredictor

import gzip
import sys
import zipfile
from google.colab import drive
# Mount Google Drive at /content/drive (requires the Colab runtime).
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [9]:
# Unpack the face-image archive from Drive into the local folder `data_folder`.
with zipfile.ZipFile(r'drive/MyDrive/face_img.zip', 'r') as zip_ref:
    zip_ref.extractall('data_folder')
# Load the annotation table shipped inside the archive.
train =pd.read_csv(r"data_folder/faces.csv")
# Keep only the image name and the four box columns, in this order.
train=train[['image_name','x0','y0','x1','y1']]

In [10]:
# Work on the first 100 annotation rows only.
train=train.head(100)
# Preview; this must be the cell's last expression to be displayed.
train.head()

In [11]:
#import zip


In [12]:
# Distinct image file names present in the annotation table.
unique_imgs=train.image_name.unique()
# NOTE(review): this value is not displayed because it is not the cell's last expression.
len(unique_imgs)

#12.29
# Display the full array of distinct image names.
unique_imgs

array(['00001722.jpg', '00001044.jpg', '00001050.jpg', '00001736.jpg',
       '00003121.jpg', '00000400.jpg', '00002571.jpg', '00000366.jpg',
       '00002565.jpg', '00001939.jpg', '00001087.jpg', '00001093.jpg',
       '00001905.jpg', '00001911.jpg', '00002997.jpg', '00003451.jpg',
       '00001246.jpg', '00001520.jpg', '00002029.jpg', '00003337.jpg',
       '00003323.jpg', '00001252.jpg', '00002983.jpg', '00003445.jpg',
       '00002015.jpg', '00000602.jpg', '00000616.jpg', '00002001.jpg',
       '00002767.jpg', '00000170.jpg', '00002954.jpg', '00003492.jpg',
       '00000825.jpg', '00000831.jpg', '00002798.jpg', '00002940.jpg',
       '00003486.jpg', '00000819.jpg', '00001332.jpg', '00000992.jpg',
       '00003243.jpg', '00000986.jpg', '00001440.jpg', '00001326.jpg',
       '00002607.jpg', '00002161.jpg', '00001468.jpg', '00000762.jpg',
       '00002613.jpg', '00000004.jpg', '00000789.jpg', '00000951.jpg',
       '00001497.jpg', '00001483.jpg', '00002808.jpg', '00003041.jpg',
      

In [13]:
# Keep a single annotation row per image.
# NOTE(review): every additional box of an image that has more than one row
# is discarded here - confirm that one box per image is intended.
train=train.drop_duplicates(subset='image_name')
len(train)

57

In [14]:
class CustDat(torch.utils.data.Dataset):
  """Detection dataset yielding ``(image_tensor, target)`` pairs.

  Args:
    df: Annotation frame with an ``image_name`` column; every column after
      the first is read as a box coordinate.
    unique_imgs: Sequence of image file names.
    indices: Positions into ``unique_imgs`` that belong to this split.
    img_dir: Directory holding the image files.
  """

  def __init__(self, df, unique_imgs, indices, img_dir="data_folder/images/"):
    self.df=df
    self.unique_imgs=unique_imgs
    self.indices=indices
    self.img_dir=img_dir

  def __len__(self):
    return len(self.indices)

  def __getitem__(self, idx):
    """Return the image as a tensor and a dict with keys ``boxes`` and ``label``."""
    image_name = self.unique_imgs[self.indices[idx]]

    # All annotation rows of this image; column 0 is the name, the rest are coordinates.
    rows = self.df[self.df.image_name == image_name]
    boxes = rows.values[:, 1:].astype("float")

    # Close the file handle as soon as the pixel data has been converted.
    with Image.open(os.path.join(self.img_dir, image_name)) as raw:
      img = raw.convert('RGB')

    # Every box gets class id 1.
    labels = torch.ones((boxes.shape[0]), dtype = torch.int64)
    target = {}
    # torchvision detection models document boxes as FloatTensor[N, 4];
    # torch.tensor() on a float64 array would produce a DoubleTensor instead.
    target["boxes"] = torch.tensor(boxes, dtype=torch.float32)
    target["label"] = labels
    return T.ToTensor()(img), target

In [15]:
# The indices address positions in the unique-image list handed to the
# datasets, so size the split from that list rather than from the row count.
# A fixed random_state keeps the split reproducible across re-runs.
train_inds,val_inds=train_test_split(range(len(train.image_name.unique())),test_size=0.1,random_state=42)
def custom_collate(data):
  """Identity collate function: hand the list of samples back unchanged.

  Used as ``collate_fn`` so the loader yields the raw list of samples
  instead of trying to stack them.
  """
  batch = data
  return batch

In [16]:
# Training loader.
# NOTE(review): the recorded training run ended in OutOfMemoryError with
# batch_size=10, so a smaller batch is used here - tune to the available GPU.
train_dl = torch.utils.data.DataLoader(CustDat(train,train.image_name.unique(),train_inds),
                                batch_size=4,
                                shuffle=True,
                                collate_fn=custom_collate,
                                pin_memory=torch.cuda.is_available())

# Validation loader over the held-out indices.
val_dl = torch.utils.data.DataLoader(CustDat(train,train.image_name.unique(),val_inds),
                                batch_size=8,
                                shuffle=True,
                                collate_fn=custom_collate,
                                pin_memory=torch.cuda.is_available())


In [17]:
# Faster R-CNN with a ResNet-50 FPN backbone, loaded with pretrained weights.
model=torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained =True)
# Output size of the replacement box predictor.
# NOTE(review): the dataset in this notebook only ever emits label 1; if the
# predictor counts background as a class, 2 would suffice - confirm 3 is intended.
num_classes=3
# Replace the box-predictor head with a new one sized for num_classes,
# reusing the input width of the existing classification layer.
in_features=model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor=FastRCNNPredictor(in_features,num_classes)



In [18]:
# Run on the GPU when one is visible to torch, otherwise on the CPU.
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [19]:
# SGD with momentum and weight decay over all parameters of the model.
optimizer=torch.optim.SGD(model.parameters(),lr=0.001,momentum=0.9,weight_decay=0.0005)
# Number of passes over the training loader.
num_epochs=2

In [20]:
model.to(device)
# Enter training mode explicitly: if the evaluation cell ran before this one,
# the model would otherwise return detections instead of a loss dict.
model.train()
for epochs in range(num_epochs):
  epoch_loss=0
  for data in train_dl:
    # Each batch is a list of (image, target) samples; move them to the device.
    imgs=[d[0].to(device) for d in data]
    # The dataset stores class ids under "label"; the model is given "labels".
    # Boxes are passed as float32 in case the dataset produced float64.
    targets=[{'boxes': d[1]["boxes"].to(device=device, dtype=torch.float32),
              'labels': d[1]["label"].to(device)}
             for d in data]
    loss_dict=model(imgs,targets)
    # Total loss is the sum of all loss components returned by the model.
    loss=sum(v for v in loss_dict.values())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # .item() yields a plain Python float, detached from the graph.
    epoch_loss+=loss.item()
  print(epoch_loss)

OutOfMemoryError: ignored

In [None]:
# Switch the model to evaluation mode and pull one batch from the validation loader.
model.eval()
data=next(iter(val_dl))

In [None]:
# First sample of the batch: data[0] is an (image, target) pair.
img=data[0][0]
# Boxes and class ids stored in that sample's target dict.
boxes=data[0][1]["boxes"]
labels=data[0][1]["label"]

In [None]:
# Inference only: disable autograd so no graph is recorded for this forward pass.
with torch.no_grad():
  output=model([img.to(device)])

In [None]:
# Predicted boxes and their scores for the single image that was passed in.
out_bbox=output[0]["boxes"]
out_scores=output[0]["scores"]

In [None]:
# Non-maximum suppression over the predicted boxes with threshold 0.45.
keep=torchvision.ops.nms(out_bbox,out_scores,.45)
# Show the shape of the box tensor next to the shape of the NMS result.
out_bbox.shape,keep.shape

In [None]:
# Channels-first tensor -> channels-last array, scaled by 255 and stored as uint8.
# (The original had `=255` inside the expression, which is a syntax error.)
im = (img.permute(1,2,0).cpu().detach().numpy()*255).astype('uint8')

In [None]:
vsample=Image.fromarray(im)
# The drawing factory is ImageDraw.Draw (capital D); ImageDraw.draw does not exist.
draw=ImageDraw.Draw(vsample)
# `boxes` are the boxes taken from the dataset target of this sample.
for box in boxes:
  # Convert the tensor row to plain Python numbers before handing it to PIL.
  draw.rectangle(box.tolist(),fill=None,outline='red')
vsample