#### Download the dataset

In [None]:
#Download the image archive and the annotation archive (-q keeps wget quiet)
!wget -q http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz
!wget -q http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz

In [None]:
#Check current directory to make sure data is downloaded
!ls -l

In [None]:
#Extract the tar archives downloaded above
!tar xf images.tar.gz
!tar xf annotations.tar.gz

In [None]:
#Explore directories
!ls -l 

In [None]:
#Check the xml annotations
!ls -l annotations/xmls

In [None]:
#Install tidy to review xml files
!sudo apt-get install tidy --quiet

In [None]:
#Check one of the xml file to understand annotations
!tidy -xml -i annotations/xmls/wheaten_terrier_170.xml

#### Convert XML to CSV

In [None]:
#Move all xml files to the images folder; this is needed for the python script used next.
#NOTE: this cell is not re-runnable as-is - after the first run annotations/xmls is
#empty, so the glob no longer matches any file.
!mv annotations/xmls/* images/

In [None]:
#Mount Google drive (change code for local machine). We need to copy generate_dataset.py script to current directory
from google.colab import drive
drive.mount('/gdrive')

In [None]:
#Copy generate_dataset.py file to current directory
!cp "/gdrive/My Drive/ACV/Localization/generate_dataset.py" .

In [None]:
!ls -l

In [None]:
#Build csv file for both training and test dataset
!python generate_dataset.py

In [None]:
!ls -l

#### Visualize Data

In [None]:
from matplotlib import pyplot as plt
import cv2
import numpy as np
import pandas as pd

In [None]:
#Load the training annotations. The csv file has no header row, so the
#column names are supplied explicitly.
train_df = pd.read_csv(
    'train.csv',
    names=[
        'File', 'Height', 'Width',
        'xmin', 'ymin', 'xmax', 'ymax',
        'Class', 'Label',
    ],
    header=None,
)

In [None]:
print(train_df.shape)
train_df.head()

In [None]:
#Map every numeric label to its class name (later rows overwrite earlier
#ones, which is harmless as long as a label always maps to the same class)
label_class_dict = {
    label: class_name
    for label, class_name in zip(train_df['Label'], train_df['Class'])
}

#NOTE: despite its name this is the array of distinct label values, not a
#count - the rest of the notebook uses len(num_classes) as the class count
num_classes = train_df['Label'].unique()

In [None]:
num_classes

In [None]:
label_class_dict

Show images with bounding box

In [None]:
#Pickup a random image number
img_num = np.random.randint(0, train_df.shape[0])

#Read the image. cv2.imread does not raise on a missing/unreadable file, it
#returns None - fail here with a clear message instead of inside cv2.rectangle
img_path = train_df.loc[img_num, 'File']
img = cv2.imread(img_path)
if img is None:
    raise FileNotFoundError("Could not read image: " + str(img_path))

#Draw a rectangle as per bounding box information. The corners are converted
#to plain Python ints because pandas hands them back as NumPy scalars.
top_left = (int(train_df.loc[img_num, 'xmin']), int(train_df.loc[img_num, 'ymin']))
bottom_right = (int(train_df.loc[img_num, 'xmax']), int(train_df.loc[img_num, 'ymax']))
cv2.rectangle(img, top_left, bottom_right, (0,255,0), 2)

#Convert BGR format (used by opencv) to RGB format (used by matplotlib)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

#Draw image using matplotlib, with the class name as title
plt.suptitle(train_df.loc[img_num, 'Class'])
plt.imshow(img)
plt.show()

In [None]:
#Load the validation annotations; same header-less column layout as the
#training csv
test_df = pd.read_csv(
    'validation.csv',
    names=[
        'File', 'Height', 'Width',
        'xmin', 'ymin', 'xmax', 'ymax',
        'Class', 'Label',
    ],
    header=None,
)

#### Define Augmentations

In [None]:
#Install imgaug
!pip install imgaug --quiet

In [None]:
import imgaug as ia
from imgaug import augmenters as iaa

In [None]:
#Side length in pixels of the square images produced by the augmentation
#pipelines and fed to the model
img_size = 224
#Number of colour channels per image
img_depth = 3

Training Augmentation Sequence

In [None]:
#Training augmentation pipeline (applied to the images and their bounding boxes)
train_seq = iaa.Sequential([    
    #Horizontal flip for 50% of the images
    iaa.Fliplr(0.5),
    #Resize all images to img_size x img_size
    iaa.Resize({"height": img_size, "width": img_size}),
    #Multiply pixel values by a factor sampled from [1.0, 1.5], i.e. images are
    #only ever brightened, never darkened.
    #per_channel=0.2: for 20% of the images the factor is sampled per channel,
    #which can end up changing the colour of the image.
    #Brightness changes DO NOT AFFECT bounding boxes.
    iaa.Multiply((1.0, 1.5), per_channel=0.2),
    #For 50% of the images: translate by up to +/-10% of the image size on each
    #axis and rotate between -25 and 25 degrees.
    #THIS AFFECTS bounding boxes.
    iaa.Sometimes(0.5, [iaa.Affine(
        translate_percent={"x": (-0.1, 0.1), "y": (-0.1, 0.1)},
        rotate=(-25, 25),
    )])
])

Test Augmentation Sequence

In [None]:
#Validation / inference pipeline: resize only, no random augmentation
test_seq = iaa.Sequential([    
    #Resize all images to img_size x img_size
    iaa.Resize({"height": img_size, "width": img_size})
])

Function to apply augmentation sequence on images

In [None]:
#Augmentation function to apply on a batch
def apply_aug(images, bboxes, aug_seq):
    """Augment a batch of images together with their bounding boxes.

    Arguments:
    images  -- sequence of image arrays (height, width, channels); the caller
               in this notebook passes uint8 arrays
    bboxes  -- one box per image, indexed as [xmin, ymin, xmax, ymax] in pixel
               coordinates of the corresponding input image
    aug_seq -- imgaug augmenter to apply. It must resize every image to
               img_size x img_size, because the results are copied into a
               fixed-size output array.

    Output:
    (b_images, b_bboxes) where b_images has shape
    (len(images), img_size, img_size, img_depth) and b_bboxes has shape
    (len(images), 4) holding [x, y, width, height] of each augmented box in
    pixels of the augmented image. Boxes are clipped to the image, so
    0 <= x, y, x + width, y + height <= img_size.
    """

    #Number of images to process
    img_count = len(images)

    #Freeze the random parameters so that the images and their boxes receive
    #exactly the same transformations
    seq_det = aug_seq.to_deterministic()

    #Wrap every box in the container imgaug expects (one box per image)
    boxes = [
        ia.BoundingBoxesOnImage(
            [ia.BoundingBox(x1=box[0], y1=box[1], x2=box[2], y2=box[3])],
            shape=image.shape)
        for image, box in zip(images, bboxes)
    ]

    #Perform image and BBs augmentation
    image_aug = seq_det.augment_images(images)
    bbs_aug = seq_det.augment_bounding_boxes(boxes)

    b_images = np.zeros((img_count, img_size, img_size, img_depth))
    b_bboxes = np.zeros((img_count, 4))

    for i in range(img_count):
        b_images[i] = image_aug[i]
        box_new = bbs_aug[i].bounding_boxes[0]

        #Rotation/translation can push part of a box outside the image. Clip
        #it back so that the normalized regression targets stay inside [0, 1],
        #the only range the sigmoid output layer can produce.
        x1, x2 = np.clip([box_new.x1, box_new.x2], 0, img_size)
        y1, y2 = np.clip([box_new.y1, box_new.y2], 0, img_size)

        #Store as (x, y, width, height)
        b_bboxes[i] = [x1, y1, x2 - x1, y2 - y1]
    return b_images, b_bboxes

#### Build a Batch Generator

In [None]:
import tensorflow as tf

In [None]:
def batch_generator(df, batch_size=32, train_mode=True):
    """Endlessly yield (images, [class_labels, bboxes]) batches.

    Arguments:
    df         -- dataframe with columns 'File', 'Label', 'xmin', 'ymin',
                  'xmax' and 'ymax'; rows are sampled at random with
                  replacement
    batch_size -- number of examples per batch
    train_mode -- True: augment with train_seq, False: only resize (test_seq)

    Yields:
    batch_images -- (batch_size, img_size, img_size, 3) array pre-processed
                    with the ResNet50 preprocess_input function
    [batch_labels, batch_bboxes]
                 -- one-hot labels of shape (batch_size, len(num_classes)) and
                    boxes of shape (batch_size, 4) as (x, y, width, height)
                    divided by img_size

    Raises:
    ValueError on the first next() call if df has no rows.
    """

    if df.shape[0] == 0:
        raise ValueError("batch_generator needs a dataframe with at least one row")

    class_count = len(num_classes)

    while True:

        #Random row POSITIONS (with replacement). They are used with .iloc
        #below so that dataframes without a default 0..n-1 index work too.
        image_nums = np.random.randint(0, df.shape[0], size=batch_size)

        #Classification labels (one-hot)
        batch_labels = np.zeros(shape=(batch_size, class_count))

        #Regression labels - 4 numbers per example image
        batch_bboxes = np.zeros(shape=(batch_size, 4))

        #List to hold all images which will be augmented
        all_images = []

        for i, row_pos in enumerate(image_nums):

            row = df.iloc[row_pos]

            #Read the image at its original size; resizing is part of the
            #augmentation sequence so that the box is resized with it
            img = tf.keras.preprocessing.image.load_img(row['File'])

            #Convert to numpy array - unsigned int8 as required by imgaug
            img_array = tf.keras.preprocessing.image.img_to_array(img).astype('uint8')
            all_images.append(img_array)

            #Classification label as one hot vector
            batch_labels[i] = tf.keras.utils.to_categorical(row['Label'],
                                                            num_classes=class_count)

            #Corner format here; apply_aug converts to (x, y, width, height)
            batch_bboxes[i] = [row['xmin'], row['ymin'], row['xmax'], row['ymax']]

        #Apply augmentation
        aug_seq = train_seq if train_mode else test_seq
        batch_images, batch_bboxes = apply_aug(all_images, batch_bboxes, aug_seq)

        #Normalize batch images as per Pre-trained model to be used
        batch_images = tf.keras.applications.resnet50.preprocess_input(batch_images)

        #Make bounding boxes (x, y, w, h) as numbers between 0 and 1 - this seems to work better
        batch_bboxes = batch_bboxes/img_size

        #Return batch - use yield function to make it a python generator
        yield batch_images, [batch_labels, batch_bboxes]

In [None]:
gen = batch_generator(train_df, batch_size=2)

In [None]:
X, y = next(gen)

In [None]:
X.shape

#### Build the Model

Load Pre-Trained Model

In [None]:
tf.keras.backend.clear_session()
model = tf.keras.applications.resnet50.ResNet50(include_top=False, #Do not include FC layer at the end
                                          input_shape=(img_size,img_size, 3),
                                          weights='imagenet')

Freeze all layers of Pre-trained model

In [None]:
model.summary()

In [None]:
len(model.layers)

In [None]:
for layer in model.layers:
    layer.trainable = False

In [None]:
model.summary()

Add layers

In [None]:
#Output tensor of the pre-trained ResNet50 base (no FC layers, include_top=False)
x1 = model.output

#Dropout with rate 0.5 on the backbone features
x2 = tf.keras.layers.Dropout(0.5)(x1)

#1x1 convolution with 50 filters and ReLU activation
x3 = tf.keras.layers.Conv2D(50, (1,1), activation='relu')(x2)

#Flatten the output to feed to Dense layer
x4 = tf.keras.layers.Flatten()(x3)

#Batch Norm on the flattened features; x5 is the shared input of both output heads
x5 = tf.keras.layers.BatchNormalization()(x4)

In [None]:
x1

In [None]:
x2

In [None]:
x3

In [None]:
x4

In [None]:
x5

Build layer for Label output

In [None]:
#Classification
label_output = tf.keras.layers.Dense(len(num_classes), activation='softmax', name='class_op')(x5)

Build layer for bounding box output

In [None]:
#Regression head: 4 values (x, y, width, height). Sigmoid keeps them in (0, 1),
#matching the targets that batch_generator divides by img_size.
bbox_output = tf.keras.layers.Dense(4 , activation='sigmoid', name='reg_op')(x5)

Finalize the model

In [None]:
#Non Sequential model as it has two different outputs
final_model = tf.keras.models.Model(inputs=model.input, #Pre-trained model input as input layer
                                    outputs=[label_output,bbox_output]) #Output layer added

Define IoU Metrics

In [None]:
def calculate_iou(y_true, y_pred):
    """Mean Intersection-over-Union between two batches of boxes.

    Arguments:
    y_true -- array-like of shape (batch_size, 4); each row is
              [x, y, width, height]
    y_pred -- array-like with the same layout as y_true
    x and y are the coordinates of the top left corner of each box.

    Output:
    Mean IoU over the batch as np.float32 (a ratio between 0 and 1 for boxes
    with non-negative width and height). A pair of boxes whose union has no
    area contributes 0 instead of NaN, and an empty batch returns 0.
    """

    # Convert once, up front; float32 must match the dtype declared for
    # tf.py_function in IoU
    y_true = np.asarray(y_true, dtype=np.float32)
    y_pred = np.asarray(y_pred, dtype=np.float32)

    if y_true.shape[0] == 0:
        return np.float32(0.0)

    # Top left corners, sizes and areas
    true_x, true_y, true_w, true_h = (y_true[:, k] for k in range(4))
    pred_x, pred_y, pred_w, pred_h = (y_pred[:, k] for k in range(4))
    area_true = true_w * true_h
    area_pred = pred_w * pred_h

    # Intersection box: the larger of the two top left corners and the smaller
    # of the two bottom right corners
    int_left = np.maximum(true_x, pred_x)
    int_top = np.maximum(true_y, pred_y)
    int_right = np.minimum(true_x + true_w, pred_x + pred_w)
    int_bottom = np.minimum(true_y + true_h, pred_y + pred_h)

    # np.maximum(0, ...) forces the intersection to 0 if the boxes don't overlap
    area_int = np.maximum(0, int_right - int_left) * np.maximum(0, int_bottom - int_top)

    area_union = area_true + area_pred - area_int

    # Divide only where the union has area; the other entries stay 0
    iou = np.zeros_like(area_int)
    np.divide(area_int, area_union, out=iou, where=area_union > 0)

    # return the mean IoU score for the batch
    return np.float32(iou.mean())



def IoU(y_true, y_pred):
    """Keras metric: mean IoU of the batch, computed by calculate_iou.

    calculate_iou is plain NumPy code, so it is wrapped with tf.py_function.
    The output dtype declared here (tf.float32) has to be the same dtype that
    calculate_iou returns, otherwise the call fails at run time.
    """
    return tf.py_function(func=calculate_iou,
                          inp=[y_true, y_pred],
                          Tout=tf.float32)

In [None]:
#Write the model to pets.h5 only when the monitored value (ModelCheckpoint's default) improves
model_checkpoint = tf.keras.callbacks.ModelCheckpoint("pets.h5", save_best_only=True, verbose=1)

In [None]:
#First compile: classification and box-regression losses are weighted equally
final_model.compile(optimizer='adam', 
                    #Losses, weights and metrics are keyed by output layer name ('reg_op', 'class_op')
                    loss={'reg_op':'mse', 'class_op':'categorical_crossentropy'},
                    loss_weights={'reg_op':1, 'class_op':1},
                    metrics={'reg_op':[IoU], 'class_op':['accuracy']})

In [None]:
final_model.summary()

Train the model

In [None]:
#Create train and test generator
batchsize = 64
train_generator = batch_generator(train_df, batch_size=batchsize, train_mode=True) #batchsize can be changed
test_generator = batch_generator(test_df, batch_size=batchsize, train_mode=False)

In [None]:
final_model.fit(train_generator,
                epochs=3,
                steps_per_epoch= train_df.shape[0]//batchsize,
                validation_data=test_generator,
                validation_steps = test_df.shape[0]//batchsize, 
                callbacks=[model_checkpoint])

In [None]:
#Compile again with the box-regression loss weighted 20x relative to the classification loss
final_model.compile(optimizer='adam', 
                    loss={'reg_op':'mse', 'class_op':'categorical_crossentropy'},
                    loss_weights={'reg_op':20, 'class_op':1},
                    metrics={'reg_op':[IoU], 'class_op':['accuracy']})

In [None]:
#Continue training with the re-weighted loss (epochs are numbered 3..49).
#A fresh checkpoint callback is created for this run: with reg_op weighted 20x
#the total val_loss is on a different scale, while the callback used for the
#first run still remembers the best val_loss of the equally weighted run and
#could therefore refuse to save any model produced here.
final_model.fit(train_generator,
                epochs=50,
                initial_epoch=3,
                steps_per_epoch= train_df.shape[0]//batchsize,
                validation_data=test_generator,
                validation_steps = test_df.shape[0]//batchsize,
                callbacks=[tf.keras.callbacks.ModelCheckpoint("pets.h5",
                                                              save_best_only=True,
                                                              verbose=1)])

In [None]:
!ls -l

In [None]:
#Reload the checkpoint written during training; the custom IoU metric has to be passed via custom_objects
final_model = tf.keras.models.load_model('pets.h5', custom_objects={'IoU':IoU})

In [None]:
final_model.save('/gdrive/My Drive/pets.h5')

In [None]:
final_model

#### Model Prediction

In [None]:
def predict_and_draw(image_num, df):
    """Run final_model on one image and show actual vs. predicted bounding box.

    Arguments:
    image_num -- index label of the row in df to use
    df        -- dataframe with columns 'File', 'Class', 'xmin', 'ymin',
                 'xmax' and 'ymax'

    Prints the raw model outputs and the actual / predicted class names, then
    displays the image with the actual box in red and the predicted box in
    green. Returns nothing.
    """

    #Load image
    img_path = df.loc[image_num, 'File']
    img = tf.keras.preprocessing.image.load_img(img_path)
    w, h = img.size

    #Read actual label and bounding box. The corners are converted to plain
    #Python ints because pandas returns NumPy scalars and OpenCV's drawing
    #functions expect integer pixel coordinates.
    act_class = df.loc[image_num, 'Class']
    xmin, ymin, xmax, ymax = (int(value) for value in
                              df.loc[image_num, ['xmin', 'ymin', 'xmax', 'ymax']])

    #Prepare input for model
    #1. Resize image
    img_resized = img.resize((img_size, img_size))
    #2. Convert to array and make it a batch of 1
    input_array = tf.keras.preprocessing.image.img_to_array(img_resized)
    input_array = np.expand_dims(input_array, axis=0)
    #3. Normalize image data
    input_array = tf.keras.applications.resnet50.preprocess_input(input_array)

    #Prediction
    pred = final_model.predict(input_array)
    #Get classification and regression predictions of the single example
    label_pred, bbox_pred = pred[0][0], pred[1][0]
    print(label_pred)
    print(bbox_pred)
    #Get Label with highest probability
    pred_class = label_class_dict[np.argmax(label_pred)]

    print('Real Label :', act_class, '\nPredicted Label: ', pred_class)

    #The model predicts (x, y, width, height) as fractions of the image size;
    #scale them back to pixel corners of the original image
    pred_top_left = (int(bbox_pred[0]*w), int(bbox_pred[1]*h))
    pred_bottom_right = (int((bbox_pred[0]+bbox_pred[2])*w),
                         int((bbox_pred[1]+bbox_pred[3])*h))

    #Draw bounding boxes - Actual (Red) and Predicted (Green); colours are BGR
    img = cv2.imread(img_path)
    img = cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0,0,255), 2)
    img = cv2.rectangle(img, pred_top_left, pred_bottom_right, (0,255,0), 2)

    #Display the picture
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(img)
    plt.show()

In [None]:
#Predict on a randomly chosen image of the Test Dataset
#(the picked row number is passed on instead of a hard-coded one)
image_num = np.random.randint(0, test_df.shape[0])
predict_and_draw(image_num, test_df)

In [None]:
image_num