# Object detection #4
sources: https://pyimagesearch.com/2020/07/13/r-cnn-object-detection-with-keras-tensorflow-and-deep-learning/, https://towardsdatascience.com/step-by-step-r-cnn-implementation-from-scratch-in-python-e97101ccde55

Implement an RCNN object detector given the techniques we've learnt previously.

We will use selective search to generate ROIs then use these ROIs as our training data to classify objects from our dataset.

## Summary
RCNN object detection and classification pipeline:

1) Build object detection dataset with selective search (calculate IOU on proposed region with ground truth data and add labels to proposed regions)

2) Fine-tune classification model on dataset (utilize transfer learning)

3) During inference run selective search on input image

4) Make predictions on each ROI using fine tuned model, apply NMS and return results

*IOU: intersection over union

## Dependencies

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import cv2
import time
import os

## NMS
Non-maxima suppression (NMS) removes redundant bboxes by discarding those whose overlap with an already selected bbox is above a given threshold.

In [2]:
# Felzenszwalb et al.
# boxes should be defined as [(start_x, start_y, end_x, end_y),...]
def NMS(boxes, overlapThresh):
    """Greedy non-maxima suppression over a set of bounding boxes.

    Boxes are visited in descending order of their left edge (``start_x``).
    Each visited box is kept, and every remaining box whose overlap with it
    exceeds ``overlapThresh`` is discarded.

    Args:
        boxes: sequence or 2-D array of ``(start_x, start_y, end_x, end_y)``
            rows. Plain lists of tuples are accepted as well as ndarrays.
        overlapThresh: a remaining box is suppressed when
            ``intersection_area / area_of_that_box`` is strictly greater
            than this value.

    Returns:
        An ndarray containing the kept rows of ``boxes`` in the order they
        were picked, or an empty list when ``boxes`` is empty.
    """
    if len(boxes) == 0:
        return []
    # The column slicing below needs a 2-D array; converting here lets callers
    # pass a plain list of tuples.
    boxes = np.asarray(boxes)
    # indexes of the boxes that survive suppression
    pick = []
    # coords of all bboxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    # area of every bbox (+1 because the end coordinates are inclusive)
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    # visiting order: indexes sorted by the left (start_x) coordinate
    idxs = np.argsort(x1)
    # while there are still bboxes not checked
    while len(idxs) > 0:
        # The last index is always kept: anything that overlapped a previously
        # picked box too much has already been removed from idxs.
        i = idxs[-1]
        pick.append(i)
        idxs = idxs[:-1]
        # intersection rectangle of box i with every remaining box at once
        xx1 = np.maximum(x1[i], x1[idxs])
        yy1 = np.maximum(y1[i], y1[idxs])
        xx2 = np.minimum(x2[i], x2[idxs])
        yy2 = np.minimum(y2[i], y2[idxs])
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        # overlap is measured relative to the area of the *remaining* box
        overlap = (w * h) / area[idxs]
        # drop every remaining box whose overlap is above the threshold
        idxs = idxs[~(overlap > overlapThresh)]
    # only picked boxes returned
    return boxes[pick]

## Selective search

In [3]:
def selective_search(img, print_info=False):
    """Run OpenCV selective search (fast mode) on an image.

    Args:
        img: image handed to ``setBaseImage`` as the base image.
        print_info: when true, print the elapsed time of the search and the
            number of region proposals found.

    Returns:
        The region proposals returned by ``process()``.
    """
    searcher = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
    searcher.setBaseImage(img)
    # fast mode rather than quality mode
    searcher.switchToSelectiveSearchFast()
    # time only the search itself, not the setup above
    started_at = time.time()
    proposals = searcher.process()
    elapsed = time.time() - started_at
    if not print_info:
        return proposals
    print('Selective search took {:.4f}s'.format(elapsed))
    print('{} total region proposals'.format(len(proposals)))
    return proposals

## IOU
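IOU value = area of overlap / area of union, i.e. the intersection area of the two boxes divided by the total area covered by both boxes together. For the dataset building phase we need to get the IOU of the ground truth box with the ROI from selective search.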

In [4]:
def get_iou(bb1, bb2):
    """Return the intersection over union (IoU) of two bounding boxes.

    Args:
        bb1: box as ``(start_x, start_y, end_x, end_y)``.
        bb2: box in the same format as ``bb1``.

    Returns:
        ``intersection_area / union_area`` as a float, or ``0.0`` when the
        boxes do not intersect or when the union has no area.
    """
    # corners of the intersection rectangle
    x_1 = max(bb1[0], bb2[0])
    y_1 = max(bb1[1], bb2[1])
    x_2 = min(bb1[2], bb2[2])
    y_2 = min(bb1[3], bb2[3])
    # no contact
    if x_2 < x_1 or y_2 < y_1:
        return 0.0
    intersection_area = (x_2 - x_1) * (y_2 - y_1)
    combined_area = (bb1[2] - bb1[0]) * (bb1[3] - bb1[1]) + (bb2[2] - bb2[0]) * (bb2[3] - bb2[1])
    # the intersection is counted twice in combined_area, so remove it once
    union_area = combined_area - intersection_area
    # degenerate boxes (e.g. two identical zero-area boxes) would divide by zero
    if union_area <= 0:
        return 0.0
    return intersection_area / float(union_area)

## Constants

In [8]:
# NOTE(review): WIDTH is not referenced by any cell visible here; presumably a
# target image width in pixels -- confirm against the cells that use it.
WIDTH = 600
# Target size passed to cv2.resize for each cropped ROI in process_data.
INPUT_SIZE = (224, 224) # for classifier

## Build object detection dataset
We will train our model to detect airplanes using the airplanes dataset.

Pre-process and create the dataset for our classifier. In our case we can have 2 classes for each ROI: foreground (airplane) or background. We will set the label of foreground (airplane) as 1 and background as 0.

In [None]:
def process_data(img, rects, gtvalues):
    """Crop and resize region proposals that overlap a ground-truth box.

    NOTE(review): this function is unfinished. The resized crop is computed
    but never stored or returned, ``counter`` and ``flag`` are never updated
    (so the "30 valid ROIs" limit and the ``flag == 0`` check never trigger),
    and ``falsecounter``, ``fflag`` and ``bflag`` are unused. As written it
    implicitly returns ``None``.

    Args:
        img: image array; it is copied and cropped with ``[y:y + h, x:x + w]``.
        rects: iterable of region proposals, each unpacked as ``(x, y, w, h)``.
        gtvalues: iterable of ground-truth boxes, each passed to ``get_iou``
            as its first argument.
    """
    # work on a copy of the input image
    imout = img.copy()
    # Counters/flags below are initialised but never modified in this version.
    counter = 0
    falsecounter = 0
    flag = 0
    fflag = 0
    bflag = 0
    # for each roi
    for e, result in enumerate(rects):
        # we only retrieve the first 2000 region proposals
        if e < 2000 and flag == 0:
            # for each gt bbox
            for gtval in gtvalues:
                (x, y, w, h) = result
                # the proposal is (x, y, w, h); convert it to corner form for get_iou
                iou = get_iou(gtval, (x, y, x + w, y + h))
                # limit to 30 valid ROIs (counter is never incremented, so this is always true)
                if counter < 30:
                    # proposals with IOU above 0.7 against a gt box are cropped and resized
                    if iou > 0.7:
                        timage = imout[y:y + h, x:x + w]
                        # NOTE(review): the resized crop is discarded here
                        resized = cv2.resize(timage, INPUT_SIZE, interpolation = cv2.INTER_AREA)

In [7]:
# Training samples and labels. Nothing is appended to them yet: the processing
# step at the end of the loop is still missing.
x_train = []
y_train = []
# Number of annotation files processed so far; the loop stops after the first
# one (development limiter).
iter_count = 0
for i in os.listdir('data/Airplanes_Annotations'):
    if iter_count == 1:
        break
    # only annotation files whose name starts with 'airplane' are used
    if not i.startswith('airplane'):
        continue
    # the image shares the annotation file's base name, with a .jpg extension
    filename = os.path.splitext(i)[0] + '.jpg'
    img = cv2.imread('data/Images/' + filename)
    # cv2.imread returns None instead of raising when the file cannot be read
    if img is None:
        raise FileNotFoundError('Could not read image data/Images/' + filename)
    # reverse the channel axis (OpenCV loads images as BGR)
    img = np.flip(img, axis=-1)
    df = pd.read_csv('data/Airplanes_Annotations/' + i)
    # Each row's first column holds space-separated integer gt coords.
    # .iloc selects that column by position explicitly, instead of relying on
    # the deprecated integer-key fallback of Series indexing.
    gtvalues = [
        tuple(int(value) for value in line.split(' '))
        for line in df.iloc[:, 0]
    ]
    # apply selective search
    rects = selective_search(img, True)
    # TODO: process data with IOU between gt and roi (not implemented yet)

    iter_count += 1

Selective search took 0.4151s
308 total region proposals
