# Import Libraries

In [1]:
import os
import torch
import torch.utils.data
import torchvision
from PIL import Image
from pycocotools.coco import COCO
import torchvision.transforms.functional as F
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

import numpy as np
import matplotlib.pyplot as plt

# Set data path

In [2]:
# data folder
dataset_path = '../data'

# train val test annotations
train_file_path = dataset_path + '/train/' + 'annotations_0_train.json'
val_file_path = dataset_path + '/val/' + 'annotations_0_val.json'
test_file_path = dataset_path + '/test/' + 'annotations_0_test.json'

# Load validation and test dataset

Initially, validation dataset is intended to check the scores during training for each epochs.\
Due to training runs crashing, the validation code was omitted.\
'Validation' and 'Test' are used to evaluate model performance.

In [3]:
val = dset.CocoDetection(root = dataset_path,
                               annFile = val_file_path,)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [4]:
test = dset.CocoDetection(root = dataset_path,
                               annFile = test_file_path,)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [5]:
# select device (whether GPU or CPU)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Defining custom metric

Retrieving images, annotations, labels and scores into a list.

In [6]:
def get_boxes(model, val, confidence_threshold=0.5):
# val is the Cocodetection dataset

    to_tensor = transforms.ToTensor()
    
    pred_boxes = []
    true_boxes = []

    idx = 0

    for img, anns in val:

        # each image can have multiple annotations
        # get list of ground truth annotations [ [idx, class_pred, prob_score, x1, y1, x2, y2], ... ]
        for ann in anns:

            ann_list = []

            img_idx_in_val = idx
            class_pred = ann['category_id']+1 # shift by +1 to account for addition of background class
            prob_score = 1 # ground truth prob is 1
            x1 = int(ann['bbox'][0])
            y1 = int(ann['bbox'][1])
            x2 = int(ann['bbox'][2] + x1)
            y2 = int(ann['bbox'][3] + y1)

            ann_list = [img_idx_in_val, class_pred, prob_score, x1, y1, x2, y2]
            true_boxes.append(ann_list)
        
        model_eval = model.eval()
        # get model detections
        detections = model_eval(to_tensor(img).to(device).unsqueeze(0))

        # set condition to return only scores above 0.5
        mask = detections[0]['scores']>confidence_threshold                    

        # length of results with scores more than 0.5
        length = len(detections[0]['scores'][mask])

        for det in range(length):

            det_list = []

            img_idx_in_val = idx
            class_pred = detections[0]['labels'][det].item()
            prob_score = detections[0]['scores'][det].item()
            x1 = int(detections[0]['boxes'][det][0].item())
            y1 = int(detections[0]['boxes'][det][1].item())
            x2 = int(detections[0]['boxes'][det][2].item())
            y2 = int(detections[0]['boxes'][det][3].item())

            det_list = [img_idx_in_val, class_pred, prob_score, x1, y1, x2, y2]
            pred_boxes.append(det_list)

        idx += 1

    return true_boxes, pred_boxes

Defining IOU metric to determine the confidence of predicted bounding boxes.

In [7]:
def intersection_over_union(boxA, boxB):
    # determine the (x, y)-coordinates of the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # compute the area of intersection rectangle
    interArea = abs(max((xB - xA, 0)) * max((yB - yA), 0))
    if interArea == 0:
        return 0
    # compute the area of both the prediction and ground-truth
    # rectangles
    boxAArea = abs((boxA[2] - boxA[0]) * (boxA[3] - boxA[1]))
    boxBArea = abs((boxB[2] - boxB[0]) * (boxB[3] - boxB[1]))

    # compute the intersection over union by taking the intersection
    # area and dividing it by the sum of prediction + ground-truth
    # areas - the interesection area
    iou = interArea / float(boxAArea + boxBArea - interArea)

    # return the intersection over union value
    return iou

Define function to compute the custom metric for bounding box detection rate vs ground truth and correct classification rate vs ground truth for each image.\
Conventional mean average precision uses the average precision for each class, but unable to install 'FiftyOne' library or develop that code in time.

In [8]:
def model_metric(val, true_boxes, pred_boxes):

    correct_detection_rate = []
    class_detection_rate = []

    for i in range(len(val)):

        # for every test image

        each_image_true = []
        for t_box in true_boxes:
            if t_box[0] == i:
                each_image_true.append(t_box)

        each_image_pred = []
        for p_box in pred_boxes:
            if p_box[0] == i:
                each_image_pred.append(p_box)

        num_true = len(each_image_true)
        correct_detection = 0
        class_detection = 0
        pos_class_detection = 0
        neg_class_detection = 0

        for t_item in each_image_true:
            true_box = t_item[3:]
            true_class = t_item[1]

            good_iou_counts = 0
            bad_iou_counts = 0

            for p_item in each_image_pred:
                pred_box = p_item[3:]
                pred_class = p_item[1]

                iou = intersection_over_union(true_box, pred_box)
                if iou >= 0.5:
                    good_iou_counts += 1
                    if pred_class == true_class:
                        pos_class_detection += 1
                    else:
                        neg_class_detection += 1
                elif iou < 0.5:
                    bad_iou_counts += 1

            if good_iou_counts >= 1:
                correct_detection += 1

            if pos_class_detection >= 1:
                class_detection += 1

        detection_rate = correct_detection / num_true

        correct_detection_rate.append(detection_rate)

        class_rate = class_detection / num_true

        class_detection_rate.append(class_rate)
    
    print(f'bounding boxes detection rate: {np.mean(correct_detection_rate)*100:.0f}%')
    print(f'class detection rate: {np.mean(class_detection_rate)*100:.0f}%')
#     return np.mean(correct_detection_rate), np.mean(class_detection_rate)

# Model metric for Faster RCNN with Mobilenetv3

In [9]:
def get_model_instance_segmentation(num_classes):
    # load an instance segmentation model pre-trained pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=True)
    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model

# 2 classes; Only target class or background
num_classes = 61

model_mobnet = get_model_instance_segmentation(num_classes)

# stop the model from keeping gradients
for param in model_mobnet.parameters():
    param.requires_grad = False

# move model to the right device
model_mobnet.to(device)

PATH = 'mobnet_model_25_34_cuda.pt'

checkpoint = torch.load(PATH, map_location=device)
model_mobnet.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

For validation set

In [12]:
tb_mn, pb_mn = get_boxes(model_mobnet, val, confidence_threshold=0.5)
model_metric(val, tb_mn, pb_mn)

bounding boxes detection rate: 36%
class detection rate: 29%


For test set

In [13]:
tb_mn, pb_mn = get_boxes(model_mobnet, test, confidence_threshold=0.5)
model_metric(test, tb_mn, pb_mn)

bounding boxes detection rate: 33%
class detection rate: 31%


# Model metric for Faster RCNN with ResNet50

In [15]:
def get_model_instance_segmentation(num_classes):
    # load an instance segmentation model pre-trained pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model

# 2 classes; Only target class or background
num_classes = 61

model_fr = get_model_instance_segmentation(num_classes)

# stop the model from keeping gradients
for param in model_mobnet.parameters():
    param.requires_grad = False

# move model to the right device
model_fr.to(device)

PATH = 'model_35_39.pt'

checkpoint = torch.load(PATH, map_location=device)
model_fr.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

For validation set

In [16]:
tb_fr, pb_fr = get_boxes(model_fr, val, confidence_threshold=0.5)
model_metric(val, tb_fr, pb_fr)

bounding boxes detection rate: 40%
class detection rate: 36%


For test set

In [17]:
tb_fr, pb_fr = get_boxes(model_fr, test, confidence_threshold=0.5)
model_metric(test, tb_fr, pb_fr)

bounding boxes detection rate: 40%
class detection rate: 40%


# Model metric for RetinaNet with ResNet50

In [19]:
# load model
model_rn = torchvision.models.detection.retinanet_resnet50_fpn(pretrained=True)

# move model to the right device
model_rn.to(device)
    
# stop the model from keeping gradients
for param in model_mobnet.parameters():
    param.requires_grad = False

PATH = 'retnet_model_30_39.pt'

checkpoint = torch.load(PATH, map_location=device)
model_rn.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [20]:
tb_rn, pb_rn = get_boxes(model_rn, val, confidence_threshold=0.5)
model_metric(val, tb_rn, pb_rn)

bounding boxes detection rate: 43%
class detection rate: 31%


In [21]:
tb_rn, pb_rn = get_boxes(model_rn, test, confidence_threshold=0.5)
model_metric(test, tb_rn, pb_rn)

bounding boxes detection rate: 41%
class detection rate: 38%
