In [1]:
import sys
sys.path.insert(1, "../")
sys.path.insert(1, "../Models/")
import torch
from torch.utils.data import Subset
import torch.nn.functional as F
import torch.nn as nn
from detection_datasets import *
from yolov2 import YOLOv2D19 as YOLOv2
from data_preprocessing import get_norms
from albumentations.pytorch import ToTensorV2
import albumentations as A
from utils import IoU

  check_for_updates()


In [2]:
print(f"Allocated memory: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
import gc

# Invoke garbage collector
gc.collect()

# Clear GPU cache
torch.cuda.empty_cache()
print(f"Allocated memory: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")

Allocated memory: 0.00 MB
Allocated memory: 0.00 MB


In [3]:
import pickle
with open('../Models/anchors_VOC0712trainval.pickle', 'rb') as handle:
    anchors = pickle.load(handle)

In [4]:
# device = torch.device('cuda:0')
device = torch.device('cpu')
dtype=torch.float32

In [5]:
norms = get_norms('../../datasets/VOCdevkit/trainval_norms.json')
means = norms['means']
stds = norms['stds']

In [6]:
model = YOLOv2(state_dict_path='./darknet19_72.96.pth', device=device, dtype=dtype, num_anchors=5)

  state_dict = torch.load(state_dict_path, map_location=self.device)


In [7]:
transforms = A.Compose([
    A.Resize(width=416, height=416),
    # A.VerticalFlip(p=1.0),
    A.Normalize(mean=means, std=stds),
    ToTensorV2()
], bbox_params=A.BboxParams(format='pascal_voc'))
train_set = VOCDatasetV2(devkit_path = '../../datasets/VOCdevkit/', 
                         subsets = [('VOC2012', 'trainval')],
                         scales=[13], anchors=anchors, transforms=transforms, 
                         dtype=dtype, device=device)

True ../../datasets/VOCdevkit/VOC2012\ImageSets\Main\trainval.txt


In [8]:
def raw_to_expected_outputsV2(out, num_classes, anchors):
    # input: (N, objects w/ relative coords, grid_size, grid_size)
    # single obj.: (objectness, box, classes) * num_boxes
    # [conf, obj_xc, obj_yc, obj_w, obj_h]
    obj_stride = num_classes+5
    out[:, 0::obj_stride, :, :] = out[:, 0::obj_stride, :, :].sigmoid() # objectness
    out[:, 1::obj_stride, :, :] = out[:, 1::obj_stride, :, :].sigmoid() # xc
    out[:, 2::obj_stride, :, :] = out[:, 2::obj_stride, :, :].sigmoid() # yc
    
    grid_size = out.shape[-1]
    _anchors = torch.tensor(anchors).to(out.device) * grid_size
    pw = _anchors[:, 0]
    ph = _anchors[:, 1]
    
    out[:, 3::obj_stride, :, :] = pw[None, :, None, None] * out[:, 3::obj_stride, :, :].exp() # w
    out[:, 4::obj_stride, :, :] = ph[None, :, None, None] * out[:, 4::obj_stride, :, :].exp() # h

    for i in range(len(anchors)):
        start_i = 5+i*obj_stride
        end_i = obj_stride*(i+1)
        out[:, start_i:end_i, :, :] = F.softmax(out[:, start_i:end_i, :, :], dim=1, dtype=out.dtype)
        
    # output: (N, objects w/ absolute coords, grid_size, grid_size)
    return out    

In [9]:
def get_absolute_boxesV2(out, num_classes, num_boxes):
    # bringing to outputs relative to the entire image
    grid_size = out.shape[-1]
    indexed_columns = torch.tensor([range(0,grid_size) for i in range(grid_size)], dtype=out.dtype, device=out.device)
    obj_stride = num_classes + 5
    out[:, 1::obj_stride, :, :] = (out[:, 1::obj_stride, :, :] + indexed_columns) / grid_size # xc
    out[:, 2::obj_stride, :, :] = (out[:, 2::obj_stride, :, :] + indexed_columns.T) / grid_size # yc
    
    out[:, 3::obj_stride, :, :] = out[:, 3::obj_stride, :, :] / grid_size # w
    out[:, 4::obj_stride, :, :] = out[:, 4::obj_stride, :, :] / grid_size # h
    return out

In [10]:
def reshape_for_eval(out, num_classes, num_boxes):
    batch_size = out.shape[0]
    obj_stride = num_classes + 5
    grid_size = out.shape[-1]
    out = out.view(batch_size, obj_stride*num_boxes, grid_size*grid_size)
    out = torch.concat(torch.split(out, obj_stride, 1), -1)
    out = out.permute(0,2,1) 
    
    # batch_size, number of objects, (classes + coords + objectness) * num_boxes
    return out

In [11]:
def sort_by_objectness(out):
    batch_size = out.shape[0]
    indeces = out[:, :, 0].argsort(descending=True)
    out = out[torch.arange(batch_size).unsqueeze(1), indeces]
    return out

In [12]:
def remove_below_threshold(out, obj_threshold):
    batch_size = out.shape[0]
    mask = out[:, :, 0] > obj_threshold
    return \
    [out[i, mask[i], :] for i in range(batch_size)]

In [13]:
def iou(box1, box2):
    # expects midpoint data
    xmin1 = box1[..., 0] - box1[..., 2] / 2 
    xmax1 = box1[..., 0] + box1[..., 2] / 2 
    ymin1 = box1[..., 1] - box1[..., 3] / 2
    ymax1 = box1[..., 1] + box1[..., 3] / 2

    xmin2 = box2[..., 0] - box2[..., 2] / 2 
    xmax2 = box2[..., 0] + box2[..., 2] / 2 
    ymin2 = box2[..., 1] - box2[..., 3] / 2
    ymax2 = box2[..., 1] + box2[..., 3] / 2

    xmin_i = torch.stack([xmin1, xmin2]).max(dim=0)[0]
    xmax_i = torch.stack([xmax1, xmax2]).min(dim=0)[0]
    ymin_i = torch.stack([ymin1, ymin2]).max(dim=0)[0]
    ymax_i = torch.stack([ymax1, ymax2]).min(dim=0)[0]

    intersection = F.relu(xmax_i-xmin_i) * F.relu(ymax_i-ymin_i)

    area1 = (xmax1 - xmin1) * (ymax1 - ymin1)
    area2 = (xmax2 - xmin2) * (ymax2 - ymin2)

    return intersection / (area1 + area2 - intersection + 1e-6)

In [14]:
def NMS(pred_boxes, num_classes=20, iou_threshold=0.5):

    selected = []
    for img in pred_boxes:
        pred_class = img[:, 5]
        selected_for_img = []
        for cls in range(num_classes):
            # get objects of that class
            indeces = (pred_class == cls).nonzero(as_tuple=True)[0]
            if indeces.shape[0] < 1:
                continue
            objects = img[indeces]
            reference = objects[:1][:, 1:5] # highest IoU
            compared = objects[1:][:, 1:5]
            reference = reference.expand(compared.shape[0], 4)
            ious = iou(reference, compared)
            ious = torch.concat((torch.tensor([0.0]), ious))
            selected_for_img.append(indeces[ious < iou_threshold])
        selected_for_img = torch.concat(selected_for_img)
        selected.append(selected_for_img)

    return selected

In [15]:
def get_class_labels(eval_out):
    return \
        [torch.concat((img[:, :5], img[:, 5:].argmax(dim=1).unsqueeze(-1)), dim=-1) for img in eval_out]

In [16]:
def get_pred_boxes(out, anchors, num_classes=20, num_boxes=5, obj_threshold=0.5):
    out = out.detach()
    out = raw_to_expected_outputsV2(out, num_classes, anchors)
    out = get_absolute_boxesV2(out, num_classes, num_boxes)
    out = reshape_for_eval(out, num_classes, num_boxes)
    out = sort_by_objectness(out)
    out = remove_below_threshold(out, obj_threshold)
    return get_class_labels(out)

In [17]:
out = model(torch.stack([train_set[0][0], train_set[1][0]], dim=0))
pred_boxes = get_pred_boxes(out, anchors, obj_threshold=0.5)

In [18]:
indeces = pred_boxes[0][:, -1]==0

In [19]:
pred_boxes[0][indeces]

tensor([[0.6314, 0.6468, 0.6613, 0.1935, 0.3819, 0.0000],
        [0.6164, 0.7331, 0.5818, 0.1386, 0.2208, 0.0000],
        [0.6153, 0.8266, 0.5825, 0.0843, 0.3427, 0.0000],
        [0.6110, 0.9609, 0.4969, 0.1278, 0.2824, 0.0000],
        [0.6004, 0.9631, 0.5711, 0.1471, 0.2422, 0.0000],
        [0.5919, 0.2511, 0.9675, 0.3294, 0.8659, 0.0000],
        [0.5907, 0.7242, 0.9688, 0.6743, 0.9677, 0.0000],
        [0.5897, 0.9582, 0.3501, 0.1758, 0.3197, 0.0000],
        [0.5849, 0.8114, 0.9668, 0.2308, 0.4636, 0.0000],
        [0.5832, 0.7309, 0.7424, 0.2150, 0.3891, 0.0000],
        [0.5735, 0.5816, 0.1088, 0.4373, 0.5547, 0.0000],
        [0.5640, 0.7980, 0.9665, 0.4175, 0.6108, 0.0000],
        [0.5579, 0.4173, 0.6546, 0.1554, 0.2249, 0.0000],
        [0.5566, 0.0452, 0.8089, 0.0998, 0.3293, 0.0000],
        [0.5474, 0.7292, 0.8072, 0.3169, 0.2632, 0.0000],
        [0.5460, 0.6654, 0.9609, 0.2580, 0.5515, 0.0000],
        [0.5447, 0.8152, 0.5049, 0.1005, 0.3775, 0.0000],
        [0.538

In [20]:
sel = NMS(pred_boxes, num_classes=20, iou_threshold=0.5)

In [21]:
pred_boxes[0][sel[0]]

tensor([[ 0.6314,  0.6468,  0.6613,  0.1935,  0.3819,  0.0000],
        [ 0.6164,  0.7331,  0.5818,  0.1386,  0.2208,  0.0000],
        [ 0.6153,  0.8266,  0.5825,  0.0843,  0.3427,  0.0000],
        ...,
        [ 0.5028,  0.1103,  0.5792,  1.1056,  0.8730, 19.0000],
        [ 0.5011,  0.2713,  0.9631,  1.3034,  0.2699, 19.0000],
        [ 0.5004,  0.4235,  0.2003,  0.6272,  0.2911, 19.0000]])

In [22]:
pred_boxes[0]

tensor([[ 0.8023,  0.8883,  0.2078,  0.0276,  0.0833, 16.0000],
        [ 0.7804,  0.8784,  0.1091,  0.1218,  0.1980,  3.0000],
        [ 0.7681,  0.7237,  0.6677,  0.0429,  0.0765, 12.0000],
        ...,
        [ 0.5011,  0.9649,  0.3507,  0.0458,  0.1759, 16.0000],
        [ 0.5011,  0.2713,  0.9631,  1.3034,  0.2699, 19.0000],
        [ 0.5004,  0.4235,  0.2003,  0.6272,  0.2911, 19.0000]])

# mAP

In [23]:
def get_gt_boxes(gt_out, num_classes=20, num_boxes=5, obj_threshold=0.5):
    gt_out = gt_out.detach()
    gt_out = get_absolute_boxesV2(gt_out, num_classes, num_boxes)
    gt_out = reshape_for_eval(gt_out, num_classes, num_boxes)
    gt_out = sort_by_objectness(gt_out)
    gt_out = remove_below_threshold(gt_out, obj_threshold)
    return get_class_labels(gt_out)

In [24]:
gt_out = torch.stack([train_set[0][1], train_set[1][1]], dim=0)
gt_boxes = get_gt_boxes(gt_out)

In [25]:
gt_boxes

[tensor([[ 1.0000,  0.4051,  0.4823,  0.8280,  0.7520, 19.0000]]),
 tensor([[ 1.0000,  0.6835,  0.1117,  0.0420,  0.1592, 14.0000],
         [ 1.0000,  0.4691,  0.5934,  0.9080,  0.9670, 18.0000]])]

In [27]:
gt_out.shape

torch.Size([2, 125, 13, 13])

In [56]:
out = model(torch.stack([train_set[0][0], train_set[1][0]], dim=0))
pred_boxes = get_pred_boxes(out, anchors, obj_threshold=0.5)

In [57]:
gt_out = torch.stack([train_set[0][1], train_set[1][1]], dim=0)
gt_out1 = gt_out.detach().clone()
gt_boxes = get_gt_boxes(gt_out)
gt_boxes1 = get_gt_boxes(gt_out1)

In [58]:
res = mAP(gt_boxes, gt_boxes1)
res

tensor([0.])


tensor([[ 0.0000,  1.0000,  0.4051,  0.4823,  0.8280,  0.7520, 19.0000,  1.0000,
          1.0000,  1.0000],
        [ 1.0000,  1.0000,  0.6835,  0.1117,  0.0420,  0.1592, 14.0000,  1.0000,
          1.0000,  1.0000],
        [ 1.0000,  1.0000,  0.4691,  0.5934,  0.9080,  0.9670, 18.0000,  1.0000,
          1.0000,  1.0000]])

In [32]:
res[res[:, 6].argsort(descending=True)]

tensor([[ 0.0000,  1.0000,  0.4051,  0.4823,  0.8280,  0.7520, 19.0000,  1.0000,
          1.0000,  1.0000],
        [ 1.0000,  1.0000,  0.4691,  0.5934,  0.9080,  0.9670, 18.0000,  1.0000,
          1.0000,  1.0000],
        [ 1.0000,  1.0000,  0.6835,  0.1117,  0.0420,  0.1592, 14.0000,  1.0000,
          1.0000,  1.0000]])

In [262]:
gt_boxes

[tensor([[ 1.0000,  0.4051,  0.4823,  0.8280,  0.7520, 19.0000]]),
 tensor([[ 1.0000,  0.6835,  0.1117,  0.0420,  0.1592, 14.0000],
         [ 1.0000,  0.4691,  0.5934,  0.9080,  0.9670, 18.0000]])]

# Develop test cases

In [87]:
from torcheval.metrics.aggregation.auc import AUC

In [134]:
def mAP(pred_boxes, gt_boxes, num_classes=20, iou_threshold=0.5):

    # create one tensor where 0 dim is number of objects and 1 dim is an object.
    # [[img_i, objectness_score, xc, yc, w, h, class_label, is_true_positive, precision, recall]]
    batch_size = len(gt_boxes)
    for img_i in range(batch_size):
        img_pred = pred_boxes[img_i]
        num_objects = img_pred.shape[0]
        img_i_column = torch.full((num_objects, 1), img_i, dtype=img_pred.dtype, device=img_pred.device)
        true_positives_column = torch.zeros(num_objects, dtype=img_pred.dtype, device=img_pred.device).unsqueeze(-1)
        metrics_columns = torch.full((num_objects, 2), -1, dtype=img_pred.dtype, device=img_pred.device)
        new_img_pred = torch.cat((img_i_column, img_pred, true_positives_column, metrics_columns), dim=-1)
        pred_boxes[img_i] = new_img_pred
    pred_boxes = torch.concat(pred_boxes)

    # figure out if predicted objects are true positives
    for pred_obj_i in range(pred_boxes.shape[0]):
        pred_obj = pred_boxes[pred_obj_i:pred_obj_i+1]
        img_i = int(pred_obj[0, 0])
        _class = pred_obj[0, 6]
        true_objs = gt_boxes[img_i][
                            (gt_boxes[img_i][:, 5] == _class).nonzero(as_tuple=True)[0]
        ]

        if true_objs.numel() == 0:
            continue
        pred_obj = pred_obj[:, 2:6]
        pred_obj = pred_obj.expand(true_objs.shape[0], 4)
        true_positive = torch.sum(
            iou(pred_obj, true_objs[:, 1:5]) > iou_threshold
        ).item() > 0
    
        if true_positive:
            pred_boxes[pred_obj_i, 7] = 1.0

    # sort by objectness score
    pred_boxes = pred_boxes[pred_boxes[:, 1].argsort(descending=True)]

    # calculate precision and recall for every class
    pred_class = pred_boxes[:, 6]
    results = {}
    for cls in range(num_classes):
        indeces_of_cls = (pred_class == cls).nonzero(as_tuple=True)[0]
        num_objects_of_cls = indeces_of_cls.shape[0]
        for obj_i in range(num_objects_of_cls):         
            objects = pred_boxes[indeces_of_cls[:obj_i+1]]
    
            # precision tp / (tp+fp)
            tp = objects[:, 7].sum()
            tp_plus_fp = objects.shape[0]
            precision = tp / tp_plus_fp
    
            # recall tp / (tp + fn)
            recall = tp / num_objects_of_cls

            pred_boxes[indeces_of_cls[obj_i], 8] = precision
            pred_boxes[indeces_of_cls[obj_i], 9] = recall

        if num_objects_of_cls > 0:
            metric = AUC()
            precision_scores = torch.cat([torch.tensor([1.]), pred_boxes[indeces_of_cls, 8]])
            recall_scores = torch.cat([torch.tensor([0.]), pred_boxes[indeces_of_cls, 9]])
            metric.update(recall_scores, precision_scores)
            print(recall_scores, precision_scores)
            ap_score = metric.compute()
            metric.reset()
            results[cls] = {'ap_score': ap_score, 'num_objects_of_cls': num_objects_of_cls}
        else:
            results[cls] = {'ap_score': -1, 'num_objects_of_cls': num_objects_of_cls}
            continue

    N = 0
    ap_sum = 0
    for cls, value in results.items():
        if value['num_objects_of_cls'] < 1:
            continue
        else:
            N += 1
            ap_sum += value['ap_score']

    results['mAP'] = ap_sum / N
    
    return results

In [173]:
gt_boxes = [torch.tensor([[1.0, 0.2, 0.3, 0.2, 0.3, 0.],
                          [1.0, 0.6, 0.75, 0.5, 0.3, 0.]]),
            torch.tensor([[1.0, 0.8, 0.7, 0.2, 0.3, 0.],
                          [1.0, 0.4, 0.25, 0.5, 0.3, 0.]]),
            torch.tensor([[1.0, 0.2, 0.7, 0.2, 0.3, 0.],
                          [1.0, 0.75, 0.4, 0.3, 0.5, 0.]])]
gt_boxes1 = [torch.tensor([[1.0, 0.2, 0.3, 0.2, 0.3, 0.],
                          [1.0, 0.6, 0.75, 0.5, 0.3, 0.]]),
            torch.tensor([[1.0, 0.8, 0.7, 0.2, 0.3, 0.],
                          [1.0, 0.4, 0.25, 0.5, 0.3, 0.]]),
            torch.tensor([[1.0, 0.2, 0.7, 0.2, 0.3, 0.],
                          [1.0, 0.75, 0.4, 0.3, 0.5, 0.]])]
pred_boxes = [torch.tensor([[0.3, 0.7, 0.15, 0.3, 0.1, 0.],
                            [0.7, 0.65, 0.7, 0.5, 0.3, 0.],
                            [0.6, 0.2, 0.25, 0.3, 0.3, 0.]]),
              torch.tensor([[0.3, 0.3, 0.85, 0.3, 0.1, 0.],
                            [0.7, 0.35, 0.3, 0.5, 0.3, 0.],
                            [0.6, 0.8, 0.75, 0.3, 0.3, 0.]]),
              torch.tensor([[0.3, 0.3, 0.15, 0.3, 0.1, 0.],
                            [0.7, 0.7, 0.35, 0.3, 0.5, 0.],
                            [0.6, 0.2, 0.75, 0.3, 0.3, 0.]])]
mAP(gt_boxes1, gt_boxes, num_classes=20, iou_threshold=0.5)

tensor([0.0000, 0.1667, 0.3333, 0.5000, 0.6667, 0.8333, 1.0000]) tensor([1., 1., 1., 1., 1., 1., 1.])


{0: {'ap_score': tensor([1.]), 'num_objects_of_cls': 6},
 1: {'ap_score': -1, 'num_objects_of_cls': 0},
 2: {'ap_score': -1, 'num_objects_of_cls': 0},
 3: {'ap_score': -1, 'num_objects_of_cls': 0},
 4: {'ap_score': -1, 'num_objects_of_cls': 0},
 5: {'ap_score': -1, 'num_objects_of_cls': 0},
 6: {'ap_score': -1, 'num_objects_of_cls': 0},
 7: {'ap_score': -1, 'num_objects_of_cls': 0},
 8: {'ap_score': -1, 'num_objects_of_cls': 0},
 9: {'ap_score': -1, 'num_objects_of_cls': 0},
 10: {'ap_score': -1, 'num_objects_of_cls': 0},
 11: {'ap_score': -1, 'num_objects_of_cls': 0},
 12: {'ap_score': -1, 'num_objects_of_cls': 0},
 13: {'ap_score': -1, 'num_objects_of_cls': 0},
 14: {'ap_score': -1, 'num_objects_of_cls': 0},
 15: {'ap_score': -1, 'num_objects_of_cls': 0},
 16: {'ap_score': -1, 'num_objects_of_cls': 0},
 17: {'ap_score': -1, 'num_objects_of_cls': 0},
 18: {'ap_score': -1, 'num_objects_of_cls': 0},
 19: {'ap_score': -1, 'num_objects_of_cls': 0},
 'mAP': tensor([1.])}

In [159]:
out = model(torch.stack([train_set[0][0], train_set[1][0]], dim=0))
pred_boxes1 = get_pred_boxes(out, anchors, obj_threshold=0.5)

In [171]:
NMS(pred_boxes, iou_threshold=0.51)

[tensor([0, 1, 2]), tensor([0, 1, 2]), tensor([0, 1, 2])]

In [176]:
metric = AUC()
precision_scores = torch.tensor([1., ])
recall_scores = torch.tensor([0., .11, .33, .66, 1.])
metric.update(recall_scores, precision_scores)
print(recall_scores, precision_scores)
ap_score = metric.compute()
# metric.reset()

NameError: name 'indeces_of_cls' is not defined