In [2]:
!pip install object_detection_metrics

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting object_detection_metrics
  Downloading object_detection_metrics-0.4.post1-py3-none-any.whl (17 kB)
Collecting docopt
  Downloading docopt-0.6.2.tar.gz (25 kB)
Building wheels for collected packages: docopt
  Building wheel for docopt (setup.py) ... [?25l[?25hdone
  Created wheel for docopt: filename=docopt-0.6.2-py2.py3-none-any.whl size=13723 sha256=67e2d45b7d93e78c680a9a9b725680a5d85b5880fe4900daf0f1773a1d5610c6
  Stored in directory: /root/.cache/pip/wheels/72/b0/3f/1d95f96ff986c7dfffe46ce2be4062f38ebd04b506c77c81b9
Successfully built docopt
Installing collected packages: docopt, object-detection-metrics
Successfully installed docopt-0.6.2 object-detection-metrics-0.4.post1


In [3]:
# Mount google drive connection to have access to the shared data and import required packages
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from podm import coco_decoder
from podm.metrics import get_pascal_voc_metrics, MetricPerClass, get_bounding_boxes

import json
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# 4. Metric contrast

In [4]:
def compute_dices(gt, preds):
  """
  Given two lists of predictions in coco format ground truth (gt) and predictions (preds)
  computes a list of dice scores for each image
  """
  image_ids = [gt['annotations'][i]['image_id'] for i in range(len(gt['annotations']))]
  dices = []
  scores = []

  for i in image_ids:
    current_gt = np.zeros((512, 512), dtype='int')
    current_pred = np.zeros((512, 512), dtype='int')

    for bbox in [gt['annotations'][k]['bbox'] for k in range(len(gt['annotations'])) if gt['annotations'][k]['image_id'] == i]:
      current_gt[int(bbox[2]):int(bbox[0]), int(bbox[3]):int(bbox[1])] = 1

    for bbox in [preds[k]['bbox'] for k in range(len(preds)) if preds[k]['image_id']==i]:
      current_pred[int(bbox[2]):int(bbox[0]), int(bbox[3]):int(bbox[1])] = 1

    dice = np.sum(current_pred[current_gt==1])*2.0 / ((np.sum(current_pred) + np.sum(current_gt))+1)
    dices.append(dice)
    confidences = [preds[k]['score'] for k in range(len(preds)) if preds[k]['image_id']==i]
    if len(confidences)>0:
      scores.append(np.mean(confidences))
    else:
      scores.append(0)
    
  return dices, scores

## 4.1 Train metric contrast

In [5]:
models = ['detectron2', 'yolo']

In [6]:
model = 'yolo'
ground_truth = COCO('/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/bounding_boxes/train_v2.json')
coco_rst = ground_truth.loadRes(f'/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/results/{model}_train.json')
cocoEval = COCOeval(ground_truth, coco_rst, iouType='bbox')

loading annotations into memory...
Done (t=1.58s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.89s)
creating index...
index created!


In [8]:
for model in models:
  print('#'*30)
  print(f'Report of train metrics for {model} model')
  if model == 'yolo':
    ground_truth = COCO('/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/bounding_boxes/train_v2.json')
    coco_rst = ground_truth.loadRes(f'/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/results/{model}_train_l_150.json')
  else:
    ground_truth = COCO('/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/bounding_boxes/train.json')
    coco_rst = ground_truth.loadRes(f'/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/results/{model}_train.json')
  cocoEval = COCOeval(ground_truth, coco_rst, iouType='bbox')
  cocoEval.evaluate()
  cocoEval.accumulate()
  cocoEval.summarize()
  print('#'*4 + ' Only 0.5')
  cocoEval.params.iouThrs = np.array([0.5])
  cocoEval.evaluate()
  cocoEval.accumulate()
  cocoEval.summarize()

  with open('/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/bounding_boxes/train.json', 'r') as f:
    gt = json.load(f)
  with open(f'/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/results/{model}_train.json') as f:
    preds = json.load(f)
  print('DICE SCORE', np.mean(compute_dices(gt, preds)[0]))

##############################
Report of train metrics for detectron2 model
loading annotations into memory...
Done (t=0.09s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.44s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.94s).
Accumulating evaluation results...
DONE (t=0.18s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.793
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.988
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.964
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.782
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.805
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.755
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.720
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10

## 4.2 Validation metric contrast

In [9]:
for model in models:
  print('#'*30)
  print(f'Report of train metrics for {model} model')
  if model == 'yolo':
    ground_truth = COCO('/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/bounding_boxes/val_v2.json')
    coco_rst = ground_truth.loadRes(f'/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/results/{model}_val_l_150.json')
  else:
    ground_truth = COCO('/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/bounding_boxes/val.json')
    coco_rst = ground_truth.loadRes(f'/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/results/{model}_val.json')
  cocoEval = COCOeval(ground_truth, coco_rst, iouType='bbox')
  cocoEval.evaluate()
  cocoEval.accumulate()
  cocoEval.summarize()
  print('#'*4 + ' Only 0.5')
  cocoEval.params.iouThrs = np.array([0.5])
  cocoEval.evaluate()
  cocoEval.accumulate()
  cocoEval.summarize()
  with open('/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/bounding_boxes/val.json', 'r') as f:
    gt = json.load(f)
  with open(f'/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/results/{model}_val.json') as f:
    preds = json.load(f)
  print('DICE SCORE', np.mean(compute_dices(gt, preds)[0]))

##############################
Report of train metrics for detectron2 model
loading annotations into memory...
Done (t=0.39s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.43s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.10s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.415
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.763
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.421
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.239
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.459
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.265
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.425
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10

## 4.3 Test metric contrast

In [10]:
for model in models:
  print('#'*30)
  print(f'Report of train metrics for {model} model')
  if model == 'yolo':
    ground_truth = COCO('/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/bounding_boxes/test_v2.json')
    coco_rst = ground_truth.loadRes(f'/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/results/{model}_test_l_150.json')
  else:
    ground_truth = COCO('/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/bounding_boxes/test.json')
    coco_rst = ground_truth.loadRes(f'/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/results/{model}_test.json')
  cocoEval = COCOeval(ground_truth, coco_rst, iouType='bbox')
  cocoEval.evaluate()
  cocoEval.accumulate()
  cocoEval.summarize()
  print('#'*4 + ' Only 0.5')
  cocoEval.params.iouThrs = np.array([0.5])
  cocoEval.evaluate()
  cocoEval.accumulate()
  cocoEval.summarize()
  with open('/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/bounding_boxes/test.json', 'r') as f:
    gt = json.load(f)
  with open(f'/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/results/{model}_test.json') as f:
    preds = json.load(f)
  print('DICE SCORE', np.mean(compute_dices(gt, preds)[0]))

##############################
Report of train metrics for detectron2 model
loading annotations into memory...
Done (t=0.33s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.33s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.10s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.353
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.697
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.324
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.304
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.385
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.313
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.381
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10

## 4.4 Using VOC metrics

In [None]:
for t in ['train', 'val', 'test']:
  with open(f'/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/bounding_boxes/{t}.json') as fp:
    gt = coco_decoder.load_true_object_detection_dataset(fp)

  with open(f'/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/results/detectron2_{t}.json') as fp:
      pred_dataset = coco_decoder.load_pred_object_detection_dataset(fp, gt)

  gt_BoundingBoxes = get_bounding_boxes(gt)
  pd_BoundingBoxes = get_bounding_boxes(pred_dataset)
  results = get_pascal_voc_metrics(gt_BoundingBoxes, pd_BoundingBoxes, .5)

  for cls, metric in results.items():
      label = metric.label
      print(t, 'ap', metric.ap)

train ap 0.9407964505513814
val ap 0.7002729023820106
test ap 0.677576419893404


In [None]:
dice, scores = compute_dices(gt, preds)

confidence_contrast = pd.DataFrame({'dice': dice, 'confidence': scores})
confidence_contrast['decile_conf'] = pd.qcut(confidence_contrast['confidence'], 10)
confidence_contrast.groupby('decile_conf')['dice'].mean()

decile_conf
(-0.001, 0.39]    0.176510
(0.39, 0.584]     0.472279
(0.584, 0.72]     0.571562
(0.72, 0.803]     0.640700
(0.803, 0.851]    0.615081
(0.851, 0.883]    0.710117
(0.883, 0.908]    0.801302
(0.908, 0.919]    0.698709
(0.919, 0.933]    0.673259
(0.933, 0.963]    0.393980
Name: dice, dtype: float64