# Pre set up

In [1]:
# You may need to restart your runtime prior to this, to let your installation take effect
# Some basic setup:
# Set up the detectron2 logger (its log lines appear in the captured cell outputs further down)
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import cv2
import random
# Colab-only display helper, left disabled; this notebook defines its own imshow() in the next cell:
# from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

In [2]:
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab

# Default size (width, height) used for every figure drawn in this notebook.
pylab.rcParams['figure.figsize'] = (13, 13)


def imshow(img):
    """Display an image inline with matplotlib, without axes.

    The last axis of ``img`` is reordered with the index ``[2, 1, 0]`` before
    plotting, which turns an OpenCV-style BGR image into the RGB order that
    matplotlib draws. ``img`` must therefore be a 3-D array with at least
    three channels on its last axis.
    """
    rgb = img[:, :, [2, 1, 0]]
    plt.imshow(rgb)
    plt.axis("off")
    plt.show()

# Run a pre-trained detectron2 model

In [None]:
!wget http://images.cocodataset.org/val2017/000000439715.jpg -O input.jpg
im = cv2.imread("./input.jpg")

In [None]:
# Show the loaded input image; imshow() applies the [2, 1, 0] channel swap itself.
imshow(im)

In [None]:
# Build a predictor from the COCO-pretrained Mask R-CNN (R50-FPN, 3x schedule) and run it on `im`.
cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.DATASETS.TRAIN = ("amodal_train",)
# DATASETS.TEST must be a tuple of dataset names. Without the trailing comma the
# parentheses are a no-op and the value is a plain string, so indexing it with
# [0] yields the single character "s" instead of the dataset name.
cfg.DATASETS.TEST = ("small_amodal_test",)
# NOTE(review): neither "amodal_train" nor "small_amodal_test" is registered in the
# cells visible in this notebook (only "amodal_coco_train"/"amodal_coco_val" are) - confirm.
predictor = DefaultPredictor(cfg)
outputs = predictor(im)

In [None]:
# look at the outputs. See https://detectron2.readthedocs.io/tutorials/models.html#model-output-format for specification
# A notebook cell only displays its final bare expression, so the first of two
# bare expressions is silently discarded; print both fields explicitly.
print(outputs["instances"].pred_classes)
print(outputs["instances"].pred_boxes)

In [None]:
# Overlay the predicted instances on the image with `Visualizer`.
# The image is passed with its channel axis reversed (BGR -> RGB), and the
# predictions are moved to the CPU before drawing.
instances_cpu = outputs["instances"].to("cpu")
v = Visualizer(
    im[:, :, ::-1],
    MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
    scale=1.2,
).draw_instance_predictions(instances_cpu)

In [None]:
# Reverse the channels of the rendered image first, because imshow() applies
# its own [2, 1, 0] channel swap before plotting.
imshow(v.get_image()[:, :, ::-1])

In [None]:
# Evaluate the (not yet fine-tuned) model on the amodal validation split.
from detectron2.engine import DefaultTrainer
# NOTE(review): DefaultTrainer is built from `cfg`, so cfg.DATASETS.TRAIN presumably
# has to name a registered dataset for this to succeed - confirm.
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
#  evaluate its performance using AP metric implemented in COCO API.
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
# The validation split is registered in this notebook as "amodal_coco_val"; the
# previous name "amodal_val" is never registered, so looking it up would fail.
# The registration cell must have been run before this one.
evaluator = COCOEvaluator("amodal_coco_val", cfg, False, output_dir="./output/")
val_loader = build_detection_test_loader(cfg, "amodal_coco_val")
inference_on_dataset(trainer.model, val_loader, evaluator)

# Train pre-trained model on a custom dataset (FT)

## Prepare the dataset

In [3]:
# Register my amodal datasets (train and validation splits, COCO-format annotations)
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.datasets import register_coco_instances

# (dataset name, annotation json, image root) for each split
_AMODAL_SPLITS = (
    (
        "amodal_coco_train",
        "datasets/coco/annotations/COCO_amodal_train2014_with_classes_poly.json",
        "datasets/coco/train2014",
    ),
    (
        "amodal_coco_val",
        "datasets/coco/annotations/COCO_amodal_val2014_with_classes_poly.json",
        "datasets/coco/val2014",
    ),
)
for _name, _json_file, _image_root in _AMODAL_SPLITS:
    # Registering a name twice is rejected by the catalog, so skip names that
    # already exist; this keeps the cell safe to re-run in a live kernel.
    if _name not in DatasetCatalog.list():
        register_coco_instances(_name, {}, _json_file, _image_root)

# Load the training split once; later cells index into this list.
dataset_dicts = DatasetCatalog.get("amodal_coco_train")

[32m[02/29 01:22:13 d2.data.datasets.coco]: [0mLoaded 2276 images in COCO format from datasets/coco/annotations/COCO_amodal_train2014_with_classes_poly.json


In [None]:
 # Run pre-trained model on this amodal image 
from detectron2.data import DatasetMapper

# Fresh config for the COCO-pretrained Mask R-CNN (R50-FPN, 3x schedule).
cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model

# Run the training-mode mapper on one sample of the amodal training split and
# look at the ground-truth boxes and masks it produces.
mapper = DatasetMapper(cfg, is_train=True)
mydict = mapper(dataset_dicts[3])
gt_instances = mydict['instances']
boxes = gt_instances.gt_boxes.tensor
print(gt_instances.gt_masks)

In [None]:
## To verify the data loading is correct, visualize the annotations of a randomly
## selected training sample, followed by the model's predictions on the same image.
import random
from detectron2.data import DatasetCatalog
from detectron2.utils.visualizer import Visualizer

# The training split is registered in this notebook as "amodal_coco_train"; the
# previous name "amodal_train" is never registered, so looking it up would fail.
train_name = "amodal_coco_train"
dataset_dicts = DatasetCatalog.get(train_name)
train_metadata = MetadataCatalog.get(train_name)

# The predictor does not depend on the sample, so build it once outside the loop.
predictor = DefaultPredictor(cfg)

for d in random.sample(dataset_dicts, 1):
    img = cv2.imread(d["file_name"])

    # Ground-truth annotations of the sample.
    visualizer = Visualizer(img[:, :, ::-1], metadata=train_metadata, scale=0.5)
    vis = visualizer.draw_dataset_dict(d)
    imshow(vis.get_image()[:, :, ::-1])

    # Predictions of the model on the same image.
    outputs = predictor(img)
    # We can use `Visualizer` to draw the predictions on the image.
    v = Visualizer(img[:, :, ::-1], train_metadata, scale=1.2)
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    imshow(v.get_image()[:, :, ::-1])
    
    

## Train 

In [4]:
import os
import numpy as np
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer

# Fine-tuning setup: start from the model-zoo Mask R-CNN config and weights,
# then override datasets, solver and output settings for the amodal data.
zoo_config = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(zoo_config))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_config)  # Let training initialize from model zoo

# Data
cfg.DATASETS.TRAIN = ("amodal_coco_train",)
cfg.DATASETS.TEST = ("amodal_coco_train",)
cfg.DATALOADER.NUM_WORKERS = 2

# Solver
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0005  # pick a good LR
cfg.SOLVER.MAX_ITER = 1500
cfg.SOLVER.STEPS = (1200, 1300)

# Visualization period and output directory (also used as the tensorboard logdir below)
cfg.VIS_PERIOD = 20
cfg.OUTPUT_DIR = "myAmodalCheckpoint"
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

# resume=False: do not resume from a previous run in OUTPUT_DIR.
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

[32m[02/29 01:22:38 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

[32m[02/29 01:22:39 d2.data.datasets.coco]: [0mLoaded 2276 images in COCO format from datasets/coco/annotations/COCO_amodal_train2014_with_classes_poly.json
[32m[02/29 01:22:39 d2.data.build]: [0mRemoved 0 images with no usable annotations. 2276 images left.
[32m[02/29 01:22:39 d2.data.build]: [0mDistribution of instances among all 80 categories:
[36m|   category    | #instances   |   category   | #instances   |   category    | #instances   |
|:-------------:|:-------------|:------------:|:-------------|:-------------:|:-------------|
|    person     | 2311         |   bicycle    | 21           |      car      | 279          |
|  motorcycle   | 47           |   airplane   | 65           |      bus      | 92           |
|     train     | 60           |    truck     | 115          |     boat      | 79           |
| traffic light | 13           | fire hydrant | 35           |   stop sign   | 12           |
| parking meter | 10           |    bench     | 48           |     bird     

[32m[02/29 01:24:40 d2.utils.events]: [0meta: 0:04:55  iter: 399  total_loss: 0.463  loss_cls: 0.120  loss_box_reg: 0.104  loss_mask: 0.178  loss_rpn_cls: 0.025  loss_rpn_loc: 0.010  time: 0.2799  data_time: 0.0156  lr: 0.000200  max_mem: 2821M
[32m[02/29 01:24:46 d2.utils.events]: [0meta: 0:04:50  iter: 419  total_loss: 0.429  loss_cls: 0.115  loss_box_reg: 0.081  loss_mask: 0.179  loss_rpn_cls: 0.013  loss_rpn_loc: 0.012  time: 0.2797  data_time: 0.0129  lr: 0.000210  max_mem: 2821M
[32m[02/29 01:24:52 d2.utils.events]: [0meta: 0:04:44  iter: 439  total_loss: 0.400  loss_cls: 0.100  loss_box_reg: 0.086  loss_mask: 0.169  loss_rpn_cls: 0.006  loss_rpn_loc: 0.011  time: 0.2795  data_time: 0.0130  lr: 0.000220  max_mem: 2821M
[32m[02/29 01:24:58 d2.utils.events]: [0meta: 0:04:39  iter: 459  total_loss: 0.428  loss_cls: 0.125  loss_box_reg: 0.098  loss_mask: 0.199  loss_rpn_cls: 0.009  loss_rpn_loc: 0.010  time: 0.2802  data_time: 0.0155  lr: 0.000230  max_mem: 2850M
[32m[02/29 

[32m[02/29 01:28:11 d2.utils.events]: [0meta: 0:01:53  iter: 1079  total_loss: 0.357  loss_cls: 0.106  loss_box_reg: 0.089  loss_mask: 0.175  loss_rpn_cls: 0.006  loss_rpn_loc: 0.010  time: 0.2841  data_time: 0.0115  lr: 0.000500  max_mem: 3003M
[32m[02/29 01:28:17 d2.utils.events]: [0meta: 0:01:48  iter: 1099  total_loss: 0.496  loss_cls: 0.141  loss_box_reg: 0.138  loss_mask: 0.194  loss_rpn_cls: 0.012  loss_rpn_loc: 0.014  time: 0.2841  data_time: 0.0129  lr: 0.000500  max_mem: 3003M
[32m[02/29 01:28:24 d2.utils.events]: [0meta: 0:01:43  iter: 1119  total_loss: 0.419  loss_cls: 0.101  loss_box_reg: 0.100  loss_mask: 0.172  loss_rpn_cls: 0.007  loss_rpn_loc: 0.011  time: 0.2843  data_time: 0.0158  lr: 0.000500  max_mem: 3046M
[32m[02/29 01:28:30 d2.utils.events]: [0meta: 0:01:37  iter: 1139  total_loss: 0.351  loss_cls: 0.084  loss_box_reg: 0.081  loss_mask: 0.169  loss_rpn_cls: 0.006  loss_rpn_loc: 0.009  time: 0.2844  data_time: 0.0160  lr: 0.000500  max_mem: 3046M
[32m[02

In [None]:
# Launch TensorBoard on the training output directory, serving on port 6006.
# NOTE(review): as a `!` shell command this presumably keeps the cell busy until
# it is interrupted - confirm.
!tensorboard --logdir=myAmodalCheckpoint --port=6006

##  Inference & evaluation using the trained model
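Now, let's run inference with the trained model on the amodal COCO validation dataset. First, let's evaluate the model we just trained with the COCO AP metric; after that we create a predictor from the trained weights and visualize some of its predictions: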

In [5]:
# Score the fine-tuned model on the amodal validation split with the AP metric
# implemented in the COCO API; the returned metrics are shown as the cell output.
from detectron2.data import build_detection_test_loader
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

val_name = "amodal_coco_val"
evaluator = COCOEvaluator(val_name, cfg, False, output_dir="myAmodalEvaluation")
val_loader = build_detection_test_loader(cfg, val_name)
inference_on_dataset(trainer.model, val_loader, evaluator)

[32m[02/29 01:30:47 d2.data.datasets.coco]: [0mLoaded 1223 images in COCO format from datasets/coco/annotations/COCO_amodal_val2014_with_classes_poly.json
[32m[02/29 01:30:47 d2.data.build]: [0mDistribution of instances among all 80 categories:
[36m|   category    | #instances   |   category   | #instances   |   category    | #instances   |
|:-------------:|:-------------|:------------:|:-------------|:-------------:|:-------------|
|    person     | 1197         |   bicycle    | 12           |      car      | 154          |
|  motorcycle   | 31           |   airplane   | 39           |      bus      | 44           |
|     train     | 42           |    truck     | 44           |     boat      | 49           |
| traffic light | 7            | fire hydrant | 17           |   stop sign   | 9            |
| parking meter | 2            |    bench     | 34           |     bird      | 42           |
|      cat      | 42           |     dog      | 62           |     horse     | 48       

Loading and preparing results...
DONE (t=0.30s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *segm*
DONE (t=4.65s).
Accumulating evaluation results...
DONE (t=1.11s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.448
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.630
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.515
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.292
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.430
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.529
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.464
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.636
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.646
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

OrderedDict([('bbox',
              {'AP': 46.560225428433654,
               'AP50': 62.74554515780276,
               'AP75': 52.21944267002813,
               'APs': 40.112298672111926,
               'APm': 47.32063596316492,
               'APl': 52.31635639062691,
               'AP-person': 61.32572172291171,
               'AP-bicycle': 32.132329831448615,
               'AP-car': 46.62611683740921,
               'AP-motorcycle': 44.90709937389528,
               'AP-airplane': 68.15856635824889,
               'AP-bus': 67.79633215906628,
               'AP-train': 64.5607911656831,
               'AP-truck': 43.16649622935681,
               'AP-boat': 29.441652868552648,
               'AP-traffic light': 7.231155446371703,
               'AP-fire hydrant': 82.214715589206,
               'AP-stop sign': 24.427157001414432,
               'AP-parking meter': 27.970297029702973,
               'AP-bench': 57.007036752995255,
               'AP-bird': 43.05151337250012,
     

In [None]:
import os

# Point the config at the fine-tuned checkpoint written by the training cell,
# switch the test split to the validation set, and build a predictor from it.
cfg.OUTPUT_DIR = "myAmodalCheckpoint"
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.9   # set the testing threshold for this model
cfg.DATASETS.TEST = ("amodal_coco_val",)

predictor = DefaultPredictor(cfg)

In [None]:
import random
from detectron2.data import DatasetCatalog
from detectron2.utils.visualizer import ColorMode, Visualizer

# Draw the fine-tuned predictor's output on two randomly chosen validation images.
dataset_dicts = DatasetCatalog.get("amodal_coco_val")
val_metadata = MetadataCatalog.get("amodal_coco_val")

for d in random.sample(dataset_dicts, 2):
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)
    v = Visualizer(
        im[:, :, ::-1],
        metadata=val_metadata,
        scale=0.8,
        instance_mode=ColorMode.IMAGE_BW,  # remove the colors of unsegmented pixels
    )
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    # Reverse the channels again because imshow() applies its own channel swap.
    imshow(v.get_image()[:, :, ::-1])