In [1]:
# Some basic setup:
# Import detectron2 and set up its logger before anything else runs.
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# Common general-purpose libraries.
# NOTE(review): in the cells visible here only `os` is referenced; numpy, json,
# cv2 and random look unused -- confirm before keeping them.
import numpy as np
import os, json, cv2, random

# Common detectron2 utilities.
# NOTE(review): in the cells visible here `model_zoo` appears only in
# commented-out code, and Visualizer / MetadataCatalog / DatasetCatalog are not
# referenced at all -- confirm they are still needed.
from detectron2 import model_zoo
from detectron2.engine import DefaultTrainer
from detectron2.evaluation.coco_evaluation import COCOEvaluator
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Register the Pascal RAW datasets (COCO-format annotations) with detectron2.
from detectron2.data.datasets import register_coco_instances


def register_coco_once(name, json_file, image_root):
    """Register a COCO-format dataset unless `name` is already registered.

    Registering the same dataset name twice is rejected by detectron2's
    DatasetCatalog, which makes a plain `register_coco_instances` call fail
    when this cell is re-run in a live kernel. Skipping names that are already
    present keeps the cell safe to execute repeatedly.

    Args:
        name: dataset name to register.
        json_file: path to the COCO-format annotation file.
        image_root: directory containing the images.
    """
    # DatasetCatalog comes from the import cell at the top of the notebook.
    if name not in DatasetCatalog.list():
        register_coco_instances(name, {}, json_file, image_root)


## Alternative dataset variants, kept for reference (not registered):
# The raw dataset in full resolution
#register_coco_instances("pascal_raw_train_raw", {}, "/pascal_raw/json_annotation_train.json", "/pascal_raw/pascal_raw/original/raw")
#register_coco_instances("pascal_raw_val_raw", {}, "/pascal_raw/json_annotation_val.json", "/pascal_raw/pascal_raw/original/raw")
## The jpg dataset in full resolution
#register_coco_instances("pascal_raw_train_jpg", {}, "/pascal_raw/json_annotation_train.json", "/pascal_raw/pascal_raw/original/jpg")
#register_coco_instances("pascal_raw_val_jpg", {}, "/pascal_raw/json_annotation_val.json", "/pascal_raw/pascal_raw/original/jpg")

# The downsampled JPG set: this is the one used for training and evaluation below.
register_coco_once("pascal_raw_train_jpg_downsampled", "/pascal_raw/pascal_raw_train.json", "/pascal_raw/pascal_raw/jpg")
register_coco_once("pascal_raw_val_jpg_downsampled", "/pascal_raw/pascal_raw_val.json", "/pascal_raw/pascal_raw/jpg")


In [3]:
# Build the training configuration: start from detectron2's default config and
# overlay the local `configs/faster_rcnn_R_50_FPN_1x.yaml` file on top of it.
cfg = get_cfg()
cfg.merge_from_file("configs/faster_rcnn_R_50_FPN_1x.yaml")

# --- Data: train/validate on the downsampled JPG splits ---
cfg.DATASETS.TRAIN = ("pascal_raw_train_jpg_downsampled",)
cfg.DATASETS.TEST = ("pascal_raw_val_jpg_downsampled",)
cfg.DATALOADER.NUM_WORKERS = 2

# --- Model head ---
# Number of object classes in the dataset (the class count itself, not count + 1).
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
# The "RoIHead batch size". Kept at the default of 512; 128 would be faster.
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512

# --- Solver ---
# Images per iteration: the "batch size" in the usual deep-learning sense.
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
# Short run; a practical dataset will need more iterations.
cfg.SOLVER.MAX_ITER = 300
# No decay steps: the learning rate is not decayed during the run.
cfg.SOLVER.STEPS = []

In [4]:
#model_zoo.get_config_file()

#cfg.merge_from_file(model_zoo.get_config_file("configs/faster_rcnn_R_50_FPN_1x.yaml"))
#cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("configs/faster_rcnn_R_50_FPN_1x.yaml")  # Let training initialize from model zoo
# MAX_ITER: 300 iterations seems good enough for this toy dataset; you will need to train longer for a practical dataset.
# NUM_CLASSES: this dataset has three classes (bicycle, car, person). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrectly use num_classes+1 here.


In [6]:
# Train the detector with detectron2's DefaultTrainer.
# Make sure the output directory exists (no error if it is already there).
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
# Build the trainer from the config assembled above.
trainer = DefaultTrainer(cfg) 
# resume=False: do not resume a previous run; the saved log for this cell shows
# checkpoint weights being loaded and training starting from iteration 0.
trainer.resume_or_load(resume=False)
trainer.train()
# NOTE(review): the saved output of this cell ends with
# "TypeError: object of type 'type' has no len()" and the traceback is cut off,
# while the execution counts jump from this cell straight to the evaluation
# cell. Re-run on a fresh kernel to confirm where that error came from.


[11/23 09:28:39 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res

[11/23 09:28:39 d2.data.datasets.coco]: Loaded 2126 images in COCO format from /pascal_raw/pascal_raw_train.json
[11/23 09:28:39 d2.data.build]: Removed 0 images with no usable annotations. 2126 images left.
[11/23 09:28:39 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()]
[11/23 09:28:39 d2.data.build]: Using training sampler TrainingSampler
[11/23 09:28:39 d2.data.common]: Serializing the dataset using: <class 'detectron2.data.common.NumpySerializedList'>
[11/23 09:28:39 d2.data.common]: Serializing 2126 elements to byte tensors and concatenating them all ...
[11/23 09:28:39 d2.data.common]: Serialized dataset takes 0.54 MiB


Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Some model parameters or buffers are not found in the checkpoint:
roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, 

[11/23 09:28:39 d2.engine.train_loop]: Starting training from iteration 0
[11/23 09:28:47 d2.utils.events]:  eta: 0:01:41  iter: 19  total_loss: 1.596  loss_cls: 1.447  loss_box_reg: 0.1345  loss_rpn_cls: 0.005855  loss_rpn_loc: 0.005591  time: 0.3582  data_time: 0.0272  lr: 1.6068e-05  max_mem: 2999M
[11/23 09:28:54 d2.utils.events]:  eta: 0:01:38  iter: 39  total_loss: 1.319  loss_cls: 1.152  loss_box_reg: 0.1439  loss_rpn_cls: 0.01144  loss_rpn_loc: 0.01072  time: 0.3677  data_time: 0.0066  lr: 3.2718e-05  max_mem: 2999M
[11/23 09:29:02 d2.utils.events]:  eta: 0:01:30  iter: 59  total_loss: 0.841  loss_cls: 0.6816  loss_box_reg: 0.1539  loss_rpn_cls: 0.006425  loss_rpn_loc: 0.00792  time: 0.3681  data_time: 0.0057  lr: 4.9367e-05  max_mem: 2999M
[11/23 09:29:09 d2.utils.events]:  eta: 0:01:21  iter: 79  total_loss: 0.5135  loss_cls: 0.3391  loss_box_reg: 0.1608  loss_rpn_cls: 0.007274  loss_rpn_loc: 0.007927  time: 0.3672  data_time: 0.0062  lr: 6.6017e-05  max_mem: 2999M
[11/23 09:

TypeError: object of type 'type' has no len()

In [10]:
# Evaluate the trained model with COCO metrics on the configured test set(s).
# DefaultTrainer.test pairs the evaluators with cfg.DATASETS.TEST by position
# and expects one evaluator per test dataset, so the list is built from the
# config instead of repeating the dataset name by hand. With the current config
# this yields a single COCOEvaluator for "pascal_raw_val_jpg_downsampled".
# The call is the cell's last expression, so the metrics dict is displayed.
trainer.test(
    cfg=cfg,
    model=trainer.model,
    evaluators=[
        COCOEvaluator(dataset_name, output_dir=cfg.OUTPUT_DIR)
        for dataset_name in cfg.DATASETS.TEST
    ],
)

[11/23 09:34:38 d2.evaluation.coco_evaluation]: Fast COCO eval is not built. Falling back to official COCO eval.
[11/23 09:34:38 d2.data.datasets.coco]: Loaded 2127 images in COCO format from /pascal_raw/pascal_raw_val.json
[11/23 09:34:38 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[11/23 09:34:38 d2.data.common]: Serializing the dataset using: <class 'detectron2.data.common.NumpySerializedList'>
[11/23 09:34:38 d2.data.common]: Serializing 2127 elements to byte tensors and concatenating them all ...
[11/23 09:34:38 d2.data.common]: Serialized dataset takes 0.54 MiB
[11/23 09:34:38 d2.evaluation.evaluator]: Start inference on 2127 batches
[11/23 09:34:40 d2.evaluation.evaluator]: Inference done 11/2127. Dataloading: 0.0034 s/iter. Inference: 0.0869 s/iter. Eval: 0.0003 s/iter. Total: 0.0906 s/iter. ETA=0:03:11
[11/23 09:34:45 d2.evaluation.evaluator]: Inference done 6

[11/23 09:38:18 d2.evaluation.evaluator]: Inference done 2028/2127. Dataloading: 0.0186 s/iter. Inference: 0.0890 s/iter. Eval: 0.0004 s/iter. Total: 0.1081 s/iter. ETA=0:00:10
[11/23 09:38:23 d2.evaluation.evaluator]: Inference done 2080/2127. Dataloading: 0.0183 s/iter. Inference: 0.0890 s/iter. Eval: 0.0004 s/iter. Total: 0.1078 s/iter. ETA=0:00:05
[11/23 09:38:27 d2.evaluation.evaluator]: Total inference time: 0:03:48.164765 (0.107523 s / iter per device, on 1 devices)
[11/23 09:38:27 d2.evaluation.evaluator]: Total inference pure compute time: 0:03:08 (0.088941 s / iter per device, on 1 devices)
[11/23 09:38:28 d2.evaluation.coco_evaluation]: Preparing results for COCO format ...
[11/23 09:38:28 d2.evaluation.coco_evaluation]: Saving results to ./output/coco_instances_results.json
[11/23 09:38:28 d2.evaluation.coco_evaluation]: Evaluating predictions with official COCO API...
Loading and preparing results...
DONE (t=0.19s)
creating index...
index created!
Running per image evaluat

OrderedDict([('bbox',
              {'AP': 38.47251097374951,
               'AP50': 63.81919533926015,
               'AP75': 42.54397837722831,
               'APs': 9.870699564180521,
               'APm': 27.79871010290321,
               'APl': 41.662097757379755,
               'AP-bicycle': 3.3391498427242836,
               'AP-car': 57.4164216657729,
               'AP-person': 54.66196141275136})])