In [1]:
%%capture
# %%capture has to stay on the first line of the cell; it silences the pip logs below.
# pyyaml is pinned to 5.1 before detectron2 is installed.
!python -m pip install pyyaml==5.1
import sys, os, distutils.core
# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities.
# See https://detectron2.readthedocs.io/tutorials/install.html for full installation instructions
# (sys and distutils.core are only needed by this commented-out fast path.)
# !git clone 'https://github.com/facebookresearch/detectron2'
# dist = distutils.core.run_setup("./detectron2/setup.py")
# !python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
# sys.path.insert(0, os.path.abspath('./detectron2'))

# Properly install detectron2. (Please do not install twice in both ways)
# NOTE(review): this installs whatever is at the head of the repository; consider
# pinning to a tag/commit so the notebook stays reproducible.
!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

In [2]:
import torch, detectron2
!nvcc --version
# torch.__version__ looks like "2.5.1+cu121": a release part plus an optional
# local build tag. Not every build carries the "+..." suffix, in which case the
# previous split("+")[-1] silently reported the whole version string as the
# CUDA version.
_torch_release, _, _torch_build_tag = torch.__version__.partition("+")
TORCH_VERSION = ".".join(_torch_release.split(".")[:2])
if _torch_build_tag:
    CUDA_VERSION = _torch_build_tag
elif torch.version.cuda:
    # torch.version.cuda is e.g. "12.1"; keep the "cu121" spelling used by the tag.
    CUDA_VERSION = "cu" + torch.version.cuda.replace(".", "")
else:
    CUDA_VERSION = "cpu"
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0
torch:  2.5 ; cuda:  cu121
detectron2: 0.6


In [3]:
# Mount Google Drive inside the Colab VM. Every dataset, annotation and model
# path used below lives under /content/drive/MyDrive, so this must run first.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

import numpy as np
import pandas as pd
import cv2
from google.colab.patches import cv2_imshow

from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

### Register Train and Validation Set

#### Train

In [5]:
from detectron2.data import DatasetCatalog
from detectron2.data.datasets import register_coco_instances

# Register the COCO-format training split (annotation json + image folder).
# Registering an already-registered name fails, so the call is guarded to keep
# this cell safe to re-run without restarting the kernel.
if "my_data_train" not in DatasetCatalog.list():
    register_coco_instances("my_data_train", {}, "/content/drive/MyDrive/BumpsandPotholes/models/train.json", "/content/drive/MyDrive/BumpsandPotholes/models/train")

In [6]:
# Fetch the metadata object and the list of per-image annotation dicts for the
# training split.
# NOTE(review): loading the dicts appears to be what fills in `thing_classes`
# on the metadata (it is present in the next cell's output) — confirm before
# reordering these cells.
my_data_train_metadata = MetadataCatalog.get("my_data_train")
dataset_dicts = DatasetCatalog.get("my_data_train")

Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[01/05 14:47:08 d2.data.datasets.coco]: Loaded 374 images in COCO format from /content/drive/MyDrive/BumpsandPotholes/models/train.json


In [7]:
# Display the training metadata (class names and category-id mapping) as a sanity check.
my_data_train_metadata

namespace(name='my_data_train',
          json_file='/content/drive/MyDrive/BumpsandPotholes/models/train.json',
          image_root='/content/drive/MyDrive/BumpsandPotholes/models/train',
          evaluator_type='coco',
          thing_classes=['Pothole', 'RoadPath', 'SpeedBump'],
          thing_dataset_id_to_contiguous_id={0: 0, 1: 1, 2: 2})

In [8]:
# Class names in the order recorded on the training metadata; `classes` is
# reused later to size the ROI head (NUM_CLASSES = len(classes)).
classes = my_data_train_metadata.thing_classes
# Persist the class list next to the model files. to_csv is called with its
# defaults, so the index column and a header row are written as well.
# NOTE(review): presumably read back by a separate inference script — verify
# the expected CSV layout there before changing these defaults.
classes_df = pd.DataFrame(classes)
classes_df.to_csv('/content/drive/MyDrive/BumpsandPotholes/models/classeslist.csv')

In [9]:
# Number of object classes; the same value is later assigned to cfg.MODEL.ROI_HEADS.NUM_CLASSES.
len(classes)

3

#### Validation

In [10]:
from detectron2.data import DatasetCatalog

# Register the held-out split (stored as test.json / test/) under the name
# "my_data_val". Registering an already-registered name fails, so the call is
# guarded to keep this cell safe to re-run without restarting the kernel.
if "my_data_val" not in DatasetCatalog.list():
    register_coco_instances("my_data_val", {}, "/content/drive/MyDrive/BumpsandPotholes/models/test.json", "/content/drive/MyDrive/BumpsandPotholes/models/test")

In [11]:
# Fetch the metadata object and the per-image annotation dicts for the
# validation split.
# Note: this rebinds `dataset_dicts`, which previously held the training dicts;
# the sample-preview cell reloads the training dicts itself before using the name.
my_data_val_metadata = MetadataCatalog.get("my_data_val")
dataset_dicts = DatasetCatalog.get("my_data_val")

Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[01/05 14:48:13 d2.data.datasets.coco]: Loaded 94 images in COCO format from /content/drive/MyDrive/BumpsandPotholes/models/test.json


In [12]:
# Display the validation metadata; its class list should match the training split.
my_data_val_metadata

namespace(name='my_data_val',
          json_file='/content/drive/MyDrive/BumpsandPotholes/models/test.json',
          image_root='/content/drive/MyDrive/BumpsandPotholes/models/test',
          evaluator_type='coco',
          thing_classes=['Pothole', 'RoadPath', 'SpeedBump'],
          thing_dataset_id_to_contiguous_id={0: 0, 1: 1, 2: 2})

#### Sample Train Data

In [13]:
import random
import os

# Overlay the ground-truth annotations on a few random training images to
# sanity-check the dataset registration.
dataset_dicts = DatasetCatalog.get("my_data_train")
# random.sample raises ValueError when asked for more items than are available.
num_preview = min(10, len(dataset_dicts))
for d in random.sample(dataset_dicts, num_preview):
    img = cv2.imread(d["file_name"])
    if img is None:
        # cv2.imread returns None (no exception) for a missing or unreadable file.
        print(f"Skipping unreadable image: {d['file_name']}")
        continue
    # The channel axis is reversed on the way into the Visualizer and reversed
    # again before display (OpenCV uses BGR ordering).
    visualizer = Visualizer(img[:, :, ::-1], metadata=my_data_train_metadata, scale=0.5)
    vis = visualizer.draw_dataset_dict(d)
    cv2_imshow(vis.get_image()[:, :, ::-1])

Output hidden; open in https://colab.research.google.com to view.

#### Data Augmentation

In [14]:
import detectron2.data.transforms as T

def build_sem_seg_train_aug(cfg):
    """Assemble the list of augmentations applied to training images.

    The steps are returned in the order they are listed here:
    brightness jitter (applied 20% of the time), shortest-edge resize driven by
    ``cfg.INPUT``, a rotation by 90/180/270 degrees (applied 30% of the time),
    then a vertical and a horizontal flip (20% each).

    Args:
        cfg: config node providing ``INPUT.MIN_SIZE_TRAIN``,
            ``INPUT.MAX_SIZE_TRAIN`` and ``INPUT.MIN_SIZE_TRAIN_SAMPLING``.

    Returns:
        list of detectron2 transform/augmentation objects.
    """
    brightness_jitter = T.RandomApply(T.RandomBrightness(0.6, 1.4), prob=0.2)
    shortest_edge_resize = T.ResizeShortestEdge(
        cfg.INPUT.MIN_SIZE_TRAIN,
        cfg.INPUT.MAX_SIZE_TRAIN,
        cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING,
    )
    right_angle_rotation = T.RandomApply(
        T.RandomRotation(sample_style="choice", angle=[90, 180, 270]),
        prob=0.3,
    )
    vertical_flip = T.RandomFlip(prob=0.2, horizontal=False, vertical=True)
    horizontal_flip = T.RandomFlip(prob=0.2, horizontal=True, vertical=False)

    return [
        brightness_jitter,
        shortest_edge_resize,
        right_angle_rotation,
        vertical_flip,
        horizontal_flip,
    ]

#### Calculate Validation Loss

In [15]:
from detectron2.engine.hooks import HookBase
from detectron2.evaluation import inference_context
from detectron2.utils.logger import log_every_n_seconds
from detectron2.data import DatasetMapper, build_detection_test_loader
import detectron2.utils.comm as comm
import torch
import time
import logging
import datetime

class LossEvalHook(HookBase):
    """Trainer hook that reports the mean total loss over a held-out data loader.

    The mean is written to the trainer's event storage under the key
    ``validation_loss`` every ``eval_period`` iterations and once more at the
    final iteration.

    Args:
        eval_period: number of training iterations between two evaluations.
            A value <= 0 disables the periodic run; the final-iteration run
            still happens.
        model: model that is called on each batch and returns a dict of loss
            values. NOTE(review): this presumably requires the model to be in
            training mode when the hook fires — confirm.
        data_loader: sized iterable yielding batches accepted by ``model``.
    """

    def __init__(self, eval_period, model, data_loader):
        self._model = model
        self._period = eval_period
        self._data_loader = data_loader

    def _do_loss_eval(self):
        """Run the model over the whole loader and record the mean loss.

        Returns:
            list with one total-loss float per batch (empty when the loader
            yields nothing, in which case no scalar is recorded).
        """
        # Timing / ETA bookkeeping adapted from inference_on_dataset in evaluator.py.
        total = len(self._data_loader)
        num_warmup = min(5, total - 1)

        start_time = time.perf_counter()
        total_compute_time = 0
        losses = []
        for idx, inputs in enumerate(self._data_loader):
            if idx == num_warmup:
                # Restart the clocks so the warm-up batches do not skew the averages.
                start_time = time.perf_counter()
                total_compute_time = 0
            start_compute_time = time.perf_counter()
            # The forward pass must sit inside the timed window; measuring only
            # the synchronize call made the reported s/img meaningless.
            losses.append(self._get_loss(inputs))
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            total_compute_time += time.perf_counter() - start_compute_time
            iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
            seconds_per_img = total_compute_time / iters_after_start
            if idx >= num_warmup * 2 or seconds_per_img > 5:
                total_seconds_per_img = (time.perf_counter() - start_time) / iters_after_start
                eta = datetime.timedelta(seconds=int(total_seconds_per_img * (total - idx - 1)))
                log_every_n_seconds(
                    logging.INFO,
                    "Loss on Validation  done {}/{}. {:.4f} s / img. ETA={}".format(
                        idx + 1, total, seconds_per_img, str(eta)
                    ),
                    n=5,
                )
        if losses:
            # Skip the scalar for an empty loader: np.mean([]) would log NaN.
            mean_loss = np.mean(losses)
            self.trainer.storage.put_scalar('validation_loss', mean_loss)
        comm.synchronize()

        return losses

    def _get_loss(self, data):
        """Return the sum of all loss terms the model reports for one batch."""
        # No gradients are needed for a pure evaluation pass; disabling autograd
        # avoids building (and holding on to) a graph for every validation batch.
        with torch.no_grad():
            metrics_dict = self._model(data)
        metrics_dict = {
            k: v.detach().cpu().item() if isinstance(v, torch.Tensor) else float(v)
            for k, v in metrics_dict.items()
        }
        return sum(metrics_dict.values())

    def after_step(self):
        """Trigger the loss evaluation on period boundaries and at the last iteration."""
        next_iter = self.trainer.iter + 1
        is_final = next_iter == self.trainer.max_iter
        if is_final or (self._period > 0 and next_iter % self._period == 0):
            self._do_loss_eval()
        # The former per-step `put_scalars(timetest=12)` was a debugging leftover
        # that only added a constant series to the metrics, so it is dropped.

#### Custom Trainer

In [16]:
from detectron2.engine import DefaultTrainer
from detectron2.data import build_detection_test_loader, build_detection_train_loader

class MyTrainer(DefaultTrainer):
    """DefaultTrainer variant used by this notebook.

    It swaps in the custom augmentation list for the training loader, evaluates
    with ``COCOEvaluator`` and adds a ``LossEvalHook`` that tracks the loss on
    the first dataset listed in ``cfg.DATASETS.TEST``.
    """

    @classmethod
    def build_train_loader(cls, cfg):
        """Build the training loader with the augmentations from build_sem_seg_train_aug."""
        mapper = DatasetMapper(cfg, is_train=True, augmentations=build_sem_seg_train_aug(cfg))
        return build_detection_train_loader(cfg, mapper=mapper)

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Return a COCOEvaluator writing to ``output_folder``.

        When ``output_folder`` is None, ``<cfg.OUTPUT_DIR>/inference`` is used.
        """
        # Imported here so the class does not depend on a later notebook cell
        # having already imported COCOEvaluator.
        from detectron2.evaluation import COCOEvaluator

        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        # Passing the config as the second positional argument is deprecated;
        # explicit keywords give the same evaluator without the warning.
        return COCOEvaluator(dataset_name, distributed=True, output_dir=output_folder)

    def build_hooks(self):
        """Extend the default hooks with the validation-loss hook."""
        hooks = super().build_hooks()
        val_loss_hook = LossEvalHook(
            # Use the trainer's own config; the previous code read the notebook
            # global `cfg`, which may differ from the one passed to this trainer.
            self.cfg.TEST.EVAL_PERIOD,
            self.model,
            build_detection_test_loader(
                self.cfg,
                self.cfg.DATASETS.TEST[0],
                # is_train=True is passed to the mapper.
                # NOTE(review): presumably so annotations are kept for the loss,
                # which would also apply the default training augmentations to
                # the validation images — confirm.
                DatasetMapper(self.cfg, True),
            ),
        )
        # Insert before the last default hook.
        # NOTE(review): presumably the metrics writer, so that validation_loss
        # is flushed together with the other scalars — confirm.
        hooks.insert(-1, val_loss_hook)
        return hooks

#### Train Model

In [17]:
# Force a garbage-collection pass before training starts; the number shown as
# the cell output is gc.collect()'s return value.
# NOTE(review): presumably meant to release memory held by earlier cells — confirm it is still needed.
import gc
gc.collect()

13185

In [None]:
# Build the training configuration and fine-tune Mask R-CNN (R101-FPN).
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
# COCOEvaluator is referenced by MyTrainer.build_evaluator (defined in an
# earlier cell), so this import has to run before the trainer evaluates.
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
import os

cfg = get_cfg()
# Start from the model-zoo Mask R-CNN R101-FPN 3x config and override below.
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("my_data_train",)
cfg.DATASETS.TEST = ("my_data_val",)   # held-out split; also feeds LossEvalHook via DATASETS.TEST[0]
cfg.DATALOADER.NUM_WORKERS = 4
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml") # initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.002
cfg.SOLVER.MAX_ITER = 4200 # Number of iterations
# EVAL_PERIOD is larger than MAX_ITER, so no iteration count is a multiple of it
# during the run: LossEvalHook therefore only fires once, at the final iteration.
# NOTE(review): presumably intentional to avoid mid-training evaluation — lower
# it if a validation-loss curve is wanted.
cfg.TEST.EVAL_PERIOD = 4201
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
#cfg.MODEL.BACKBONE.FREEZE_AT = 2
# Must match the number of classes in the registered dataset (3 in this notebook).
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(classes) # classes

# Checkpoints and metrics are written to Google Drive so they survive the Colab session.
cfg.OUTPUT_DIR = "/content/drive/MyDrive/BumpsandPotholes/models/Detectron2/output/"
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)


trainer = MyTrainer(cfg) #DefaultTrainer(cfg)
# resume=False: the log below shows training starting from iteration 0 with the
# model-zoo weights rather than continuing from a checkpoint in OUTPUT_DIR.
trainer.resume_or_load(resume=False)
trainer.train()

[01/05 14:52:02 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res

model_final_a3ec72.pkl: 254MB [00:01, 196MB/s]                           
roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
roi_heads.mask_head.predictor.{bias, weight}


[01/05 14:52:04 d2.engine.train_loop]: Starting training from iteration 0


  tensor = torch.from_numpy(np.ascontiguousarray(img))
  tensor = torch.from_numpy(np.ascontiguousarray(img))
  tensor = torch.from_numpy(np.ascontiguousarray(img))
  tensor = torch.from_numpy(np.ascontiguousarray(img))
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[01/05 14:52:26 d2.utils.events]:  eta: 0:58:35  iter: 19  total_loss: 3.186  loss_cls: 1.386  loss_box_reg: 0.05953  loss_mask: 0.6794  loss_rpn_cls: 0.8085  loss_rpn_loc: 0.2831    time: 0.8308  last_time: 0.7159  data_time: 0.0630  last_data_time: 0.0055   lr: 3.9962e-05  max_mem: 5490M
[01/05 14:52:45 d2.utils.events]:  eta: 0:54:27  iter: 39  total_loss: 1.808  loss_cls: 0.6512  loss_box_reg: 0.1081  loss_mask: 0.6029  loss_rpn_cls: 0.104  loss_rpn_loc: 0.1748    time: 0.8005  last_time: 0.6330  data_time: 0.0149  last_data_time: 0.0024   lr: 7.9922e-05  max_mem: 5490M
[01/05 14:53:00 d2.utils.events]:  eta: 0:52:23  iter: 59  total_loss: 1.154  loss_cls: 0.2108  loss_box_reg: 0.1285  loss_mask: 0.5608  loss_rpn_cls: 0.02424  loss_rpn_loc: 0.1207    time: 0.7962  last_time: 0.7369  data_time: 0.0378  last_data_time: 0.0058   lr: 0.00011988  max_mem: 5617M
[01/05 14:53:17 d2.utils.events]:  eta: 0:53:20  iter: 79  total_loss: 0.9859  loss_cls: 0.1656  loss_box_reg: 0.127  loss_mask

### Evaluate Model

#### Reload Trainer

In [22]:
# Rebuild the trainer and load the trained weights so the model can be evaluated
# without re-training (trainer.train() stays commented out at the bottom).
# NOTE(review): the author's shorthand note on the next line is kept verbatim; its meaning is unclear.
#fr5 pt 2test default aug best at 1000
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
import os

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("my_data_train",)
cfg.DATASETS.TEST = ("my_data_val",)   # held-out split used by the evaluation cells
# NOTE(review): NUM_WORKERS (5) and BASE_LR (0.001) differ from the training
# cell (4 and 0.002). This duplicated config has drifted; since training is not
# run here the solver values are presumably irrelevant — confirm.
cfg.DATALOADER.NUM_WORKERS = 5

cfg.OUTPUT_DIR = "/content/drive/MyDrive/BumpsandPotholes/models/Detectron2/output/"
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
# Point at the final checkpoint written by the training run.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")

cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.001 #0.002
cfg.SOLVER.MAX_ITER = 4200    # Number of iterations
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
# Must equal the value used for training, otherwise the checkpoint's head shapes will not match.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(classes)  # classes
cfg.TEST.EVAL_PERIOD = 4201
#cfg.MODEL.BACKBONE.FREEZE_AT = 2

trainer = MyTrainer(cfg)
#trainer = DefaultTrainer(cfg)
# resume=True: the log below shows the scheduler state being restored as well.
# NOTE(review): with resume=True the checkpoint presumably comes from the last
# one recorded in OUTPUT_DIR rather than strictly from cfg.MODEL.WEIGHTS — confirm.
trainer.resume_or_load(resume=True)
#trainer.train()

[01/05 15:57:50 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res

  return torch.load(f, map_location=torch.device("cpu"))


[01/05 15:57:51 d2.engine.hooks]: Loading scheduler from state_dict ...


#### COCO Evaluation

In [23]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

# Score the reloaded model on the *training* split, as a reference point for
# how much the validation numbers drop. Despite its name, `val_loader` iterates
# the training images in this cell.
# Passing `cfg` as the second positional argument of COCOEvaluator is
# deprecated; explicit keywords build the same evaluator without the warning.
# NOTE(review): the validation cell writes to the same ./output/ directory, so
# result files saved here get overwritten by that run.
# (Another equivalent way is to use trainer.test.)
evaluator = COCOEvaluator("my_data_train", distributed=False, output_dir="./output/")
val_loader = build_detection_test_loader(cfg, "my_data_train")
inference_on_dataset(trainer.model, val_loader, evaluator)

Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[01/05 15:58:27 d2.data.datasets.coco]: Loaded 374 images in COCO format from /content/drive/MyDrive/BumpsandPotholes/models/train.json
[01/05 15:58:27 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[01/05 15:58:27 d2.data.common]: Serializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[01/05 15:58:27 d2.data.common]: Serializing 374 elements to byte tensors and concatenating them all ...
[01/05 15:58:27 d2.data.common]: Serialized dataset takes 0.38 MiB
[01/05 15:58:27 d2.evaluation.evaluator]: Start inference on 374 batches
[01/05 15:58:30 d2.evaluation.evaluator]: Inference done 11/374. Dataloading: 0.0079 s/iter. Inference: 0.1676 s/iter. Eval: 0.0042 s/iter. Total: 0.1798 s/iter. ETA=0:01:05
[01/05 15:58:35 d2.evaluation.evaluator]: Inference done 28

OrderedDict([('bbox',
              {'AP': 82.237982035812,
               'AP50': 99.54126937583719,
               'AP75': 95.19361193815789,
               'APs': 66.95142286513239,
               'APm': 73.01712374229673,
               'APl': 84.69576924554787,
               'AP-Pothole': 73.61181469501894,
               'AP-RoadPath': 94.73915749961367,
               'AP-SpeedBump': 78.36297391280337}),
             ('segm',
              {'AP': 82.7240946248383,
               'AP50': 99.22751821903914,
               'AP75': 96.64367331262046,
               'APs': 66.80229777437052,
               'APm': 73.51960563939487,
               'APl': 85.61112827001793,
               'AP-Pothole': 72.85538936661995,
               'AP-RoadPath': 94.79659284929399,
               'AP-SpeedBump': 80.52030165860097})])

In [24]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

# Score the reloaded model on the held-out validation split.
# Passing `cfg` as the second positional argument of COCOEvaluator is
# deprecated; explicit keywords build the same evaluator without the warning.
# NOTE(review): ./output/ is shared with the training-split evaluation cell, so
# result files from that run are overwritten here.
# (Another equivalent way is to use trainer.test.)
evaluator = COCOEvaluator("my_data_val", distributed=False, output_dir="./output/")
val_loader = build_detection_test_loader(cfg, "my_data_val")
inference_on_dataset(trainer.model, val_loader, evaluator)

Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[01/05 16:00:15 d2.data.datasets.coco]: Loaded 94 images in COCO format from /content/drive/MyDrive/BumpsandPotholes/models/test.json
[01/05 16:00:15 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[01/05 16:00:15 d2.data.common]: Serializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[01/05 16:00:15 d2.data.common]: Serializing 94 elements to byte tensors and concatenating them all ...
[01/05 16:00:15 d2.data.common]: Serialized dataset takes 0.09 MiB
[01/05 16:00:15 d2.evaluation.evaluator]: Start inference on 94 batches




[01/05 16:00:18 d2.evaluation.evaluator]: Inference done 11/94. Dataloading: 0.0046 s/iter. Inference: 0.1557 s/iter. Eval: 0.0440 s/iter. Total: 0.2044 s/iter. ETA=0:00:16
[01/05 16:00:24 d2.evaluation.evaluator]: Inference done 35/94. Dataloading: 0.0048 s/iter. Inference: 0.1549 s/iter. Eval: 0.0558 s/iter. Total: 0.2157 s/iter. ETA=0:00:12
[01/05 16:00:29 d2.evaluation.evaluator]: Inference done 49/94. Dataloading: 0.0066 s/iter. Inference: 0.1637 s/iter. Eval: 0.0913 s/iter. Total: 0.2618 s/iter. ETA=0:00:11
[01/05 16:00:34 d2.evaluation.evaluator]: Inference done 75/94. Dataloading: 0.0055 s/iter. Inference: 0.1617 s/iter. Eval: 0.0707 s/iter. Total: 0.2381 s/iter. ETA=0:00:04
[01/05 16:00:38 d2.evaluation.evaluator]: Total inference time: 0:00:20.958208 (0.235485 s / iter per device, on 1 devices)
[01/05 16:00:38 d2.evaluation.evaluator]: Total inference pure compute time: 0:00:14 (0.160122 s / iter per device, on 1 devices)
[01/05 16:00:38 d2.evaluation.coco_evaluation]: Prepar

OrderedDict([('bbox',
              {'AP': 68.3281572621284,
               'AP50': 90.71621392209852,
               'AP75': 72.11305941741216,
               'APs': 37.77777777777777,
               'APm': 59.860895462248834,
               'APl': 68.968394119608,
               'AP-Pothole': 46.44304319947948,
               'AP-RoadPath': 90.27255566759494,
               'AP-SpeedBump': 68.26887291931072}),
             ('segm',
              {'AP': 70.71573619332558,
               'AP50': 92.43056909804294,
               'AP75': 79.88910809474574,
               'APs': 32.77777777777777,
               'APm': 61.0377383976907,
               'APl': 72.71507667512698,
               'AP-Pothole': 50.728439104447595,
               'AP-RoadPath': 91.00508091178908,
               'AP-SpeedBump': 70.41368856374012})])

...DONE