In [7]:
import detectron2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.data import MetadataCatalog, DatasetCatalog, build_detection_test_loader
from detectron2.utils.visualizer import ColorMode, Visualizer
from detectron2.evaluation import PascalVOCDetectionEvaluator, inference_on_dataset
import os, json, cv2, random

### Preparing Datasets

#### Register dataset

In [8]:
from detectron2.data.datasets import register_pascal_voc

cls_names = ('truck', 'car', 'rider', 'person', 'train', 'motorcycle', 'bicycle', 'bus')
register_pascal_voc("my_dataset", '/mnt/c/Users/user/OneDrive - 國立中正大學/lesson/112-2/ML_CV/Exercise_1/datasets/Cityscapes_dataset/Cityscapes_dataset/VOC2007', "trainval", 2007, cls_names)
register_pascal_voc("my_test", '/mnt/c/Users/user/OneDrive - 國立中正大學/lesson/112-2/ML_CV/Exercise_1/datasets/Cityscapes_dataset/Cityscapes_dataset/VOC2007', "test", 2007, cls_names)


AssertionError: Dataset 'my_dataset' is already registered!

#### Set Model

In [9]:
from detectron2.modeling import build_model
from detectron2.checkpoint import DetectionCheckpointer

cfg = get_cfg()
cfg.merge_from_file("./PascalVOC-Detection/my_vgg.yaml")
cfg.DATASETS.TRAIN = ("my_dataset",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 1
# cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2 # This is the real "batch size" commonly known to deep learning people
cfg.SOLVER.BASE_LR = 0.0002  # pick a good LR
cfg.SOLVER.MAX_ITER = 90000    # 300 iterations seems good enough for this toy dataset; you will need to train longer for a practical dataset
cfg.SOLVER.STEPS = []        # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   # The "RoIHead batch size". 128 is faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 8  # 8 classes, class_names = ('truck', 'car', 'rider', 'person', 'train', 'motorcycle', 'bicycle', 'bus')

cfg.MODEL.BACKBONE.FREEZE_AT = 2
#cfg.MODEL.WEIGHTS = "./output/vgg16/model_0064999.pth"

KeyError: 'Non-existent config key: MODEL.FREEZE_AT'

### Train!

In [5]:
import wandb
wandb.login(relogin=True, key='78f66d0691441fbb503f17c6de791883d0e54f94')
wandb.init(
            # set the wandb project where this run will be logged
            project="Exercise_1", 
            name="VGG16-v2",
            notes="20240407_v2",
            sync_tensorboard=True
        )

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/yuu/.netrc
[34m[1mwandb[0m: Currently logged in as: [33myuu_910119[0m ([33m112-2_ml_cv[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [6]:
cfg.OUTPUT_DIR='./output/vgg16/ver2'
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

[32m[04/07 16:43:05 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): VGG(
    (vgg_block1): Sequential(
      (0): VGGBlock(
        (conv1): Conv2d(
          3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
        (conv2): Conv2d(
          64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
        (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
    )
    (vgg_block2): Sequential(
      (0): VGGBlock(
        (conv1): Conv2d(
          64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
        )
        (conv2): Conv2d(
          128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=128, ep

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[04/07 16:44:05 d2.utils.events]: [0m eta: 0:01:03  iter: 19  total_loss: 3.232  loss_cls: 1.213  loss_box_reg: 0.2837  loss_rpn_cls: 0.6456  loss_rpn_loc: 0.8565    time: 0.8029  last_time: 0.7883  data_time: 0.0287  last_data_time: 0.0062   lr: 3.8162e-05  max_mem: 1537M
[32m[04/07 16:44:21 d2.utils.events]: [0m eta: 0:00:48  iter: 39  total_loss: 2.179  loss_cls: 0.7698  loss_box_reg: 0.196  loss_rpn_cls: 0.5921  loss_rpn_loc: 0.5639    time: 0.8047  last_time: 0.8268  data_time: 0.0061  last_data_time: 0.0063   lr: 7.8122e-05  max_mem: 1537M
[32m[04/07 16:44:37 d2.utils.events]: [0m eta: 0:00:32  iter: 59  total_loss: 2.063  loss_cls: 0.6909  loss_box_reg: 0.2571  loss_rpn_cls: 0.4992  loss_rpn_loc: 0.6901    time: 0.8103  last_time: 0.8424  data_time: 0.0065  last_data_time: 0.0050   lr: 0.00011808  max_mem: 1537M
[32m[04/07 16:44:54 d2.utils.events]: [0m eta: 0:00:16  iter: 79  total_loss: 2.052  loss_cls: 0.6938  loss_box_reg: 0.2214  loss_rpn_cls: 0.4832  loss_rpn_l

KeyboardInterrupt: 

### Inference & Evaluation

In [10]:
cfg.OUTPUT_DIR='./output/vgg16/again'
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
predictor = DefaultPredictor(cfg)

[32m[04/07 20:51:09 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from ./output/vgg16/again/model_final.pth ...


In [5]:
# ----------
# INFERENCE
# ----------
# VISUALIZE TEH PREDICT RESULT
im = cv2.imread('aachen_000012_000019_leftImg8bit.png')
outputs = predictor(im)
v = Visualizer(im[:, :, ::-1],
                metadata=MetadataCatalog.get('my_test'),
                scale=0.5,
                instance_mode=ColorMode.IMAGE_BW
)
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
cv2.imshow('', out.get_image()[:, :, ::-1])
cv2.waitKey(0)



  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [11]:
# -------
# EVALUATE
# --------
evaluator = PascalVOCDetectionEvaluator("my_test")
val_loader = build_detection_test_loader(cfg, "my_test")
print(inference_on_dataset(predictor.model, val_loader, evaluator))

[32m[04/07 20:51:30 d2.data.build]: [0mDistribution of instances among all 8 categories:
[36m|  category  | #instances   |  category  | #instances   |  category  | #instances   |
|:----------:|:-------------|:----------:|:-------------|:----------:|:-------------|
|   truck    | 93           |    car     | 4667         |   rider    | 556          |
|   person   | 3419         |   train    | 23           | motorcycle | 149          |
|  bicycle   | 1175         |    bus     | 98           |            |              |
|   total    | 10180        |            |              |            |              |[0m
[32m[04/07 20:51:30 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[04/07 20:51:30 d2.data.common]: [0mSerializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[32m[04/07 20:51:30 d2.data.common]: [0mSerializing 492 elements to