In [3]:
# Switch into the mmdetection checkout so that the relative 'configs/',
# 'checkpoints/' and 'data/' paths used by later cells resolve.
# NOTE(review): the path is relative, so re-running this cell without a kernel
# restart will try to enter a nested 'mmdetection' directory.
%cd mmdetection

# One-time environment setup, kept disabled for reference.
# !pip install -q --upgrade wandb
# !pip3 install openmim
# !mim install mmcv-full
# !pip install -e .

# MMDetection / mmcv entry points used throughout the notebook.
from mmdet.apis import init_detector, inference_detector, show_result_pyplot
import mmcv
from mmcv import Config
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector
from mmdet.datasets.builder import DATASETS
from mmdet.datasets.coco import CocoDataset,CustomDataset
from mmdet.apis import set_random_seed
import os.path as osp
import os
import numpy as np

# Print the installed wandb version so the run environment is recorded.
import wandb
print(wandb.__version__)

/aiffel/aiffel/aimmo/mmdetection




0.13.6


In [4]:
@DATASETS.register_module(force=True)
class Aimmo(CustomDataset):
    """Detection dataset backed by one plain-text label file per image.

    ``ann_file`` is a text listing with one entry per line.  For every entry
    the label file ``<label_prefix>/<entry>`` is read, where ``label_prefix``
    is ``img_prefix`` with ``'image'`` replaced by ``'middle_texts'``.  Each
    non-empty label line has the form ``<class_name> <v1> <v2> <v3> <v4> ...``
    and the four numeric values are stored as the box without conversion.

    NOTE(review): MMDetection expects ``(x1, y1, x2, y2)`` boxes in absolute
    pixels -- confirm that the label files already use that layout.
    """

    CLASSES = ('car', 'truck', 'bus','pedestrian')

    # Size reported for every sample; the image files are never opened here.
    # NOTE(review): assumes all images are 1920x1024 -- confirm against data.
    IMG_WIDTH = 1920
    IMG_HEIGHT = 1024

    def load_annotations(self, ann_file):
        """Build the list of per-image info dicts from the text annotations.

        Args:
            ann_file (str): Path of the listing file (one entry per line).

        Returns:
            list[dict]: One dict per listing entry with the keys ``filename``,
            ``width``, ``height`` and ``ann``.  ``ann`` holds ``bboxes``
            (float32, shape (n, 4)), ``labels`` (int64), ``bboxes_ignore``
            (float32, shape (k, 4)) and ``labels_ignore`` (int64).  Boxes whose
            class name is not in ``CLASSES`` go to the ``*_ignore`` arrays with
            label ``-1``.

        Raises:
            ValueError: If a non-empty label line has fewer than five fields.
        """
        cat2label = {name: idx for idx, name in enumerate(self.CLASSES)}
        # Loop-invariant: label files live next to the images, in a sibling
        # directory obtained by renaming 'image' to 'middle_texts'.
        label_prefix = self.img_prefix.replace('image', 'middle_texts')
        data_infos = []

        # Read the listing that was handed in instead of self.ann_file, so the
        # method honours the path its caller resolved.
        for image_id in mmcv.list_from_file(ann_file):
            label_path = osp.join(label_prefix, str(image_id))

            gt_bboxes, gt_labels = [], []
            gt_bboxes_ignore, gt_labels_ignore = [], []

            for line in mmcv.list_from_file(label_path):
                # split() tolerates repeated spaces / tabs; blank lines are
                # skipped instead of producing an empty, ragged box.
                fields = line.split()
                if not fields:
                    continue
                if len(fields) < 5:
                    raise ValueError(
                        f'Malformed label line in {label_path}: {line!r}')

                bbox = [float(value) for value in fields[1:5]]
                if fields[0] in cat2label:
                    gt_bboxes.append(bbox)
                    gt_labels.append(cat2label[fields[0]])
                else:
                    gt_bboxes_ignore.append(bbox)
                    gt_labels_ignore.append(-1)

            data_infos.append({
                # The listing entry with its last extension removed; the full
                # entry is what names the label file above.
                'filename': os.path.splitext(image_id)[0],
                'width': self.IMG_WIDTH,
                'height': self.IMG_HEIGHT,
                'ann': {
                    'bboxes': np.array(
                        gt_bboxes, dtype=np.float32).reshape(-1, 4),
                    # np.compat.long no longer exists in NumPy >= 2.0.
                    'labels': np.array(gt_labels, dtype=np.int64),
                    'bboxes_ignore': np.array(
                        gt_bboxes_ignore, dtype=np.float32).reshape(-1, 4),
                    'labels_ignore': np.array(
                        gt_labels_ignore, dtype=np.int64),
                },
            })
        return data_infos

In [5]:
# Show which pretrained checkpoint files are available locally.
import os
checkpoint_dir = 'checkpoints'
os.listdir(checkpoint_dir)

['faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210526_095054-1f77628b.pth',
 'faster_rcnn_r101_caffe_fpn_1x_coco.pth',
 'faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth',
 'fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco-ae4d8b3d.pth']

In [7]:
# Training / evaluation configuration: YOLOv3 (Darknet-53) on the Aimmo dataset.
config = 'configs/yolo/yolov3_d53_mstrain-608_273e_coco.py'
cfg = Config.fromfile(config)

# Values a reader is most likely to tune.
NUM_CLASSES = 4          # car, truck, bus, pedestrian (see Aimmo.CLASSES)
IMG_SCALE = (960, 960)   # single fixed scale for train and test resizing
MAX_EPOCHS = 15
SAMPLES_PER_GPU = 6
SEED = 0

# --- Dataset -----------------------------------------------------------------
cfg.dataset_type = 'Aimmo'
cfg.data_root = 'data/'

cfg.data.samples_per_gpu = SAMPLES_PER_GPU
cfg.data.workers_per_gpu = 0

cfg.data.train.type = 'Aimmo'
cfg.data.train.ann_file = 'data/middle_train.txt'
cfg.data.train.img_prefix = 'data/image'

cfg.data.val.type = 'Aimmo'
cfg.data.val.ann_file = 'data/middle_val.txt'
cfg.data.val.img_prefix = 'data/image'

cfg.data.test.type = 'Aimmo'
cfg.data.test.ann_file = 'data/middle_sunday_test.txt'
cfg.data.test.img_prefix = 'data/test/image'

# --- Model -------------------------------------------------------------------
# Number of classes predicted by the box head.
cfg.model.bbox_head.num_classes = NUM_CLASSES

# Custom anchor sizes, one group of three per output scale.
cfg.model.bbox_head.anchor_generator['base_sizes'] = [
    [(116, 90), (110, 180), (330, 248)],
    [(30, 61), (70, 30), (59, 119)],
    [(7, 10), (11, 25), (28, 15)],
]

# To fine-tune from a pre-trained detector, point load_from at its checkpoint:
# cfg.load_from = 'checkpoints/yolov3_d53_mstrain-608_273e_coco_20210518_115020-a2c3acb8.pth'

# --- Schedule / logging ------------------------------------------------------
cfg.runner['max_epochs'] = MAX_EPOCHS

# NOTE(review): the optimizer learning rate is left at the base-config value
# and is not rescaled for the smaller batch size used here -- confirm that this
# is intended.
cfg.log_config.interval = 10
cfg.log_config.hooks = [
    dict(type='TextLoggerHook'),
    dict(type='TensorboardLoggerHook')]

# Aimmo derives from CustomDataset, whose evaluate() understands 'mAP' and
# 'recall'; the COCO-style 'bbox' metric is not accepted by it.
cfg.evaluation.metric = 'mAP'
# Evaluate and save a checkpoint after every epoch.
cfg.evaluation.interval = 1
cfg.checkpoint_config.interval = 1

# --- Image resize ------------------------------------------------------------
# The scale is set on step 4 of each training pipeline and on step 1 of each
# test-style pipeline, both on the top-level pipeline variables and on the
# copies held by the data section.
for pipeline, step in (
        (cfg.train_pipeline, 4),
        (cfg.test_pipeline, 1),
        (cfg.data.train.pipeline, 4),
        (cfg.data.val.pipeline, 1),
        (cfg.data.test.pipeline, 1)):
    pipeline[step]['img_scale'] = IMG_SCALE

# --- Reproducibility / device ------------------------------------------------
cfg.seed = SEED
set_random_seed(SEED, deterministic=False)
cfg.gpu_ids = range(1)
cfg['device'] = 'cuda'

# Print the final config used for training.
print(f'Config:\n{cfg.pretty_text}')

Config:
checkpoint_config = dict(interval=1)
log_config = dict(
    interval=10,
    hooks=[dict(type='TextLoggerHook'),
           dict(type='TensorboardLoggerHook')])
custom_hooks = [dict(type='NumClassCheckHook')]
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'
auto_scale_lr = dict(enable=False, base_batch_size=64)
model = dict(
    type='YOLOV3',
    backbone=dict(
        type='Darknet',
        depth=53,
        out_indices=(3, 4, 5),
        init_cfg=dict(type='Pretrained', checkpoint='open-mmlab://darknet53')),
    neck=dict(
        type='YOLOV3Neck',
        num_scales=3,
        in_channels=[1024, 512, 256],
        out_channels=[512, 256, 128]),
    bbox_head=dict(
        type='YOLOV3Head',
        num_classes=4,
        in_channels=[512, 256, 128],
        out_channels=[1024, 512, 256],
        anchor_generator=dict(
            type='YOLOAnchorGenerator',
 

In [8]:
from mmdet.apis import multi_gpu_test, single_gpu_test
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmdet.apis import inference_detector, init_detector, show_result_pyplot
from mmdet.datasets import (build_dataloader, build_dataset,
                            replace_ImageToTensor)


# NOTE(review): cfg describes a YOLOv3 model, while this checkpoint lives under
# 'yolox/' and the recorded load reports "The model and loaded state dict do
# not match exactly" with unexpected ema_* keys.  Weights that do not match the
# architecture leave the detector effectively untrained -- confirm that the
# checkpoint and the config belong to the same model before trusting results.
checkpoint_file = 'yolox/epoch_12.pth'  # trained weight file
model_ckpt = init_detector(cfg, checkpoint_file, device='cuda:0')
# Wrap the model so it can be driven by single_gpu_test on GPU 0.
model_ckpt = MMDataParallel(model_ckpt, device_ids=[0])

# Build the test split in test mode (supplied as a default so cfg itself is
# left untouched); without it the dataset is built the training way.
dataset = build_dataset(cfg.data.test, dict(test_mode=True))
data_loader = build_dataloader(
        dataset,
        # samples_per_gpu must be set to 1 here
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

load checkpoint from local path: yolox/epoch_12.pth
The model and loaded state dict do not match exactly

unexpected key in source state_dict: ema_backbone_stem_conv_conv_weight, ema_backbone_stem_conv_bn_weight, ema_backbone_stem_conv_bn_bias, ema_backbone_stem_conv_bn_running_mean, ema_backbone_stem_conv_bn_running_var, ema_backbone_stem_conv_bn_num_batches_tracked, ema_backbone_stage1_0_conv_weight, ema_backbone_stage1_0_bn_weight, ema_backbone_stage1_0_bn_bias, ema_backbone_stage1_0_bn_running_mean, ema_backbone_stage1_0_bn_running_var, ema_backbone_stage1_0_bn_num_batches_tracked, ema_backbone_stage1_1_main_conv_conv_weight, ema_backbone_stage1_1_main_conv_bn_weight, ema_backbone_stage1_1_main_conv_bn_bias, ema_backbone_stage1_1_main_conv_bn_running_mean, ema_backbone_stage1_1_main_conv_bn_running_var, ema_backbone_stage1_1_main_conv_bn_num_batches_tracked, ema_backbone_stage1_1_short_conv_conv_weight, ema_backbone_stage1_1_short_conv_bn_weight, ema_backbone_stage1_1_short_conv_bn



In [9]:
# Run the detector over every sample of the test loader on a single GPU.
# The three trailing arguments are spelled out by keyword for readability.
outputs = single_gpu_test(
    model_ckpt,
    data_loader,
    show=True,
    out_dir='my_result',
    show_score_thr=0.3,
)

[                                                  ] 0/393, elapsed: 0s, ETA:

  areas = (bboxes[:, 3] - bboxes[:, 1]) * (bboxes[:, 2] - bboxes[:, 0])
  areas = (bboxes[:, 3] - bboxes[:, 1]) * (bboxes[:, 2] - bboxes[:, 0])


[                                 ] 6/393, 1.3 task/s, elapsed: 5s, ETA:   308s

  areas = (bboxes[:, 3] - bboxes[:, 1]) * (bboxes[:, 2] - bboxes[:, 0])


[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 393/393, 1.5 task/s, elapsed: 269s, ETA:     0s

In [10]:
# Score the collected detections against the test annotations (mean AP) and
# display the returned metrics as the cell output.
eval_results = dataset.evaluate(outputs, metric='mAP')
eval_results


---------------iou_thr: 0.5---------------


  area2 = (bboxes2[:, 2] - bboxes2[:, 0] + extra_length) * (
  area2 = (bboxes2[:, 2] - bboxes2[:, 0] + extra_length) * (
  area2 = (bboxes2[:, 2] - bboxes2[:, 0] + extra_length) * (
  area2 = (bboxes2[:, 2] - bboxes2[:, 0] + extra_length) * (
  area2 = (bboxes2[:, 2] - bboxes2[:, 0] + extra_length) * (
  area2 = (bboxes2[:, 2] - bboxes2[:, 0] + extra_length) * (
  area2 = (bboxes2[:, 2] - bboxes2[:, 0] + extra_length) * (
  area2 = (bboxes2[:, 2] - bboxes2[:, 0] + extra_length) * (
  area2 = (bboxes2[:, 2] - bboxes2[:, 0] + extra_length) * (
  area2 = (bboxes2[:, 2] - bboxes2[:, 0] + extra_length) * (
  area2 = (bboxes2[:, 2] - bboxes2[:, 0] + extra_length) * (
  area2 = (bboxes2[:, 2] - bboxes2[:, 0] + extra_length) * (
  bboxes2[:, 3] - bboxes2[:, 1] + extra_length)
  bboxes2[:, 3] - bboxes2[:, 1] + extra_length)
  bboxes2[:, 3] - bboxes2[:, 1] + extra_length)
  bboxes2[:, 3] - bboxes2[:, 1] + extra_length)
  area1 = (bboxes1[:, 2] - bboxes1[:, 0] + extra_length) * (
  area1 = (bbox


+------------+------+-------+--------+-------+
| class      | gts  | dets  | recall | ap    |
+------------+------+-------+--------+-------+
| car        | 2057 | 15798 | 0.000  | 0.000 |
| truck      | 911  | 2842  | 0.000  | 0.000 |
| bus        | 319  | 16074 | 0.000  | 0.000 |
| pedestrian | 549  | 4586  | 0.000  | 0.000 |
+------------+------+-------+--------+-------+
| mAP        |      |       |        | 0.000 |
+------------+------+-------+--------+-------+


OrderedDict([('AP50', 0.0), ('mAP', 0.0)])