In [None]:
import os
import shutil
import yaml
import time
import json
import cv2
import random
import numpy as np
import pandas as pd
from glob import glob
import matplotlib.pyplot as plt
from sklearn.model_selection import GroupKFold
from tqdm.notebook import tqdm
import seaborn as sns
import torch
from IPython.display import Image, clear_output
from collections import Counter
from ensemble_boxes import *
import copy
import os.path as osp
import mmcv
import numpy as np
from mmdet.datasets.builder import DATASETS
from mmdet.datasets.custom import CustomDataset
from mmcv import Config
from mmdet.apis import set_random_seed
from mmdet.apis import inference_detector, init_detector, show_result_pyplot
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector
# Expose only GPU 0 to CUDA for this notebook.
# NOTE(review): this is set after `import torch`; presumably still effective
# because CUDA has not been initialized at this point — confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

In [None]:
# Experiment tag; it is embedded in the models directory name and stored in PARAMS.
VER = 'v6'
# When True, the checkpoint-download / smoke-test cell is executed.
TEST = False
# Run configuration; it is dumped verbatim to params.json next to the checkpoints.
PARAMS = dict(
    version=VER,
    folds=7,          # number of GroupKFold splits
    val_fold=0,
    img_size=1024,    # side length of the pre-resized training images
    batch_size=4,
    epochs=50,
    seed=2020,
    sup='nms',        # duplicate-box suppression: 'nms' or 'wbf'
    iou_th=0.5,
    skip_box_th=1e-4,
    # Alternative model setups (enable exactly one config/checkpoint pair):
    # 0
    #   config='faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py',
    # 1
    #   config='faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py',
    #   checkpoint='mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth',
    # 2 (active)
    config='vfnet/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco.py',
    checkpoint='vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-6879c318.pth',
    # 3
    #   config='vfnet/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco.py',
    #   checkpoint='vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-7729adb5.pth',
    comments='',
)
# Filesystem layout.
# NOTE(review): these are absolute, machine-specific paths; consider making the
# root configurable before sharing the notebook.
DATA_PATH = '/u01/mrorange/vinbigdata/data'
WRK_DIR = f'{DATA_PATH}/workmmd'
IMGS_PATH = f'{DATA_PATH}/train_{PARAMS["img_size"]}'
MDLS_PATH = f'/u01/mrorange/vinbigdata/models_mmdet_{VER}'
# makedirs(exist_ok=True) is idempotent on re-runs and also creates missing
# parent directories, unlike the exists()/mkdir() pair it replaces.
os.makedirs(MDLS_PATH, exist_ok=True)
# Persist the run configuration next to the checkpoints.
with open(f'{MDLS_PATH}/params.json', 'w') as file:
    json.dump(PARAMS, file)
    
def seed_all(seed):
    """Seed every global RNG this notebook relies on.

    Seeds Python's ``random``, NumPy's legacy global generator and PyTorch
    (CPU and all CUDA devices), and exports ``PYTHONHASHSEED``.

    Args:
        seed: integer seed shared by all generators.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. subprocesses);
    # the hash seed of the running kernel is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    # torch is used for training, so its generators must be seeded as well.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # silently ignored when CUDA is unavailable

# Seed the global RNGs through seed_all using the configured seed.
seed_all(PARAMS['seed'])
# Wall-clock timestamp taken at notebook start (not read again in the visible cells).
start_time = time.time()

In [None]:
# Per-class drawing colors, indexed by class id (one 3-channel triple per class).
label2color = [
    list(channels)
    for channels in (
        (59, 238, 119),   # 0
        (222, 21, 229),   # 1
        (94, 49, 164),    # 2
        (206, 221, 133),  # 3
        (117, 75, 3),     # 4
        (210, 224, 119),  # 5
        (211, 176, 166),  # 6
        (63, 7, 197),     # 7
        (102, 65, 77),    # 8
        (194, 134, 175),  # 9
        (209, 219, 50),   # 10
        (255, 44, 47),    # 11
        (89, 125, 149),   # 12
        (110, 27, 100),   # 13
    )
]

def plot_img(img, size=(18, 18), is_rgb=True, title='', cmap='gray'):
    """Display a single image in a new matplotlib figure.

    Args:
        img: image array handed to ``plt.imshow``.
        size: figure size as ``(width, height)``.
        is_rgb: accepted for interface compatibility; not used by the body.
        title: figure-level title.
        cmap: colormap forwarded to ``plt.imshow``.
    """
    figure = plt.figure(figsize=size)
    plt.imshow(img, cmap=cmap)
    # The figure created above is the current one, so titling it directly
    # is the same as calling plt.suptitle.
    figure.suptitle(title)
    plt.show()

def plot_imgs(imgs, cols=2, size=10, is_rgb=True, title='', cmap='gray', img_size=None):
    """Display several images on a grid with ``cols`` columns.

    Args:
        imgs: sequence of image arrays.
        cols: number of grid columns.
        size: edge length of one grid cell, in figure units.
        is_rgb: accepted for interface compatibility; not used by the body.
        title: figure-level title.
        cmap: colormap forwarded to ``imshow``.
        img_size: optional target size passed to ``cv2.resize`` for every
            image before it is drawn.
    """
    # Ceiling division: the previous `len(imgs) // cols + 1` allocated an empty
    # extra row whenever the image count was an exact multiple of `cols`.
    # At least one row is kept so an empty input still yields a valid figure.
    rows = max(1, (len(imgs) + cols - 1) // cols)
    fig = plt.figure(figsize=(cols * size, rows * size))
    for i, img in enumerate(imgs):
        if img_size is not None:
            img = cv2.resize(img, img_size)
        ax = fig.add_subplot(rows, cols, i + 1)
        ax.axis('off')
        ax.imshow(img, cmap=cmap)
    fig.suptitle(title)
    
def draw_bbox(image, box, label, color, thickness=3):
    """Return a copy of ``image`` with one labelled bounding box drawn on it.

    The box interior is lightly tinted with ``color``, a dark strip is blended
    in above the top-left corner, and the upper-cased label is written on it.

    Args:
        image: image array; it is copied, never modified.
        box: indexable of at least four numbers ``(x_min, y_min, x_max, y_max)``.
            Values are truncated to plain ``int`` because the OpenCV drawing
            calls expect integer pixel coordinates.
        label: text drawn above the box (upper-cased).
        color: color passed to ``cv2.rectangle``.
        thickness: line width of the box outline.

    Returns:
        The annotated copy of ``image``.
    """
    alpha = .1       # blend weight of the filled box overlay
    alpha_box = .4   # blend weight of the label background overlay
    x_min, y_min, x_max, y_max = int(box[0]), int(box[1]), int(box[2]), int(box[3])
    top_left, bottom_right = (x_min, y_min), (x_max, y_max)
    text = label.upper()

    overlay_bbox = image.copy()
    overlay_text = image.copy()
    output = image.copy()
    text_width, text_height = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, .6, 1)[0]

    # Translucent fill of the box area.
    cv2.rectangle(overlay_bbox, top_left, bottom_right, color, -1)
    cv2.addWeighted(overlay_bbox, alpha, output, 1 - alpha, 0, output)
    # Dark background strip for the label, directly above the box.
    cv2.rectangle(overlay_text,
                  (x_min, y_min - 7 - text_height),
                  (x_min + text_width + 2, y_min),
                  (0, 0, 0), -1)
    cv2.addWeighted(overlay_text, alpha_box, output, 1 - alpha_box, 0, output)
    # Solid outline and label text on top of the blended layers.
    cv2.rectangle(output, top_left, bottom_right, color, thickness)
    cv2.putText(output,
                text,
                (x_min, y_min - 5),
                cv2.FONT_HERSHEY_SIMPLEX,
                .6, (255, 255, 255), 1,
                cv2.LINE_AA)
    return output

In [None]:
# Optional smoke test, run only when TEST is True: download pretrained COCO
# checkpoints into {WRK_DIR}/checkpoints and run one Mask R-CNN inference on a
# sample image.
# NOTE(review): the checkpoint named in PARAMS['checkpoint'] is downloaded only
# inside this branch — presumably it is already on disk from an earlier run; confirm.
if TEST: 
    # Lines starting with `!` are IPython shell escapes; {WRK_DIR} is interpolated from Python.
    !mkdir {WRK_DIR}/checkpoints
    # Mask R-CNN R50 (caffe, mstrain-poly 3x) checkpoint.
    !wget -c http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth \
          -O {WRK_DIR}/checkpoints/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth
    # VFNet R50 checkpoint.
    !wget -c https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-6879c318.pth \
        -O {WRK_DIR}/checkpoints/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-6879c318.pth
    # VFNet R101 checkpoint.
    !wget -c https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-7729adb5.pth \
        -O {WRK_DIR}/checkpoints/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-7729adb5.pth
    
    # Run the downloaded Mask R-CNN on one image and show detections scoring >= 0.5.
    config = f'{WRK_DIR}/mmdetection/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py'
    checkpoint = f'{WRK_DIR}/checkpoints/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth'
    model = init_detector(config, checkpoint, device='cuda:0')
    img = 'tomatokillers.jpg'
    result = inference_detector(model, img)
    show_result_pyplot(model, img, result, score_thr=.5, fig_size=(3, 2))
else:
    print('no test mode')

In [None]:
# --- Load annotations and per-image metadata -------------------------------
train_df = pd.read_csv(f'{DATA_PATH}/train.csv')
# Vectorized string concatenation instead of a row-wise apply.
train_df['img_path'] = IMGS_PATH + '/' + train_df['image_id'].astype(str) + '.png'
print('train loaded:', train_df.shape)
meta_df = pd.read_csv(f'{DATA_PATH}/train_meta_{PARAMS["img_size"]}.csv')
print('meta loaded:', meta_df.shape)
train_df = pd.merge(train_df, meta_df, on='image_id')
print('merged:', train_df.shape)

# --- Rescale boxes to the resized image ------------------------------------
# x coordinates are divided by dim1 and y coordinates by dim0 (presumably the
# original width and height from the meta file), then scaled to img_size.
# Column arithmetic gives the same values as the former per-row lambdas
# without the Python-level loop.
for coord_col, dim_col in (('x_min', 'dim1'), ('y_min', 'dim0'),
                           ('x_max', 'dim1'), ('y_max', 'dim0')):
    train_df[coord_col] = PARAMS['img_size'] * train_df[coord_col] / train_df[dim_col]
train_df['width'] = train_df['x_max'] - train_df['x_min']
train_df['height'] = train_df['y_max'] - train_df['y_min']

# --- Drop class 14 and derive the ordered class-name list ------------------
train_df = train_df[train_df.class_id != 14].reset_index(drop = True)
class_ids, class_names = list(zip(*set(zip(train_df.class_id, train_df.class_name))))
# Order class names by class id so that list position == class id.
classes = list(np.array(class_names)[np.argsort(class_ids)])
classes = list(map(lambda x: str(x), classes))
print('classes:', classes)

# Disabled filter for tiny / border-touching boxes, kept for reference:
# train_df = train_df[(train_df.width >= 10) &
#                     (train_df.height >= 10) &
#                     (train_df.x_min >= 1) &
#                     (train_df.y_min >= 1) &
#                     (train_df.x_max <= PARAMS['img_size'] - 1) &
#                     (train_df.y_max <= PARAMS['img_size'] - 1)]
# train_df.reset_index(inplace=True)

# --- Fold assignment --------------------------------------------------------
# Grouping by image_id keeps all boxes of one image in the same fold.
gkf  = GroupKFold(n_splits=PARAMS['folds'])
train_df['fold'] = -1
# val_idx holds positions; they equal index labels because of the
# reset_index(drop=True) above.
for fold, (train_idx, val_idx) in enumerate(gkf.split(train_df, groups=train_df.image_id.tolist())):
    train_df.loc[val_idx, 'fold'] = fold

train_df.head()

In [None]:
# Write the image-id lists consumed by VBDDataset (one id per line).
# The validation fold comes from PARAMS['val_fold'] instead of a hard-coded 0,
# so changing the parameter actually changes the split.
val_fold = PARAMS['val_fold']
is_val_row = train_df['fold'] == val_fold
with open(f'{WRK_DIR}/train.txt', 'w') as file:
    tr_ids = list(train_df[~is_val_row].image_id.unique())
    print('train:', len(tr_ids))
    file.write('\n'.join(tr_ids))
with open(f'{WRK_DIR}/val.txt', 'w') as file:
    val_ids = list(train_df[is_val_row].image_id.unique())
    print('val:', len(val_ids))
    file.write('\n'.join(val_ids))

In [None]:
@DATASETS.register_module()
class VBDDataset(CustomDataset):
    """mmdetection dataset backed by the in-memory annotation frame.

    ``ann_file`` is a text file with one image id per line. For every image the
    boxes of a class that occurs more than once are merged with the suppression
    selected by ``PARAMS['sup']`` ('nms' or 'wbf'); classes that occur exactly
    once are kept as they are.
    """
    CLASSES = classes.copy()
    # Snapshot of the annotation frame taken when the class is defined.
    ANN_DF = train_df.copy()
    _BOX_COLS = ['x_min', 'y_min', 'x_max', 'y_max']

    @staticmethod
    def _suppress(boxes_list, scores_list, labels_list):
        """Merge overlapping boxes; inputs and the returned boxes are normalized by img_size.

        Returns:
            ``(boxes, labels)`` as produced by the selected ensemble_boxes routine.

        Raises:
            AttributeError: if ``PARAMS['sup']`` is neither 'nms' nor 'wbf'.
        """
        if PARAMS['sup'] == 'nms':
            boxes, _, box_labels = nms(
                boxes=[boxes_list],
                scores=[scores_list],
                labels=[labels_list],
                weights=None,
                iou_thr=PARAMS['iou_th']
            )
        elif PARAMS['sup'] == 'wbf':
            boxes, _, box_labels = weighted_boxes_fusion(
                boxes_list=[boxes_list],
                scores_list=[scores_list],
                labels_list=[labels_list],
                weights=None,
                iou_thr=PARAMS['iou_th'],
                skip_box_thr=PARAMS['skip_box_th']
            )
        else:
            raise AttributeError('wrong supression param')
        return boxes, box_labels

    def load_annotations(self, ann_file):
        """Build the per-image annotation records expected by ``CustomDataset``.

        Args:
            ann_file: path to a text file with one image id per line.

        Returns:
            A list of dicts with ``filename``, ``width``, ``height`` and ``ann``,
            where ``ann`` holds ``bboxes`` (float32, shape (N, 4)) and ``labels``
            (int64, shape (N,)). Labels are the raw ``class_id`` values.

        Raises:
            AttributeError: if ``PARAMS['sup']`` is neither 'nms' nor 'wbf'.
            KeyError: if an image id from ``ann_file`` has no annotation rows.
        """
        # Fail before any work is done, as the original did on the first image.
        if PARAMS['sup'] not in ('nms', 'wbf'):
            raise AttributeError('wrong supression param')
        img_size = PARAMS['img_size']
        # Group once instead of scanning the whole frame for every image.
        anns_by_image = dict(tuple(self.ANN_DF.groupby('image_id', sort=False)))

        data_infos = []
        for image_id in mmcv.list_from_file(ann_file):
            if image_id not in anns_by_image:
                raise KeyError(f'no annotations found for image {image_id}')
            img_anns = anns_by_image[image_id]
            data_info = dict(
                filename=img_anns['img_path'].values[0],
                width=img_size,
                height=img_size
            )

            # Classes seen once are passed through; classes seen several times
            # are collected (normalized to [0, 1]) for suppression.
            single_boxes, single_labels = [], []
            multi_boxes, multi_scores, multi_labels = [], [], []
            for cid, cls_anns in img_anns.groupby('class_id', sort=False):
                cls_boxes = cls_anns[self._BOX_COLS].to_numpy()
                n_boxes = len(cls_boxes)
                if n_boxes == 1:
                    single_labels.append(int(cid))
                    single_boxes.append(cls_boxes[0].tolist())
                else:
                    multi_labels.extend([int(cid)] * n_boxes)
                    multi_boxes.extend((cls_boxes / img_size).tolist())
                    # All annotations are treated as equally confident.
                    multi_scores.extend([1.0] * n_boxes)

            if multi_boxes:
                boxes, box_labels = self._suppress(multi_boxes, multi_scores, multi_labels)
                gt_bboxes = (np.asarray(boxes) * img_size).tolist()
                gt_labels = np.asarray(box_labels).astype(int).tolist()
            else:
                # Nothing to merge. The suppression call is skipped entirely:
                # the previous code called it on empty input outside its `try`,
                # and the bare `except` that followed could hide real errors.
                gt_bboxes, gt_labels = [], []
            gt_bboxes.extend(single_boxes)
            gt_labels.extend(single_labels)

            data_info.update(ann=dict(
                bboxes=np.array(gt_bboxes, dtype=np.float32).reshape(-1, 4),
                # np.long no longer exists in current NumPy; int64 is the explicit equivalent.
                labels=np.array(gt_labels, dtype=np.int64)
            ))
            data_infos.append(data_info)
        return data_infos

In [None]:
import albumentations as A

# Each stage is built separately, in application order, then composed.
# Photometric jitter: one of brightness / contrast / gamma, applied half of the time.
photometric_aug = A.OneOf([
    A.RandomBrightness(limit=.2, p=1),
    A.RandomContrast(limit=.2, p=1),
    A.RandomGamma(p=1)
], p=.5)
# Mild smoothing: plain or median blur, applied half of the time.
blur_aug = A.OneOf([
    A.Blur(blur_limit=3, p=1),
    A.MedianBlur(blur_limit=3, p=1)
], p=.5)
flip_aug = A.HorizontalFlip(p=.5)
# Disabled geometric options, kept for reference:
#A.Transpose(p=.25),
#A.RandomRotate90(p=.25),
affine_aug = A.ShiftScaleRotate(p=.5)

train_transforms = A.Compose([photometric_aug, blur_aug, flip_aug, affine_aug])

In [None]:
# Load the base mmdetection config selected in PARAMS and fine-tune from the
# matching pretrained checkpoint.
cfg = Config.fromfile(f'{WRK_DIR}/mmdetection/configs/{PARAMS["config"]}')
cfg.load_from = f'{WRK_DIR}/checkpoints/{PARAMS["checkpoint"]}'
# The head location depends on the architecture: two-stage models use
# roi_head.bbox_head (disabled line below), VFNet uses bbox_head directly.
#cfg.model.roi_head.bbox_head.num_classes = 14
cfg.model.bbox_head.num_classes = 14 # VFNet option
#cfg.model.rpn_head.loss_bbox=dict(
#    type='IoULoss', 
#    loss_weight=1.0)
# Snapshot of the config with only the head change applied; the prediction
# visualization cell loads the model from this file.
cfg.dump(f'{MDLS_PATH}/init_config.py')

# Custom training pipeline: multi-scale resize, albumentations augmentation,
# normalization, padding and formatting.
# NOTE(review): nothing in the visible cells assigns this list to
# cfg.data.train.pipeline, so it looks like training still uses the pipeline
# resolved from the base config file — confirm whether that is intended.
# NOTE(review): `transforms` below receives an albumentations Compose object;
# presumably mmdet's Albu wrapper expects a list of config dicts — verify
# before wiring this pipeline in.
cfg.train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    # One of the listed scales is used per sample (multiscale_mode='value').
    dict(
        type='Resize',
        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
                   (1333, 768), (1333, 800)],
        multiscale_mode='value',
        keep_ratio=True),
    #dict(type='RandomFlip', flip_ratio=0.25),
    ########################################
    # About the `format` key of bbox_params:
    # format='pascal_voc' means [x1, y1, x2, y2] box encoding,
    # while format='coco' means [x, y, w, h].
    dict(
        type='Albu',
        transforms=train_transforms,
        bbox_params=dict(
            type='BboxParams',
            format='pascal_voc',
            label_fields=['gt_labels'],
            min_visibility=0.0,
            filter_lost_elements=True),
        # Maps mmdet result keys to the argument names albumentations uses.
        keymap={
            'img': 'image',
            #'gt_masks': 'masks',
            'gt_bboxes': 'bboxes',
        },
        update_pad_shape=False,
        skip_img_without_anno=True),
    #########################################
    # Mean subtraction only (std of 1), channels left in their loaded order.
    dict(
        type='Normalize',
        mean=[103.53, 116.28, 123.675],
        std=[1.0, 1.0, 1.0],
        to_rgb=False),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]

# Point every data split of the base config at the custom VBDDataset.
cfg.dataset_type = 'VBDDataset'
cfg.data_root = DATA_PATH
# (split name, id-list file); the test split reuses the training id list.
for split_name, split_ann_file in (
        ('test', f'{WRK_DIR}/train.txt'),
        ('train', f'{WRK_DIR}/train.txt'),
        ('val', f'{WRK_DIR}/val.txt')):
    split_cfg = cfg.data[split_name]
    split_cfg.type = 'VBDDataset'
    split_cfg.data_root = DATA_PATH
    split_cfg.ann_file = split_ann_file
    # Filenames from the dataset are already full paths, so no prefix is added.
    split_cfg.img_prefix = ''
# Checkpoints and logs are written to the versioned models directory.
cfg.work_dir = MDLS_PATH

# Learning rate derived from a base value of 0.02 and the configured batch size.
cfg.optimizer.lr = .02 / (8 * 16 / PARAMS['batch_size'])
#cfg.optimizer = dict(type='Adam', lr=.001)
cfg.optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# Cosine annealing with an exponential warm-up over the first 500 iterations.
cfg.lr_config = dict(
    policy='CosineAnnealing',
    warmup='exp',
    warmup_iters=500,
    warmup_ratio=.1,
    min_lr_ratio=1e-5
)

# Logging, checkpointing and evaluation cadence.
cfg.log_config.interval = 128
cfg.runner.max_epochs = PARAMS['epochs']
cfg.checkpoint_config.interval = 1
cfg.evaluation = dict(interval=1, metric='mAP', save_best='mAP')

cfg.seed = PARAMS['seed']
# Seed with the configured value; the previous hard-coded 0 disagreed with
# cfg.seed and overrode the seeding done at the top of the notebook.
set_random_seed(PARAMS['seed'], deterministic=False)

cfg.gpu_ids = range(1)
cfg.data.samples_per_gpu = PARAMS['batch_size']
# NOTE(review): the worker count is tied to the batch size — confirm this is intended.
cfg.data.workers_per_gpu = PARAMS['batch_size']
#cfg.workflow = [('train', 1), ('val', 1)]
cfg.workflow = [('train', 1)]

# Persist the final training config and show it for inspection.
cfg.dump(f'{MDLS_PATH}/train_config.py')
print(f'Config:\n{cfg.pretty_text}')

In [None]:
# Dataset configs to build: always train, plus val when the workflow has a
# second stage.
dataset_cfgs = [cfg.data.train]
if len(cfg.workflow) == 2:
    dataset_cfgs.append(cfg.data.val)
datasets = [build_dataset(dataset_cfg) for dataset_cfg in dataset_cfgs]

# Instantiate the detector described by the config.
train_cfg = cfg.get('train_cfg')
test_cfg = cfg.get('test_cfg')
model = build_detector(cfg.model, train_cfg=train_cfg, test_cfg=test_cfg)
# Attach the class names to the model.
model.CLASSES = datasets[0].CLASSES

# Make sure the output directory exists, then run single-process training
# with validation enabled.
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
train_detector(model, datasets, cfg, distributed=False, validate=True)

In [None]:
# Draw the annotated boxes for three randomly sampled annotation rows' images.
imgs = []
sample = train_df.sample(n=3)['image_id'].values
for img_id in sample:
    img_anns = train_df[train_df['image_id'] == img_id]
    boxes = img_anns[['x_min', 'y_min', 'x_max', 'y_max']].values
    # Select the column as a Series so the result stays 1-D even for an image
    # with a single annotation; `.values.squeeze()` collapsed that case to a
    # 0-d array, which cannot be iterated by zip().
    img_labels = img_anns['class_id'].values
    path = img_anns['img_path'].values[0]
    img = cv2.imread(path)
    for label_id, box in zip(img_labels, boxes):
        label_id = int(label_id)
        color = label2color[label_id]
        img = draw_bbox(
            img,
            [int(coord) for coord in box],  # plain-int pixel coordinates
            classes[label_id],
            color
        )
    imgs.append(img)
plot_imgs(imgs, size=8, cols=3, cmap=None)

# Draw the model predictions for the same sampled images.
imgs = []
# NOTE(review): the epoch number is hard-coded; point it at the checkpoint to inspect.
checkpoint = f'{MDLS_PATH}/epoch_21.pth'
# Kept in its own name so the `cfg` Config object used by the training cells is
# not replaced by a path string (which broke re-running those cells).
test_cfg_path = f'{MDLS_PATH}/init_config.py'
model_test = init_detector(test_cfg_path, checkpoint, device='cuda:0')
for img_id in sample:
    path = train_df.loc[train_df['image_id'] == img_id, 'img_path'].values[0]
    img = mmcv.imread(path)
    # `result` is iterated as one detection array per class; columns 0-3 are
    # read as the box and column 4 as the score.
    result = inference_detector(model_test, img)
    #show_result_pyplot(model_test, img, result, score_thr=.2)
    boxes_list, scores_list, labels_list = [], [], []
    for class_idx, dets in enumerate(result):
        if dets.shape[0] == 0:
            continue
        # Boxes are normalized by img_size before being handed to nms.
        boxes_list.extend(list(dets[:, :4] / PARAMS['img_size']))
        scores_list.extend(dets[:, 4].tolist())
        labels_list.extend([class_idx] * dets.shape[0])
    # Skip suppression and drawing when the model returned no detections;
    # nms was previously called even on empty lists.
    if boxes_list:
        boxes, scores, box_labels = nms(
            boxes=[boxes_list],
            scores=[scores_list],
            labels=[labels_list],
            weights=None,
            iou_thr=.5
        )
        boxes = boxes * PARAMS['img_size']
        for label_id, box, score in zip(box_labels, boxes, scores):
            if score >= .3:
                # Cast to int so the label is always a valid list index.
                label_id = int(label_id)
                color = label2color[label_id]
                img = draw_bbox(
                    img,
                    [int(coord) for coord in box],
                    classes[label_id],
                    color
                )
    imgs.append(img)
plot_imgs(imgs, size=8, cols=3, cmap=None)