# YOLOv5 inference with Weighted Boxes Fusion (WBF)

In [None]:
import os
import cv2
import json
import random
import torch
import numpy as np
import pandas as pd
from shutil import copyfile
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Names of the trained model versions that take part in the ensemble.
VERS = ['vclbyolo5_3', 'vclbyolo5_4', 'vclbyolo5_5']
WORK_DIR = '/content/drive/MyDrive/reef'

# One model folder per version; each folder holds a config.json.
MDLS_PATHS = [f'{WORK_DIR}/models_{ver}' for ver in VERS]

# Parsed config.json of every model, in the same order as MDLS_PATHS.
CONFIGS = []
for mdl_path in MDLS_PATHS:
    with open(f'{mdl_path}/config.json', 'r') as file:
        CONFIGS.append(json.load(file))
print('configs loaded:', CONFIGS)

DATA_PATH = f'{WORK_DIR}/data'
# The validation video id is read from the third model's config.
VAL_FOLD = CONFIGS[2]["val_video"]
YDATA_PATH = f'{WORK_DIR}/data_vv5_{VAL_FOLD}'

# Weighted Boxes Fusion settings, passed to run_wbf as iou_thr / skip_box_thr.
# NOTE: no trailing comma here -- `.5,` would create the 1-tuple (0.5,)
# instead of a float threshold.
WBF_IOU_TH = 0.5
WBF_SKIP_BOX_TH = 0.0001

## Install packages

In [None]:
# ensemble-boxes provides the weighted_boxes_fusion routine used by run_wbf.
!pip install ensemble-boxes
# Clone YOLOv5 and change into the checkout, so that the later
# torch.hub.load('.', ..., source='local') call resolves against it.
!git clone https://github.com/ultralytics/yolov5
%cd yolov5
# Install YOLOv5's own requirements (quiet mode).
%pip install -qr requirements.txt

## Inference

In [None]:
# Load one YOLOv5 detector per model folder from the local yolov5 checkout
# (the current working directory) and collect them in ensemble order.
MODELS = []
for weights_root in MDLS_PATHS:
    detector = torch.hub.load(
        '.',
        'custom',
        path=f'{weights_root}/weights/best.pt',
        source='local',
        force_reload=True,
    )
    # Very low confidence cut-off: keep nearly all candidate boxes.
    detector.conf = 0.01
    MODELS.append(detector)
    print('loaded:', weights_root)

In [None]:
def draw_boxes(img, bboxes, scores=None):
    """Draw labelled rectangles for ``bboxes`` onto ``img`` and return it.

    Args:
        img: Image passed to ``cv2.rectangle`` / ``cv2.putText``; the same
            object is returned.
        bboxes: Sequence of boxes. The first four items of each box are
            used as ``x0, y0, x1, y1`` and truncated to ``int``.
        scores: Optional per-box confidences, indexed in parallel with
            ``bboxes``. When omitted, ``None`` or empty, every box is
            labelled ``'gt'`` and drawn with colour ``(0, 0, 255)``;
            otherwise each box is labelled ``'pred NN.N%'`` (score * 100)
            and drawn with colour ``(0, 255, 0)``.

    Returns:
        ``img`` with the boxes and labels drawn on it.
    """
    # ``None`` replaces the former mutable ``[]`` default; len() is used
    # rather than truthiness so numpy arrays of scores keep working.
    has_scores = scores is not None and len(scores) > 0
    color = (0, 255, 0) if has_scores else (0, 0, 255)
    for idx, box in enumerate(bboxes):
        text = f'pred {scores[idx] * 100:.1f}%' if has_scores else 'gt'
        x0 = int(box[0])
        y0 = int(box[1])
        x1 = int(box[2])
        y1 = int(box[3])
        cv2.rectangle(img, (x0, y0), (x1, y1), color, 2)
        cv2.putText(
            img,
            text,
            (x0, y0 - 3),  # label sits just above the top-left corner
            cv2.FONT_HERSHEY_PLAIN,
            1.4,
            color,
            thickness=2,
        )
    return img

In [None]:
# Frame-level table; 'pos' flags rows whose annotation string is not the
# empty list literal '[]'.
train = pd.read_csv(f'{DATA_PATH}/train.csv')
train['pos'] = train['annotations'].ne('[]')

In [None]:
from ensemble_boxes import *

In [None]:
def run_wbf(bboxes, scores, 
            img_size, iou_thr=0.55, 
            skip_box_thr=0.7, weights=None):
    """Fuse per-model boxes with ``weighted_boxes_fusion``.

    Args:
        bboxes: One list of ``[x0, y0, x1, y1]`` pixel boxes per model.
        scores: One list of confidences per model, parallel to ``bboxes``.
        img_size: Indexable whose item 0 divides the y coordinates and
            item 1 divides the x coordinates (the caller passes
            ``img.shape``).
        iou_thr: Forwarded to ``weighted_boxes_fusion``.
        skip_box_thr: Forwarded to ``weighted_boxes_fusion``.
        weights: Forwarded to ``weighted_boxes_fusion``.

    Returns:
        ``(boxes, scores, labels)``: ``boxes`` is a list of int pixel
        ``[x0, y0, x1, y1]`` lists; ``scores`` and ``labels`` are returned
        exactly as produced by ``weighted_boxes_fusion``.
    """
    height, width = img_size[0], img_size[1]

    # WBF works on coordinates scaled by the image size, so rescale first.
    unit_boxes = [
        [
            [b[0] / width, b[1] / height, b[2] / width, b[3] / height]
            for b in model_boxes
        ]
        for model_boxes in bboxes
    ]
    # Every box gets the same label value (1).
    class_ids = [np.ones(len(model_scores)) for model_scores in scores]

    fused_boxes, fused_scores, fused_labels = weighted_boxes_fusion(
        unit_boxes,
        scores,
        class_ids,
        weights=weights,
        iou_thr=iou_thr,
        skip_box_thr=skip_box_thr,
    )

    # Back to integer pixel coordinates (truncating).
    pixel_boxes = [
        [
            int(b[0] * width),
            int(b[1] * height),
            int(b[2] * width),
            int(b[3] * height),
        ]
        for b in fused_boxes
    ]
    return pixel_boxes, fused_scores, fused_labels

def predict(model, img, size, aug=True):
    """Run ``model`` on ``img`` and return its boxes and confidences.

    Args:
        model: Callable invoked as ``model(img, size=size, augment=aug)``
            whose result exposes ``.pandas().xyxy[0]`` as a DataFrame with
            ``xmin, ymin, xmax, ymax, confidence`` columns.
        img: Image handed to the model unchanged.
        size: Inference size forwarded to the model.
        aug: Forwarded to the model as ``augment``.

    Returns:
        ``(bboxes, confs)``: an int array of ``[xmin, ymin, xmax, ymax]``
        rows and the matching confidence array, or ``([], [])`` when the
        model returns no boxes.
    """
    detections = model(img, size=size, augment=aug).pandas().xyxy[0]
    coords = detections[['xmin', 'ymin', 'xmax', 'ymax']].values
    # Guard clause: nothing detected -> plain empty lists.
    if not len(coords):
        return [], []
    return coords.astype(int), detections.confidence.values

In [None]:
%matplotlib inline

import ast  # stdlib: safe parsing of the annotation literals stored in train.csv

count = 0
max_count = 32  # stop after this many visualised frames

# Show ground truth vs. WBF-fused ensemble predictions for annotated frames
# of the validation video.
for _, x in train.iterrows():
    # Guard clause: skip other videos and frames without annotations.
    if x.video_id != VAL_FOLD or not x.pos:
        continue
    count += 1
    img_path = f'{YDATA_PATH}/images/val/{x.image_id}.jpg'
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None (it does not raise) when the file is
        # missing or unreadable; fail with a clear message instead of an
        # AttributeError on img.shape.
        raise FileNotFoundError(f'could not read image: {img_path}')
    print()
    print(count, '||', img_path, '||', img.shape)

    # The annotation column holds the string form of a list of dicts with
    # 'x', 'y', 'width', 'height' keys; literal_eval parses it without
    # executing arbitrary code (unlike eval).
    anno = ast.literal_eval(x.annotations)
    gt_bboxes = [
        [an['x'], an['y'], an['x'] + an['width'], an['y'] + an['height']]
        for an in anno
    ]
    print('ground truth boxes:', gt_bboxes)

    # Collect predictions per model; models that return nothing are left out.
    pred_bboxes = []
    pred_scores = []
    for model, config in zip(MODELS, CONFIGS):
        bboxes, confs = predict(model, img, size=config['resize'], aug=True)
        if len(bboxes) > 0:
            pred_bboxes.append(bboxes.tolist())
            pred_scores.append(confs.tolist())
    print('pred boxes BEFORE WBF:', pred_bboxes)
    print('pred scores BEFORE WBF:', pred_scores)

    pred_bboxes, pred_scores, labels = run_wbf(
        pred_bboxes, pred_scores,
        img_size=img.shape, iou_thr=WBF_IOU_TH,
        skip_box_thr=WBF_SKIP_BOX_TH, weights=None
    )
    print('pred boxes AFTER WBF:', pred_bboxes)
    print('pred scores AFTER WBF:', pred_scores)

    # cv2.imread yields BGR; convert to RGB before drawing so matplotlib
    # shows the image and the box colours as intended.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = draw_boxes(img, gt_bboxes, scores=[])
    img = draw_boxes(img, pred_bboxes, scores=pred_scores)
    plt.figure(figsize=(20, 10))
    plt.imshow(img)
    plt.show()
    if count >= max_count:
        break