In [1]:
import sys
sys.path.append('../input/easydict-master/easydict-master/')
sys.path.append('../input/weightedboxfusion/')
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
# https://github.com/mikel-brostrom/Yolov5_DeepSort_Pytorch
sys.path.append('../input/yolov5-deepsort-pytorch/Yolov5_DeepSort_Pytorch-master/Yolov5_DeepSort_Pytorch-master/deep_sort_pytorch/')
# Install helmet-assignment helper code
!pip install ../input/helmet-assignment-helpers/helmet-assignment-main/ > /dev/null 2>&1

In [2]:
import os
import gc
import re
import cv2
import yaml
import torch
import random
import shutil
import itertools
import subprocess
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from shutil import copyfile
from multiprocessing import Pool
from IPython.core.display import Video, display
from scipy.spatial.distance import cdist
from scipy.optimize import minimize, minimize_scalar
from sklearn.neighbors import KDTree
from sklearn.model_selection import train_test_split
from ensemble_boxes import nms

from helmet_assignment.score import NFLAssignmentScorer, check_submission
from helmet_assignment.features import add_track_features
from deep_sort.deep_sort import DeepSort
from utils.parser import get_config

import timm
import torch
import torch.nn.functional as F
import torchvision
from torch import nn
from torch.cuda.amp import autocast, GradScaler
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torch.nn.modules.loss import _WeightedLoss
from torchvision import models
from torchvision import transforms

from albumentations.pytorch import ToTensorV2
from albumentations import (
    HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, RandomResizedCrop,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Normalize, Cutout, CoarseDropout,
    ShiftScaleRotate, CenterCrop, Resize)

# Configuration

In [3]:
CFG = {
    # Fast submission
    "nosave"        : True if len(os.listdir('../input/nfl-health-and-safety-helmet-assignment/test/')) == 6 else False,
    # Global setting
    "seed"          : 42,
    "num_workers"   : 4,
    "device"        : "cuda:0" if torch.cuda.is_available() else "cpu",
    "input_path"    : '../input/nfl-health-and-safety-helmet-assignment/test/',
    "output_path"   : "./inference/",
    "output_cnnpath": "./inference_cnn/",
    # Setting for own YOLOv5 model
    "use_own_models": False,
    "save_zip"      : False,
    "yolo_weight"   : '../input/nfl-helmet-assignment-yolov5-models/result_0/best.pt ../input/nfl-helmet-assignment-yolov5-models/result_1/best.pt ../input/nfl-helmet-assignment-yolov5-models/result_2/best.pt',
    "yolo_size"     : 800,
    "train_size"    : (480, 800),
    "org_size"      : (720, 1280),
    "thr_cut_area"  : 0.2,
    # Setting for postprocessing with cnn
    "model"         : "tf_efficientnetv2_s_in21k",
    "batch_size"    : 256,
    "cnn_weight"    : '../input/nfl-helmet-assignment-cnn-models/helmet_cnn.pt',
    "cnn_size"      : 128,
    "width"         : 4,
    "height"        : 4,
    "thr_do_cnn"    : 0,
    # Setting for ensembling baseline model and own model
    "thr_ens_iou"   : 1,
    "thr_min_conf"  : 0.3,
    # Setting for postprocessing to trace helmet
    "add_helmet_num": 10,
    "thr_strace_iou": 0.2,
    "thr_etrace_iou": 0.3,
    "thr_continuous": 3,
    "thr_add_helmet": 0.8,
    "thr_add_iou"   : 0.5,
    # Setting for postprocessing to delete FP helmet
    "thr_del_iou"   : 0.6,
    "thr_del_trace" : 10,
    "thr_del_conf"  : 0.5,
    # Setting for finding label with tracking data
    "dig_max"       : 30,
    "dig_step"      : 5,
    "max_iter"      : 1000
}

CFG

{'nosave': True,
 'seed': 42,
 'num_workers': 4,
 'device': 'cuda:0',
 'input_path': '../input/nfl-health-and-safety-helmet-assignment/test/',
 'output_path': './inference/',
 'output_cnnpath': './inference_cnn/',
 'use_own_models': False,
 'save_zip': False,
 'yolo_weight': '../input/nfl-helmet-assignment-yolov5-models/result_0/best.pt ../input/nfl-helmet-assignment-yolov5-models/result_1/best.pt ../input/nfl-helmet-assignment-yolov5-models/result_2/best.pt',
 'yolo_size': 800,
 'train_size': (480, 800),
 'org_size': (720, 1280),
 'thr_cut_area': 0.2,
 'model': 'tf_efficientnetv2_s_in21k',
 'batch_size': 256,
 'cnn_weight': '../input/nfl-helmet-assignment-cnn-models/helmet_cnn.pt',
 'cnn_size': 128,
 'width': 4,
 'height': 4,
 'thr_do_cnn': 0,
 'thr_ens_iou': 1,
 'thr_min_conf': 0.3,
 'add_helmet_num': 10,
 'thr_strace_iou': 0.2,
 'thr_etrace_iou': 0.3,
 'thr_continuous': 3,
 'thr_add_helmet': 0.8,
 'thr_add_iou': 0.5,
 'thr_del_iou': 0.6,
 'thr_del_trace': 10,
 'thr_del_conf': 0.5,
 

In [4]:
def rescale(x, y, w, h, tr_h, tr_w, or_h, or_w):
    # Rescale to training size
    x_tr = x * tr_w
    y_tr = y * tr_h
    w_tr = w * tr_w
    h_tr = h * tr_h
    # Rescale to original size
    x_or = int(or_w * x_tr / tr_w)
    y_or = int(or_h * y_tr / tr_h)
    w_or = int(or_w * w_tr / tr_w)
    h_or = int(or_h * h_tr / tr_h)
    # Minimum size should be 5.
    # https://www.kaggle.com/c/nfl-health-and-safety-helmet-assignment/discussion/277388
    w_or = 5 if w_or <= 5 else w_or
    h_or = 5 if h_or <= 5 else h_or
    # Calculate left and top by using center values
    left = 0 if x_or - w_or/2 < 0 else x_or - int(w_or/2)
    top  = 0 if y_or - h_or/2 < 0 else y_or - int(h_or/2)
    return [left, w_or, top, h_or]

def read_label(target_file, train_shape, org_shape):
    filename  = os.path.basename(target_file)[:-4]
    frame_num = int(filename[filename.rfind("_")+1:])
    with open(target_file, 'r+') as file:
        detect_result = file.read()
    detect_result = list(map(float, re.split(r'[\n ]', detect_result)[:-1]))
    detect_result = np.array(detect_result).reshape(-1,6)
    
    detect_results = []
    for i, v in enumerate(detect_result):
        rescaled = rescale(*v[1:5], *train_shape, *org_shape)
        detect_results.append([frame_num, filename,
                               int(detect_result[i,0]), *rescaled, detect_result[i,5]])

    return pd.DataFrame(detect_results, columns=["num","video_frame","label","left","width","top","height","conf"])

def get_range(df, org_size):
    oh, ow = org_size[:2]
    ratio  = ow/oh
    x_min, x_max = df.left.min(),  df.right.max()
    y_min, y_max = df.top.min(),   df.bottom.max()
    w_max, h_max = df.width.max(), df.height.max()
    x_min = 0  if x_min < w_max else x_min - w_max
    x_max = ow if x_max > ow - w_max else x_max + w_max
    y_min = 0  if y_min < h_max else y_min - h_max
    y_max = oh if y_max > oh -h_max  else y_max + h_max
    
    cw = x_max - x_min
    ch = y_max - y_min
    rw = cw / ow
    rh = ch / oh 
    if rw / rh > 0:
        r = int((cw / ratio - ch) / 2)
        y_min = 0  if y_min - r < 0  else y_min - r
        y_max = oh if y_max + r > oh else y_max + r
    else:
        r = int((ch * ratio - cw) / 2)
        x_min = 0  if x_min - r < 0  else x_min - r
        x_max = ow if x_max + r > ow else x_max + r
    
    return x_min, x_max, y_min, y_max

# Inference
Inference with own YOLOv5 model to detect helmet.

In [5]:
!mkdir /root/.config/Ultralytics
!cp ../input/arial-font/arial.ttf /root/.config/Ultralytics/Arial.ttf
!cp -r ../input/yolov5-11-march-2021/yolov5-master ./yolov5

In [6]:
baseline = pd.read_csv(f'../input/nfl-health-and-safety-helmet-assignment/test_baseline_helmets.csv')
baseline["right"]  = baseline.left + baseline.width
baseline["bottom"] = baseline.top  + baseline.height
baseline.head(1)

Unnamed: 0,video_frame,left,width,top,height,conf,right,bottom
0,58102_002798_Sideline_1,402,18,418,20,0.488037,420,438


In [7]:
%%time

df_all_results = pd.DataFrame()

if not CFG["nosave"] and CFG["use_own_models"]:
    for f in os.listdir(CFG["input_path"]):
        !rm -rf ./inference/

        in_path      = CFG["input_path"]  + f
        out_img_path = CFG["output_path"] + "frames/"
        out_cut_path = CFG["output_path"] + "cut_frames/"
        out_res_path = CFG["output_path"] + f[:-4]
        image_name   = f[:-4]
        extention    = "jpg"
        # Make save directories
        os.makedirs(out_img_path, exist_ok=True)
        os.makedirs(out_cut_path, exist_ok=True)
        os.makedirs(out_res_path, exist_ok=True)
        # Split into frames
        cmd = 'ffmpeg -i \"{}\" -qscale:v 2 \"{}/{}_%d.{}\"'.format(in_path, out_img_path, image_name, extention)
        subprocess.call(cmd, shell=True)

        # Cut the area for expanding helmet size
        df_baseline_cut = pd.DataFrame()
        for f in tqdm(os.listdir(out_img_path)):
            df = baseline[(baseline.video_frame==f[:-4])&(baseline.conf>CFG["thr_cut_area"])].copy()
            x_min, x_max, y_min, y_max = get_range(df, CFG["org_size"])
            img = cv2.imread(out_img_path + f)
            img_cut = img[y_min:y_max, x_min:x_max]
            df["x_min"] = x_min
            df["x_max"] = x_max
            df["y_min"] = y_min
            df["y_max"] = y_max
            cv2.imwrite(out_cut_path + f, img_cut)
            df_baseline_cut = df_baseline_cut.append(df)
        df_baseline_cut = df_baseline_cut[["video_frame","x_min","x_max","y_min","y_max"]].drop_duplicates()
        df_baseline_cut.reset_index(drop=True, inplace=True)

        # Detect helmets with YOLOv5
        if CFG["save_zip"]:
            !python ./yolov5/detect.py \
                          --weights {CFG["yolo_weight"]} \
                          --source {out_img_path} \
                          --img {CFG["yolo_size"]} \
                          --device 0 \
                          --save-txt \
                          --save-conf \
                          --project {out_res_path}
            # Save for debugging results
            shutil.make_archive(f"{image_name}_{i}", 'zip', root_dir=out_res_path)
        else:
            !python ./yolov5/detect.py \
                          --weights {CFG["yolo_weight"]} \
                          --source {out_cut_path} \
                          --img {CFG["yolo_size"]} \
                          --device 0 \
                          --nosave \
                          --save-txt \
                          --save-conf \
                          --project {out_res_path}

        # Read all of results
        label_path = out_res_path + "/exp/labels/"
        df_results = pd.DataFrame()
        for l in os.listdir(label_path):
            df_resize2org = df_baseline_cut[df_baseline_cut.video_frame==l[:-4]]
            or_h = df_resize2org.y_max - df_resize2org.y_min
            or_w = df_resize2org.x_max - df_resize2org.x_min
            df_res = read_label(label_path + l, CFG["train_size"], (or_h,or_w))
            df_results = df_results.append(df_res)
        df_results = df_results.sort_values("num").reset_index(drop=True)
        # Adjust left and top with cropped area
        df_results = df_results.merge(df_baseline_cut, on="video_frame")
        df_results["left"] += df_results["x_min"]
        df_results["top"]  += df_results["y_min"]
        df_all_results = df_all_results.append(df_results)

    df_all_results.reset_index(drop=True, inplace=True)
else:
    df_all_results = pd.DataFrame([], columns=["num","label"])

CPU times: user 2.41 ms, sys: 0 ns, total: 2.41 ms
Wall time: 3.02 ms


In [8]:
print(df_all_results.shape)
df_all_results.head()

(0, 2)


Unnamed: 0,num,label


In [9]:
df_all_results.to_csv("yolo_results.csv", index=False)

# Ensemble with baseline
In this competition, baseline result has been provided by organizer, so ensemble baseline result with own model results generated above.

In [10]:
baseline      = pd.read_csv(f'../input/nfl-health-and-safety-helmet-assignment/test_baseline_helmets.csv')
model_results = df_all_results.drop(["num","label"], axis=1).copy()
df_with_baseline = pd.concat([model_results, baseline])
df_with_baseline = df_with_baseline[(df_with_baseline.width>0)&(df_with_baseline.height>0)]
df_with_baseline = df_with_baseline[df_with_baseline.conf>CFG["thr_min_conf"]].reset_index(drop=True)
df_with_baseline["label"]   = 0
df_with_baseline["right"]   = df_with_baseline.left + df_with_baseline.width
df_with_baseline["bottom"]  = df_with_baseline.top  + df_with_baseline.height
df_with_baseline["left"]   /= 1280
df_with_baseline["right"]  /= 1280
df_with_baseline["top"]    /= 720
df_with_baseline["bottom"] /= 720

print(baseline.shape, model_results.shape, df_with_baseline.shape)
df_with_baseline.head(2)

(72386, 6) (0, 0) (57201, 9)


Unnamed: 0,video_frame,left,width,top,height,conf,label,right,bottom
0,58102_002798_Sideline_1,0.314063,18,0.580556,20,0.488037,0,0.328125,0.608333
1,58102_002798_Sideline_1,0.291406,21,0.597222,23,0.748047,0,0.307812,0.629167


In [11]:
if CFG["thr_ens_iou"] < 1:
    # Run NMS to delete duplicate BBoxes.
    df_ensembled = pd.DataFrame()
    for v in tqdm(df_with_baseline.video_frame.unique()):
        df = df_with_baseline[df_with_baseline.video_frame==v].copy()
        boxes  = [np.array(df[["left","top","right","bottom"]]).tolist()]
        scores = [list(np.array(df.conf))]
        labels = [list(np.array(df.label))]
        boxes, scores, labels = nms(boxes, scores, labels, weights=None, iou_thr=CFG["thr_ens_iou"])
        df_nms = pd.DataFrame(np.hstack([boxes, scores.reshape(-1,1)]))
        df_nms["video_frame"] = v
        df_ensembled = df_ensembled.append(df_nms)
else:
    df_ensembled = df_with_baseline[["left","top","right","bottom","conf","video_frame"]].copy()

In [12]:
df_ensembled.columns   = ["left","top","right","bottom","conf","video_frame"]
df_ensembled["left"]   = (df_ensembled.left *1280).astype(int)
df_ensembled["right"]  = (df_ensembled.right*1280).astype(int)
df_ensembled["top"]    = (df_ensembled.top   *720).astype(int)
df_ensembled["bottom"] = (df_ensembled.bottom*720).astype(int)
df_ensembled["width"]  = df_ensembled.right  - df_ensembled.left
df_ensembled["height"] = df_ensembled.bottom - df_ensembled.top
df_ensembled = df_ensembled[["video_frame","left","width","top","height","conf"]]

In [13]:
print(df_ensembled.shape)
df_ensembled.head(2)

(57201, 6)


Unnamed: 0,video_frame,left,width,top,height,conf
0,58102_002798_Sideline_1,402,18,418,19,0.488037
1,58102_002798_Sideline_1,373,21,430,23,0.748047


In [14]:
df_ensembled.to_csv("ensembled_results.csv", index=False)

# Postprosessing
Remove not helmet bbox using CNN helmet detector.

In [15]:
def check(l, r, t, b):
    if l < 0 or 1280 < r:
        return False
    if t < 0 or 720 < b:
        return False
    return True

def get_img(path):
    im_bgr = cv2.imread(path)
    im_rgb = im_bgr[:, :, ::-1]
    return im_rgb

In [16]:
class Model(nn.Module):
    def __init__(self, model_name, pretrained=True):
        super(Model, self).__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained, in_chans=3)
        self.n_features = self.model.classifier.in_features
        self.model.classifier = nn.Linear(self.n_features, 1)

    def forward(self, x):
        output = self.model(x)
        return output

class NFLDataset(Dataset):
    def __init__(self, df, transforms=None):
        super().__init__()
        self.df = df.reset_index(drop=True).copy()
        self.transforms   = transforms
        
    def __len__(self):
        return self.df.shape[0]
    
    def __getitem__(self, index: int):
        img = get_img(self.df.loc[index]['path'])
        if self.transforms:
            img = self.transforms(image=img)['image']
        return img
    
def get_inference_transforms():
    return Compose([
            Resize(CFG['cnn_size'], CFG['cnn_size']),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
            ToTensorV2(p=1.0)], p=1)

def inference_one_epoch(model, data_loader, device):
    model.eval()

    image_preds_all = []
    pbar = tqdm(enumerate(data_loader), total=len(data_loader))
    for step, (imgs) in pbar:
        imgs = imgs.to(device).float()
        image_preds = model(imgs)
        image_preds_all += [image_preds.detach().cpu().numpy()]
        
    image_preds_all = np.concatenate(image_preds_all, axis=0)
    return image_preds_all

In [17]:
if not CFG["nosave"] and 0 < CFG["thr_do_cnn"]:
    !rm -rf ./inference/
    !rm -rf {CFG['output_cnnpath']}

    cnn_results = []
    for f in os.listdir(CFG["input_path"]):
        !rm -rf ./inference/

        print(f)
        in_path      = CFG["input_path"]  + f
        out_img_path = CFG["output_path"] + "frames/"
        out_res_path = f"{CFG['output_cnnpath']}{f}/"
        image_name = f[:-4]
        extention  = "jpg"
        # Make save directories
        os.makedirs(out_img_path, exist_ok=True)
        os.makedirs(out_res_path, exist_ok=True)
        # Split into frames
        cmd = 'ffmpeg -i \"{}\" -qscale:v 2 \"{}/{}_%d.{}\"'.format(in_path, out_img_path, image_name, extention)
        subprocess.call(cmd, shell=True)

        for f2 in tqdm(os.listdir(out_img_path)):
            img = cv2.imread(out_img_path + f2)
            df  = df_ensembled[df_ensembled.video_frame==f2[:-4]].reset_index(drop=True)
            for i, (l, w, t, h, c) in df[["left","width","top","height","conf"]].iterrows():
                # Expand the cut size. The helmet should be placed center of image.
                l, w, t, h = int(l), int(w), int(t), int(h)
                ad_r = l + w + int(w/2*(CFG["width"] -1))
                ad_b = t + h + int(h/2*(CFG["height"]-1))
                ad_l = l - int(w/2*(CFG["width"] -1))
                ad_t = t - int(h/2*(CFG["height"]-1))
                path = ""
                do_inference = False
                # If expanded size is over the original size, it will not be used in prediction.
                if c < CFG["thr_do_cnn"] and check(ad_l, ad_r, ad_t, ad_b):
                    path = f"{out_res_path}{f2.replace('.jpg',f'_{i}.jpg')}"
                    cv2.imwrite(path, img[ad_t:ad_b, ad_l:ad_r])
                    do_inference = True
                cnn_results.append([f2[:-4], path, l, w, t, h, do_inference])

        !rm -rf {out_img_path}

In [18]:
if not CFG["nosave"] and 0 < CFG["thr_do_cnn"]:
    df_inference = pd.DataFrame(cnn_results, columns=["video_frame","path","left","width","top","height","do_inference"])

    print(df_inference.do_inference.value_counts())
    print(df_inference.shape)
    df_inference.head(2)

In [19]:
if not CFG["nosave"] and 0 < CFG["thr_do_cnn"]:
    # Define a model (EfficientNetV2)
    model = Model(CFG['model'], pretrained=False)
    model.load_state_dict(torch.load(CFG['cnn_weight']))
    model.to(CFG["device"])

    dataset = NFLDataset(df_inference[df_inference.do_inference],
                         transforms=get_inference_transforms())
    data_loader = torch.utils.data.DataLoader(dataset, 
                                              batch_size=CFG['batch_size'],
                                              num_workers=CFG['num_workers'],
                                              shuffle=False,
                                              pin_memory=False)

    with torch.no_grad():
        preds = inference_one_epoch(model, data_loader, CFG["device"])

In [20]:
if not CFG["nosave"] and 0 < CFG["thr_do_cnn"]:
    df_inference_done = df_inference[df_inference.do_inference].copy()
    df_inference_done["pred"] = preds
    # Concatenate results of CNN and the original data that is not used in CNN.
    df_inference = pd.concat([df_inference_done,
                              df_inference[df_inference.do_inference==False]]).reset_index(drop=True)
    df_inference.loc[df_inference.pred.isnull(), "pred"] = 1

    print(df_ensembled.shape)
    merge_cols   = ["video_frame","left","width","top","height"]
    df_ensembled = df_ensembled.merge(df_inference[(df_inference.pred > 0)][merge_cols], on=merge_cols)
    df_ensembled.reset_index(drop=True, inplace=True)

In [21]:
print(df_ensembled.shape)
df_ensembled.head()

(57201, 6)


Unnamed: 0,video_frame,left,width,top,height,conf
0,58102_002798_Sideline_1,402,18,418,19,0.488037
1,58102_002798_Sideline_1,373,21,430,23,0.748047
2,58102_002798_Sideline_1,711,20,405,21,0.827148
3,58102_002798_Sideline_1,392,21,452,22,0.82959
4,58102_002798_Sideline_1,457,19,366,23,0.837402


In [22]:
df_ensembled.to_csv("ensembled_results_with_postprocessing.csv", index=False)

# Postprocessing to trace helmets
In this section, I implemented tracing function. This competition data is video and fps is about 60, so if the helmet can not be detected specific image, we can add BBoxes manually by using before and after BBoxes info.

In [23]:
def calc_iou(a, b):
    # Calculate 1:N IoU
    a_area = (a[2] - a[0] + 1)     * (a[3] - a[1] + 1)
    b_area = (b[:,2] - b[:,0] + 1) * (b[:,3] - b[:,1] + 1)
    
    abx_mn = np.maximum(a[0], b[:,0]) # xmin
    aby_mn = np.maximum(a[1], b[:,1]) # ymin
    abx_mx = np.minimum(a[2], b[:,2]) # xmax
    aby_mx = np.minimum(a[3], b[:,3]) # ymax
    w = np.maximum(0, abx_mx - abx_mn + 1)
    h = np.maximum(0, aby_mx - aby_mn + 1)
    intersect = w*h
    
    iou = intersect / (a_area + b_area - intersect)
    return iou

def get_sorted_idx(c, n):
    # Get IoU ordered list. High IoU BBox is prioritized.
    next_helmets = np.array([n.left, n.top, n.right, n.bottom]).T
    idx_ious     = [[idx, calc_iou(np.array([cv[1], cv[3], cv[9], cv[10]]), next_helmets).max()] for idx, cv in enumerate(np.array(c))]
    idx_ious     = np.array(idx_ious)
    sorted_idx   = idx_ious[np.argsort(idx_ious[:, 1])][::-1]
    sorted_idx   = (sorted_idx[:, 0]).astype(int)
    return sorted_idx

def check_and_recalc(l, w, t, h):
    if l < 0:
        l = 0
    if 1280 < l + w:
        w = 1280 - l
    if t < 0:
        t = 0
    if 720 < t + h:
        h = 720 - t
    return l, w, t, h

def check_add_data(df1, df2, helmet_id):
    # Width and height should be over 5 for DeepSort.
    if df1.width < 5 or df1.height < 5:
        return False
    if CFG["add_helmet_num"] <= df1.added:
        return False
    if CFG["thr_add_helmet"] <= df1.conf:
        return True
    if CFG["thr_continuous"] <= df2[df2.helmet==helmet_id].shape[0]:
        return True
    return False

def trace(df):
    df = df.copy()
    
    for v in tqdm(df.video.unique()):
        df_video = df[df.video==v].copy()
        thr_trace_iou = CFG['thr_strace_iou'] if -1 < v.find("Side") else CFG["thr_etrace_iou"]
        max_helmet_no = 0
        max_frame_no  = df_video.frame.max()
        for idx, f in enumerate(sorted(df_video.frame.unique())):
            df_curt_video   = df_video[df_video.frame==f].reset_index(drop=True)
            df_next_video   = df_video[df_video.frame==f+1].reset_index(drop=True)
            dict_next_video = df_next_video.T.to_dict()
            if max_frame_no < f+1:
                continue
            # Sort with IoU
            idxes = get_sorted_idx(df_curt_video, df_next_video)
            for helmet in idxes:
                l, r = df_curt_video.left[helmet], df_curt_video.right[helmet]
                t, b = df_curt_video.top[helmet],  df_curt_video.bottom[helmet]
                # Calcurate distance of target bbox and next frame bboxes. 
                curt_helmet  = np.array([[l, t, r, b]])
                next_helmets = np.array(df_next_video[["left","top","right","bottom"]])
                nearest_idx  = np.argmin(cdist(curt_helmet, next_helmets))
                # Calculate IoU of target bbox and the nearest next frame bbox.
                curt_helmet  = np.array([l, t, r, b])
                next_helmet  = np.array([next_helmets[nearest_idx]])
                iou = calc_iou(curt_helmet, next_helmet)
                
                # Successful to trace
                if thr_trace_iou <= iou and dict_next_video[nearest_idx]["helmet"] is None:
                    h  = df_curt_video.helmet[helmet]
                    a  = df_curt_video.added[helmet]
                    cv = df_curt_video.video_frame[helmet]
                    d  = dict_next_video[nearest_idx]
                    nv, nl, nt, nr, nb = d["video_frame"], d["left"], d["top"], d["right"], d["bottom"]
                    if h is None:
                        # New detected helmet
                        helmet_id = max_helmet_no
                        max_helmet_no += 1
                    else:
                        # Tracing helmet
                        helmet_id = h
                    if 0 < a:
                        # Re-detected helmet using new bbox added manually.
                        # If failed to re-detect helmet, bboxes added manually will be removed.
                        df_video.loc[df_video.helmet==helmet_id, "added"] = 0
                    if idx==0:
                        df_video.loc[(df_video.video_frame==cv)&(df_video.left==l)&(df_video.top==t)&(df_video.right==r)&(df_video.bottom==b),
                                     "helmet"] = helmet_id
                    df_video.loc[(df_video.video_frame==nv)&(df_video.left==nl)&(df_video.top==nt)&(df_video.right==nr)&(df_video.bottom==nb),
                                 ["helmet","x_move","y_move"]] = [helmet_id, nl-l, nt-t]
                    dict_next_video[nearest_idx]["helmet"] = helmet_id
                    
                # Failed to trace because of no helmet on this location.
                else:
                    # Add new bbox manually.
                    if df_curt_video.helmet[helmet] is not None:
                        df_add = df_curt_video.iloc[helmet,:].copy()
                        # Add movement amount of previous image
                        df_add["left"] += df_add.x_move
                        df_add["top"]  += df_add.y_move
                        df_add[["left","width","top","height"]] = check_and_recalc(df_add.left, df_add.width, df_add.top, df_add.height)
                        if check_add_data(df_add, df_video, df_curt_video.helmet[helmet]):
                            df_add["video_frame"] = v + "_" + str(f+1)
                            df_add["frame"]  = f+1
                            df_add["right"]  = df_add.left + df_add.width
                            df_add["bottom"] = df_add.top  + df_add.height
                            df_add["conf"]   = CFG["thr_min_conf"]+0.01
                            df_add["added"] += 1
                            df_video = df_video.append(df_add)
        df = df[df.video != v]
        df = df.append(df_video).reset_index(drop=True)
        
    df.reset_index(drop=True, inplace=True)
    return df

def delete_sideline_helmets(df, df_side):
    # Detect and delete sideline helmet.
    df = df.copy()
    
    df_sideline_deleted = pd.DataFrame()
    for v in tqdm(df[df.video.str.contains("Side")].video.unique()):
        df_video  = df[df.video==v].reset_index(drop=True)
        direction = df_side[df_side.video==v].y_move_total.values[0]

        olds      = None
        mean_move = 0
        for f in sorted(df_video.frame.unique()):
            df_this_frame = df_video[df_video.frame==f].copy()
            l_min = df_this_frame.left.min()
            r_max = df_this_frame.right.max()
            t_min = df_this_frame.top.min()
            b_max = df_this_frame.bottom.max()

            if olds is None:
                # Save previous max detected area.
                olds = np.array([l_min, r_max, t_min, b_max])
            else:
                # Compare with previous area.
                diff_move = np.array([l_min, r_max, t_min, b_max]) - olds
                diff_move = np.sum(np.abs(diff_move))
                if   10 <= f < 20:
                    mean_move += diff_move
                elif f == 20:
                    # Calculate threthold for detecting sideline helmets.
                    mean_move /= 10
                elif 20 < f and mean_move*3 < diff_move:
                    if 0 <= direction:
                        # Players move to upper side. Sideline helmets appear in top of image.
                        df_video.loc[(df_video.frame==f)&(df_video.bottom <= 120), "width"] = 0
                    else:
                        # Players move to under side. Sideline helmets appear in bottom of image.
                        df_video.loc[(df_video.frame==f)&(630 <= df_video.top), "width"] = 0

                    # Delete all of helmets that have the same ids
                    df_delete_helmet_ids = df_video[(df_video.frame==f)&
                                                    (df_video.width==0)&
                                                    (df_video.helmet.notnull())].helmet.unique()
                    if 0 < len(df_delete_helmet_ids):
                        df_video = df_video[~df_video.helmet.isin(df_delete_helmet_ids)].copy()
                    # Save new area without sideline helmets
                    df_this_frame = df_video[(df_video.frame==f)&(df_video.width!=0)].copy()
                    l_min = df_this_frame.left.min()
                    r_max = df_this_frame.right.max()
                    t_min = df_this_frame.top.min()
                    b_max = df_this_frame.bottom.max()
                # Save previous max detected area to use in next frame
                olds = np.array([l_min, r_max, t_min, b_max])

        df_sideline_deleted = df_sideline_deleted.append(df_video[df_video.width!=0])

    df_sideline_deleted = pd.concat([df[df.video.str.contains("End")], df_sideline_deleted])
    return df_sideline_deleted.reset_index(drop=True)

def get_delete_bbox(df):
    # Delete helmet if there is no helmets before and after target frame
    df = df.copy()
    df_delete_boxes = pd.DataFrame()
    for v in tqdm(df.video.unique()):
        df_video = df[(df.video==v)].copy()
        n_max    = df_video.frame.max() - 1
        for frame2 in range(2, n_max):
            frame1 = frame2 - 1
            frame3 = frame2 + 1
            df_this_frame = df_video[(df_video.frame.isin([frame1, frame2, frame3]))].copy()
            if df_this_frame.shape[0]==0:
                continue
            df_this_frame.loc[df_this_frame.frame==frame1, "conf"] = 0.7
            df_this_frame.loc[df_this_frame.frame==frame2, "conf"] = 0.5
            df_this_frame.loc[df_this_frame.frame==frame3, "conf"] = 1
            boxes  = [np.array(df_this_frame[["left","top","right","bottom"]]).tolist()]
            scores = [list(np.array(df_this_frame.conf))]
            labels = [list(np.array(df_this_frame.label))]
            boxes, scores, labels = nms(boxes, scores, labels, weights=None, iou_thr=CFG["thr_del_iou"])
            delete_boxes = boxes[np.where(scores==0.5)[0]]
            delete_boxes = pd.DataFrame(delete_boxes)
            delete_boxes["video_frame"] = v + "_" + str(frame2)
            df_delete_boxes = df_delete_boxes.append(delete_boxes)
    df_delete_boxes.columns = ["left","top","right","bottom","video_frame"]
    return df_delete_boxes.reset_index(drop=True)

Add bboxes manually

In [24]:
if not CFG["nosave"]:
    df_ensembled["video"]  = df_ensembled.video_frame.apply(lambda x: "_".join(x.split("_")[:3]))
    df_ensembled["frame"]  = df_ensembled.video_frame.apply(lambda x: x.split("_")[3]).astype(int)
    df_ensembled["label"]  = 1
    df_ensembled["right"]  = df_ensembled.left + df_ensembled.width
    df_ensembled["bottom"] = df_ensembled.top  + df_ensembled.height
    df_ensembled["x_move"] = 0
    df_ensembled["y_move"] = 0
    df_ensembled["helmet"] = None
    df_ensembled["added"]  = 0

In [25]:
if not CFG["nosave"]:
    df_ensembled = trace(df_ensembled)
    df_ensembled = df_ensembled[df_ensembled.added==0].reset_index(drop=True)
    print(df_ensembled.shape)

In [26]:
print(df_ensembled.shape)
df_ensembled.head(2)

(57201, 6)


Unnamed: 0,video_frame,left,width,top,height,conf
0,58102_002798_Sideline_1,402,18,418,19,0.488037
1,58102_002798_Sideline_1,373,21,430,23,0.748047


In [27]:
df_ensembled.to_csv("ensembled_results_with_post_trace.csv", index=False)

Delete sideline helmets

In [28]:
if not CFG["nosave"]:
    df_side = df_ensembled.copy()
    df_side_frame = df_side.groupby("video", as_index=False).frame.max()
    df_side_frame.columns = ["video","frame_last"]
    df_side_frame["frame_half"] = (df_side_frame.frame_last/2).astype(int)
    df_side_frame["frame_4_3"]  = (df_side_frame.frame_last - (df_side_frame.frame_half/2)).astype(int)
    df_side = df_side.merge(df_side_frame, on="video")

    df_direction = df_side[df_side.frame_half <= df_side.frame].groupby("video", as_index=False).agg({"x_move":"mean", "y_move":"mean"})
    df_direction.columns = ["video","x_move_total","y_move_total"]
    df_side = df_side_frame.merge(df_direction, on="video")
    
    df_ensembled = delete_sideline_helmets(df_ensembled, df_side)

In [29]:
print(df_ensembled.shape)
df_ensembled.head(2)

(57201, 6)


Unnamed: 0,video_frame,left,width,top,height,conf
0,58102_002798_Sideline_1,402,18,418,19,0.488037
1,58102_002798_Sideline_1,373,21,430,23,0.748047


In [30]:
df_ensembled.to_csv("ensembled_results_with_post_trace_del.csv", index=False)

Delete helmet if there is no helmets before and after target frame

In [31]:
if not CFG["nosave"]:
    df_ensembled["left"]   /= 1280
    df_ensembled["right"]  /= 1280
    df_ensembled["top"]    /=720
    df_ensembled["bottom"] /=720
    
    df_delete_boxes = get_delete_bbox(df_ensembled)
    df_delete_boxes["delete"] = 1
    print(df_delete_boxes.shape)
    
    df_deleted = df_ensembled.merge(df_delete_boxes, on=["left","top","right","bottom","video_frame"], how="left")
    df_deleted.loc[CFG['thr_del_conf'] <= df_deleted.conf, "delete"] = np.nan
    df_deleted = df_deleted[df_deleted.delete.isnull()].reset_index(drop=True)

In [32]:
if not CFG["nosave"]:
    # Do group by with helmet id and delete some BBoxes if the confidence is low.
    df_traced_helmet_cnt = df_deleted.groupby(["video","helmet"], as_index=False).video_frame.count()
    df_traced_helmet_max = df_deleted.groupby(["video","helmet"], as_index=False).conf.max()
    df_traced_helmet_cnt.columns = ["video","helmet","traced_cnt"]
    df_traced_helmet_max.columns = ["video","helmet","traced_conf_max"]
    df_deleted = df_deleted.merge(df_traced_helmet_cnt, on=["video","helmet"])
    df_deleted = df_deleted.merge(df_traced_helmet_max, on=["video","helmet"])
    df_deleted = df_deleted[(df_deleted.traced_cnt >= CFG["thr_del_trace"])|
                            (df_deleted.traced_conf_max >= CFG["thr_del_conf"])].reset_index(drop=True)
    print(df_deleted.shape)

In [33]:
if not CFG["nosave"]:
    # Run NMS again because some helmets are added manually.
    df_ensembled = pd.DataFrame()
    for v in tqdm(df_deleted.video_frame.unique()):
        df = df_deleted[df_deleted.video_frame==v].copy()
        boxes  = [np.array(df[["left","top","right","bottom"]]).tolist()]
        scores = [list(np.array(df.conf))]
        labels = [list(np.array(df.label))]
        boxes, scores, labels = nms(boxes, scores, labels, weights=None, iou_thr=CFG["thr_add_iou"])
        df_nms = pd.DataFrame(np.hstack([boxes, scores.reshape(-1,1)]))
        df_nms["video_frame"] = v
        df_ensembled = df_ensembled.append(df_nms)

In [34]:
if not CFG["nosave"]:
    df_ensembled.columns   = ["left","top","right","bottom","conf","video_frame"]
    df_ensembled = df_ensembled.merge(df_deleted, on=["left","top","right","bottom","conf","video_frame"])
    df_ensembled = df_ensembled[["video_frame","left","width","top","height","conf","video","frame","x_move","y_move","helmet"]]
    df_ensembled["left"]   = (df_ensembled.left *1280).astype(int)
    df_ensembled["top"]    = (df_ensembled.top   *720).astype(int)
    df_ensembled["right"]  = df_ensembled.left + df_ensembled.width
    df_ensembled["bottom"] = df_ensembled.top  + df_ensembled.height

In [35]:
print(df_ensembled.shape)
df_ensembled.head(2)

(57201, 6)


Unnamed: 0,video_frame,left,width,top,height,conf
0,58102_002798_Sideline_1,402,18,418,19,0.488037
1,58102_002798_Sideline_1,373,21,430,23,0.748047


In [36]:
df_ensembled.to_csv("ensembled_results_with_post_trace_del2.csv", index=False)

# Find labels

This and next sections are based on the amaizing notebooks below. Thanks for sharing!
- https://www.kaggle.com/its7171/nfl-baseline-simple-helmet-mapping
- https://www.kaggle.com/robikscube/helper-code-helmet-mapping-deepsort

Note: Each videos starts 10 frames before the snap. Tracking data is including events such as snap, so a snap event in tracking data equal frame 10 in video data. By starting from snap event, we can caluculate offsets from time of snap. For example, frame 10 in video data is offset 0, and frame 1 is minus offset time. Tracking data is 6 fps and video data is 59.94 fps (see next cell).
The function, find nearest, seeks the nearest tracking data by calculating the distance between video frame and point data in tracking data. 

In [37]:
#        ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||=1sec in video data
#        ^        ^
#      start     snap (offset 0)
#  |        |         |         |         |         |         |      =1sec+a in tracking data

In [38]:
def add_cols(df):
    df['game_play'] = df['video_frame'].str.split('_').str[:2].str.join('_')
    if 'video' not in df.columns:
        df['video'] = df['video_frame'].str.split('_').str[:3].str.join('_') + '.mp4'
    return df

def find_nearest(array, value):
    value = int(value)
    array = np.asarray(array).astype(int)
    idx   = (np.abs(array - value)).argmin()
    return array[idx]

def norm_arr(a):
    a = a-a.min()
    a = a/a.max()
    return a
    
def dist(a1, a2):
    return np.linalg.norm(a1-a2)

def dist_for_different_len(a1, a2):
    assert len(a1) >= len(a2), f'{len(a1)}, {len(a2)}'
    len_diff = len(a1) - len(a2)
    a2 = norm_arr(a2)
    if len_diff == 0:
        a1 = norm_arr(a1)
        return dist(a1,a2), ()
    else:
        min_dist = 10000
        min_detete_idx = None
        cnt = 0
        del_list = list(itertools.combinations(range(len(a1)),len_diff))
        if len(del_list) > CFG["max_iter"]:
            del_list  = random.sample(del_list, CFG["max_iter"])
        for detete_idx in del_list:
            this_a1   = np.delete(a1, detete_idx)
            this_a1   = norm_arr(this_a1)
            this_dist = dist(this_a1, a2)
            if min_dist > this_dist:
                min_dist = this_dist
                min_detete_idx = detete_idx
        return min_dist, min_detete_idx
        
def rotate_arr(u, t, deg=True):
    if deg == True:
        t = np.deg2rad(t)
    R = np.array([[np.cos(t), -np.sin(t)],
                  [np.sin(t),  np.cos(t)]])
    return np.dot(R, u)

def dist_rot(tracking_df, a2):
    tracking_df = tracking_df.sort_values('x')
    x = tracking_df['x']
    y = tracking_df['y']
    min_dist = 10000
    min_idx  = None
    min_x    = None
    for dig in range(-CFG["dig_max"], CFG["dig_max"]+1, CFG["dig_step"]):
        arr = rotate_arr(np.array((x,y)), dig)
        this_dist, this_idx = dist_for_different_len(np.sort(arr[0]), a2)
        if min_dist > this_dist:
            min_dist = this_dist
            min_idx  = this_idx
            min_x    = arr[0]
    tracking_df['x_rot'] = min_x
    player_arr = tracking_df.sort_values('x_rot')['player'].values
    players    = np.delete(player_arr, min_idx)
    return min_dist, players

def mapping_df(args):
    video_frame, df = args
    if df.shape[0] == 0:
        return pd.DataFrame([], columns=['video_frame','left','width','top','height','label'])
    gameKey, playID, view, frame = video_frame.split('_')
    gameKey = int(gameKey)
    playID  = int(playID)
    frame   = int(frame)
    this_tracking = tracking[(tracking['gameKey']==gameKey) & (tracking['playID']==playID)]
    est_frame     = find_nearest(this_tracking.est_frame.values, frame)
    this_tracking = this_tracking[this_tracking['est_frame']==est_frame]
    len_this_tracking = len(this_tracking)
    df['center_h_p']  = (df['left']+df['width']/2).astype(int)
    df['center_h_m']  = (df['left']+df['width']/2).astype(int)*-1
    if len(df) > len_this_tracking:
        df = df.tail(len_this_tracking)
    df_p = df.sort_values('center_h_p').copy()
    df_m = df.sort_values('center_h_m').copy()
    
    if view == 'Endzone':
        this_tracking['x'], this_tracking['y'] = this_tracking['y'].copy(), this_tracking['x'].copy()
    a2_p = df_p['center_h_p'].values
    a2_m = df_m['center_h_m'].values

    min_dist_p, min_detete_idx_p = dist_rot(this_tracking ,a2_p)
    min_dist_m, min_detete_idx_m = dist_rot(this_tracking ,a2_m)
    if min_dist_p < min_dist_m:
        min_dist = min_dist_p
        min_detete_idx = min_detete_idx_p
        tgt_df = df_p
    else:
        min_dist = min_dist_m
        min_detete_idx = min_detete_idx_m
        tgt_df = df_m
        
    tgt_df['label'] = min_detete_idx
    return tgt_df[['video_frame','left','width','top','height','label']]

In [39]:
helmets  = df_ensembled.sort_values("video_frame").reset_index(drop=True)
tracking = pd.read_csv(f'../input/nfl-health-and-safety-helmet-assignment/test_player_tracking.csv')
tracking = add_track_features(tracking)
labels   = pd.read_csv(f'../input/nfl-health-and-safety-helmet-assignment/train_labels.csv')

In [40]:
helmets = add_cols(helmets)
labels  = add_cols(labels)
tracking.shape, helmets.shape, labels.shape

((19269, 18), (57201, 8), (952087, 15))

In [41]:
if not CFG["nosave"]:
    p = Pool(processes=4)
    submission_df_list = []
    df_list = list(helmets.groupby('video_frame'))
    with tqdm(total=len(df_list)) as pbar:
        for this_df in p.imap(mapping_df, df_list):
            submission_df_list.append(this_df)
            pbar.update(1)
    p.close()

    submission_df = pd.concat(submission_df_list).reset_index(drop=True)
    submission_df.to_csv('submission-baseline.csv', index=False)

# Apply DeepSort

In [42]:
%%writefile deepsort.yaml

DEEPSORT:
  REID_CKPT: "../input/yolov5-deepsort-pytorch/ckpt.t7"
  MAX_DIST: 0.2
  MIN_CONFIDENCE: 0.3
  NMS_MAX_OVERLAP: 0.5
  MAX_IOU_DISTANCE: 0.9
  MAX_AGE: 15
  N_INIT: 1
  NN_BUDGET: 30

Writing deepsort.yaml


In [43]:
"""
Helper functions from yolov5 to plot deepsort labels.
"""

def compute_color_for_id(label):
    """
    Simple function that adds fixed color depending on the id
    """
    palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)

    color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette]
    return tuple(color)

def plot_one_box(x, im, color=None, label=None, line_thickness=3):
    # Plots one bounding box on image 'im' using OpenCV
    assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.'
    tl     = line_thickness or round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1  # line/font thickness
    color  = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(im, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label: 
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(im, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(im, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
    return im

In [44]:
def deepsort_helmets(video_data,
                     video_dir,
                     deepsort_config='deepsort.yaml',
                     plot=False,
                     plot_frames=[]):
    
    # Setup Deepsort
    cfg = get_config()
    cfg.merge_from_file(deepsort_config)    
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)
    
    # Run through frames.
    video_data = video_data.sort_values('frame').reset_index(drop=True)
    ds = []
    for frame, d in tqdm(video_data.groupby(['frame']), total=video_data['frame'].nunique()):
        d['x'] = (d['left'] + round(d['width']  / 2))
        d['y'] = (d['top']  + round(d['height'] / 2))

        xywhs = d[['x','y','width','height']].values

        cap = cv2.VideoCapture(f'{video_dir}/{myvideo}.mp4')
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame-1) # optional
        success, image = cap.read()
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        confs   = np.ones([len(d),])
        clss    = np.zeros([len(d),])
        outputs = deepsort.update(xywhs, confs, clss, image)

        if (plot and frame > cfg.DEEPSORT.N_INIT) or (frame in plot_frames):
            for j, (output, conf) in enumerate(zip(outputs, confs)): 

                bboxes = output[0:4]
                id  = output[4]
                cls = output[5]

                c = int(cls)  # integer class
                label = f'{id}'
                color = compute_color_for_id(id)
                im  = plot_one_box(bboxes, image, label=label, color=color, line_thickness=2)
            fig, ax = plt.subplots(figsize=(15, 10))
            video_frame = d['video_frame'].values[0]
            ax.set_title(f'Deepsort labels: {video_frame}')
            plt.imshow(im)
            plt.show()

        preds_df = pd.DataFrame(outputs, columns=['left','top','right','bottom','deepsort_cluster','class'])
        if len(preds_df) > 0:
            # TODO Fix this messy merge
            d = pd.merge_asof(d.sort_values(['left','top']),
                              preds_df[['left','top','deepsort_cluster']] \
                              .sort_values(['left','top']), on='left', suffixes=('','_deepsort'),
                              direction='nearest')
        ds.append(d)
    dout = pd.concat(ds)
    return dout

def add_deepsort_label_col(out):
    # Find the top occuring label for each deepsort_cluster
    sortlabel_map = out.groupby('deepsort_cluster')['label'].value_counts() \
        .sort_values(ascending=False).to_frame() \
        .rename(columns={'label':'label_count'}) \
        .reset_index() \
        .groupby(['deepsort_cluster']) \
        .first()['label'].to_dict()
    # Find the # of times that label appears for the deepsort_cluster.
    sortlabelcount_map = out.groupby('deepsort_cluster')['label'].value_counts() \
        .sort_values(ascending=False).to_frame() \
        .rename(columns={'label':'label_count'}) \
        .reset_index() \
        .groupby(['deepsort_cluster']) \
        .first()['label_count'].to_dict()
    
    out['label_deepsort'] = out['deepsort_cluster'].map(sortlabel_map)
    out['label_count_deepsort'] = out['deepsort_cluster'].map(sortlabelcount_map)

    return out

def score_vs_deepsort(myvideo, out, labels):
    # Score the base predictions compared to the deepsort postprocessed predictions.
    myvideo_mp4  = myvideo + '.mp4'
    labels_video = labels.query('video == @myvideo_mp4')
    scorer       = NFLAssignmentScorer(labels_video)
    out_deduped  = out.groupby(['video_frame','label']).first().reset_index()
    base_video_score = scorer.score(out_deduped)
    
    out_preds = out.drop('label', axis=1).rename(columns={'label_deepsort':'label'})
    print(out_preds.shape)
    out_preds = out_preds.groupby(['video_frame','label']).first().reset_index()
    print(out_preds.shape)
    deepsort_video_score = scorer.score(out_preds)
    print(f'{base_video_score:0.5f} before --> {deepsort_video_score:0.5f} deepsort')

In [45]:
if not CFG["nosave"]:
    # Add video and frame columns to submission.
    submission_df['video'] = submission_df['video_frame'].str.split('_').str[:3].str.join('_')
    submission_df['frame'] = submission_df['video_frame'].str.split('_').str[-1].astype('int')

    # Loop through test videos and apply. If in debug mode show the score change.
    out_ds = []
    outs   = []
    for myvideo, video_data in submission_df.groupby('video'):
        print(f'==== {myvideo} ====')
        out = deepsort_helmets(video_data, CFG["input_path"])
        out_ds.append(out)
        out = add_deepsort_label_col(out)
        outs.append(out)
    submission_deepsort = pd.concat(outs).copy()

# Check submission file

In [46]:
ss = pd.read_csv('../input/nfl-health-and-safety-helmet-assignment/sample_submission.csv')
if not CFG["nosave"]:
    # Final Checks
    submission_deepsort['label_deepsort'] = submission_deepsort['label_deepsort'].fillna(submission_deepsort['label'])
    submission_deepsort = submission_deepsort.drop('label', axis=1).rename(columns={'label_deepsort':'label'})[ss.columns]
    # Drop duplicate labels
    submission_deepsort = submission_deepsort.loc[~submission_deepsort[['video_frame','label']].duplicated()]
    check_submission(submission_deepsort)
    submission_deepsort.to_csv('submission_deepsort.csv', index=False)
else:
    ss.to_csv('submission.csv', index=False)

# Re-IDs
In Deepsort algorithm, unfortunately some of BBoxes have been removed even if the BBoxes detect helmet correctly. In this section, I implemented Re-IDs function by using helmet-ids that generated postprocessing section.

In [47]:
def reid(traced, traced_max_num, traced_helmet_ids, dict_tracing):
    reids = []
    for v in tqdm(traced.video.unique()):
        df_tracing    = traced[traced.video==v].reset_index(drop=True)
        df_max_num    = traced_max_num[traced_max_num.video==v].reset_index(drop=True)
        df_helmet_ids = traced_helmet_ids[traced_helmet_ids.video==v].reset_index(drop=True)
        all_labels    = df_helmet_ids.label.unique()

        this_video_reids = []
        for f in sorted(df_tracing.frame.unique()):
            dict_all_labels = {}
            for l in all_labels:
                if l=="H0": continue
                dict_all_labels[l] = [0, [0]*4]
            df_this_frame = df_tracing[df_tracing.frame==f].reset_index(drop=True)
            # Target helmet-ids that appeared in current frame
            this_helmets  = df_this_frame.helmet
            this_helmets  = df_max_num[df_max_num.helmet.isin(this_helmets)].helmet
            skipped_helmets = []
            # Run two cycles.
            for _ in range(2):
                for h in this_helmets:
                    # The "cnts" is ordered by descending.
                    # Labels with the highest number of occurrences have priority.
                    # After the first loop, the remainder should be assigned.
                    labels, cnts = dict_tracing[v][h]
                    for l, c in zip(labels, cnts):
                        if l != "H0":
                            if dict_all_labels[l][0] < c:
                                if dict_all_labels[l][0] != 0:
                                    skipped_helmets.append(dict_all_labels[l][1][2])
                                dict_all_labels[l][0] = c
                                dict_all_labels[l][1] = [v, f, h, l]
                                break
                            if dict_all_labels[l][0] == c:
                                # Some of helmet-ids have the same number of different labels.
                                # If that is happened, those will be processed on the next.
                                skipped_helmets.append(h)
                                if 0 < dict_all_labels[l][1][1]:
                                    skipped_helmets.append(dict_all_labels[l][1][2])
                                dict_all_labels[l][1] = [0]*4
                                break
                    else:
                        skipped_helmets.append(h)

                if len(this_helmets) != len(skipped_helmets):
                    this_helmets    = skipped_helmets
                    skipped_helmets = []

            # Successful to do re-id
            this_video_reids += [item for _, item in list(dict_all_labels.values()) if item[1] != 0]
            # the other helmet that need to be assigned will go on next.
            not_detected_helmets = [key for key, (cnt, _) in dict_all_labels.items() if cnt == 0]
            if len(not_detected_helmets) == 0:
                continue

            if 0 < len(this_helmets):
                # Calculate the nearest BBox
                this_helmets   = list(set(this_helmets))
                df_prev_traced = pd.DataFrame(this_video_reids, columns=["video","frame","helmet","label"])
                df_curt_traced = df_tracing[(df_tracing.frame==f)&(df_tracing.helmet.isin(this_helmets))][["helmet","left","top"]]

                assigned_helmets = []
                for l in not_detected_helmets:
                    df_prev_frame  = df_prev_traced[(df_prev_traced.label==l)]
                    if df_prev_frame.shape[0] == 0:
                        continue
                    prev_frame     = np.array(df_prev_frame.frame)[-1]
                    prev_helmet    = np.array(df_prev_frame.helmet)[-1]
                    prev_location  = np.array(df_tracing[(df_tracing.frame==prev_frame)&(df_tracing.helmet==prev_helmet)][["left","top"]])
                    curt_location  = np.array(df_curt_traced[~df_curt_traced.helmet.isin(assigned_helmets)])
                    nearest_helmet = curt_location[np.argmin(cdist(curt_location[:,1:], prev_location)), 0]
                    this_video_reids += [[v, f, nearest_helmet, l]]
                    assigned_helmets.append(nearest_helmet)
                    if len(assigned_helmets) == len(this_helmets):
                        break
        reids += this_video_reids
    
    df_re_ids = pd.DataFrame(reids, columns=["video","frame","helmet","label"])
    return df_re_ids

In [48]:
if not CFG["nosave"]:
    traced = df_ensembled.merge(submission_deepsort, on=["video_frame","left","width","top","height"], how="left")
    traced.loc[traced.label.isnull(), "label"] = "H0"
    print(traced.shape)

In [49]:
if not CFG["nosave"]:
    traced_helmet_ids = traced.groupby(["video","helmet","label"], as_index=False).video_frame.count()
    traced_helmet_ids.columns = ["video","helmet","label","cnt"]
    traced_helmet_ids = traced_helmet_ids.sort_values(["video","helmet","cnt"], ascending=[True,True,False])
    traced_max_num = traced_helmet_ids.groupby(["video","helmet"], as_index=False).cnt.max()
    traced_max_num = traced_helmet_ids.merge(traced_max_num, on=["video","helmet","cnt"])
    traced_max_num = traced_max_num.sort_values("cnt", ascending=False)

    dict_tracing = {}
    for v in traced_helmet_ids.video.unique():
        df_video = traced_helmet_ids[traced_helmet_ids.video==v].reset_index(drop=True)
        dict = {}
        for h in df_video.helmet.unique():
            this_helmet = df_video[df_video.helmet==h]
            dict[h] = [list(this_helmet.label), list(this_helmet.cnt)]
        dict_tracing[v] = dict    

In [50]:
if not CFG["nosave"]:
    df_re_ids = reid(traced, traced_max_num, traced_helmet_ids, dict_tracing)
    df_re_ids = df_re_ids.merge(traced[["video","frame","helmet","left","width","top","height"]],
                                on=["video","frame","helmet"])
    df_re_ids["video_frame"] = df_re_ids["video"] + "_" + df_re_ids["frame"].astype(str)
    df_re_ids = df_re_ids[["video_frame","label","left","width","top","height"]]

    df_re_ids.to_csv('submission.csv', index=False)

In [51]:
!rm -rf ./inference_cnn
!rm -rf ./inference
!rm -rf ./yolov5