In [1]:
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
sys.path.append('../input/pytorch-optimizers/')
sys.path.append('../input/weightedboxfusion/')

In [2]:
import os
import gc
import cv2
import copy
import time
import yaml
import random
import shutil
import warnings
import subprocess
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob
from tqdm import tqdm
from PIL import Image, ImageDraw
from shutil import copyfile
from IPython.core.display import Video, display
from sklearn.model_selection import train_test_split, KFold, GroupKFold, StratifiedKFold
from ensemble_boxes import nms

import timm
import torch
import torch.nn.functional as F
import torchvision
from torch import nn
from torch.cuda.amp import autocast, GradScaler
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torch.nn.modules.loss import _WeightedLoss
from torchvision import models
from torchvision import transforms
from torch_optimizer.radam import RAdam

from albumentations.pytorch import ToTensorV2
from albumentations import (
    HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, RandomResizedCrop,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Normalize, Cutout, CoarseDropout,
    ShiftScaleRotate, CenterCrop, Resize, Rotate, RandomRotate90, RGBShift, ChannelShuffle)

# Silence library warnings so they do not drown out the cell outputs.
warnings.simplefilter('ignore')
# Use fully-qualified option names. pandas resolves a bare "max_columns" by
# pattern matching, which becomes ambiguous (OptionError) on versions that
# expose more than one option ending in "max_columns".
pd.set_option("display.max_columns", 150)
pd.set_option("display.max_rows", 150)

### Configuration

In [3]:
# Central notebook configuration. Every tunable value lives here so the cells
# below never hard-code paths or hyper-parameters.
CFG = {
    # --- run switches -------------------------------------------------------
    "seed": 42,
    "make_dataset": False,   # rebuild the crop table instead of loading train.csv
    "save_zip": True,        # export the crops as label0.zip / label1.zip
    "train_model": False,    # run the training loop
    "device": "cuda:0",
    # --- inputs / outputs ---------------------------------------------------
    "input_img": "../input/nfl-health-and-safety-helmet-assignment/images/",
    "label_img": "../input/nfl-health-and-safety-helmet-assignment/image_labels.csv",
    "input_video": "../input/nfl-health-and-safety-helmet-assignment/train/",
    "label_video": "../input/nfl-health-and-safety-helmet-assignment/train_labels.csv",
    "output_path": "./train/",
    # --- model / crop geometry ----------------------------------------------
    "model": "tf_efficientnetv2_s_in21k",
    "size": 128,             # side length the crops are resized to
    "height": 4,             # crop height as a multiple of the box height
    "width": 4,              # crop width as a multiple of the box width
    # --- optimisation -------------------------------------------------------
    "batch_size": 256,
    "epochs": 5,
    "lr": 0.001,
    "weight_decay": 1e-4,
    "accum_iter": 1,
    "early_stopping": 10,
    "verbose_step": 1,
    "num_workers": 4,
}

CFG

{'seed': 42,
 'make_dataset': False,
 'save_zip': True,
 'train_model': False,
 'device': 'cuda:0',
 'input_img': '../input/nfl-health-and-safety-helmet-assignment/images/',
 'label_img': '../input/nfl-health-and-safety-helmet-assignment/image_labels.csv',
 'input_video': '../input/nfl-health-and-safety-helmet-assignment/train/',
 'label_video': '../input/nfl-health-and-safety-helmet-assignment/train_labels.csv',
 'output_path': './train/',
 'model': 'tf_efficientnetv2_s_in21k',
 'size': 128,
 'height': 4,
 'width': 4,
 'batch_size': 256,
 'epochs': 5,
 'lr': 0.001,
 'weight_decay': 0.0001,
 'accum_iter': 1,
 'early_stopping': 10,
 'verbose_step': 1,
 'num_workers': 4}

In [4]:
def seed_everything(seed = 42):
    '''Seed every random number generator the notebook uses.

    Seeds Python's `random`, NumPy and PyTorch (CPU and all CUDA devices) and
    switches cuDNN to deterministic mode so repeated runs give the same
    results.

    Args:
        seed: integer seed applied to every generator.
    '''
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Also covers additional GPUs; a no-op when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Hash randomisation of the running interpreter is fixed at start-up, so
    # this value only reaches child processes that are launched afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)
    
# Seed every RNG once, up front, with the seed configured in CFG.
seed_everything(CFG["seed"])

In [5]:
def get_img(path):
    """Read an image from disk and return it in RGB channel order.

    Args:
        path: location of the image file.

    Returns:
        The image as an array with the last (channel) axis reversed relative
        to what `cv2.imread` returned, i.e. RGB instead of BGR. The result is
        a reversed view, not a copy.

    Raises:
        OSError: if `cv2.imread` could not load the file (it returns None
            instead of raising for missing or unreadable images).
    """
    im_bgr = cv2.imread(path)
    if im_bgr is None:
        # Fail here with the offending path rather than with an opaque
        # "'NoneType' object is not subscriptable" on the slice below.
        raise OSError(f"cv2.imread could not read image: {path!r}")
    return im_bgr[:, :, ::-1]

# Load data and make training data

In [6]:
# Load video label csv file
df_video_images = pd.read_csv(CFG['label_video'])
# Each labelled frame is expected at <output_path>/<video stem>/<video_frame>.jpg.
# regex=False: ".mp4" is a literal suffix; as a regular expression the "."
# would match any character, and the default for `regex` differs between
# pandas versions.
video_stem = df_video_images.video.str.replace(".mp4", "", regex=False)
df_video_images["path"] = CFG["output_path"] + video_stem + "/" + df_video_images.video_frame + ".jpg"
# Drop helmets flagged as belonging to sideline players.
df_video_images = df_video_images[df_video_images.isSidelinePlayer==False].reset_index(drop=True)

print(df_video_images.shape, df_video_images.path.nunique())
df_video_images.head(2)

(946783, 15) 52142


Unnamed: 0,video_frame,gameKey,playID,view,video,frame,label,left,width,top,height,impactType,isDefinitiveImpact,isSidelinePlayer,path
0,57583_000082_Endzone_1,57583,82,Endzone,57583_000082_Endzone.mp4,1,H90,245,21,327,29,,False,False,./train/57583_000082_Endzone/57583_000082_Endz...
1,57583_000082_Endzone_1,57583,82,Endzone,57583_000082_Endzone.mp4,1,V79,310,21,345,12,,False,False,./train/57583_000082_Endzone/57583_000082_Endz...


In [7]:
# Start from an empty output directory so frames left over from a previous
# run cannot leak into the dataset.
shutil.rmtree(CFG["output_path"], ignore_errors=True)

for f in tqdm(os.listdir(CFG['input_video'])):
    in_path      = os.path.join(CFG["input_video"], f)
    # splitext instead of f[:-4]: does not assume a 4-character extension.
    image_name   = os.path.splitext(f)[0]
    out_img_path = os.path.join(CFG["output_path"], image_name)
    extention    = "jpg"
    # Make save directories
    os.makedirs(out_img_path, exist_ok=True)
    # Split into frames. The command is passed as an argument list (no shell),
    # so file names containing spaces or shell metacharacters are handled
    # safely.
    cmd = [
        "ffmpeg", "-i", in_path,
        "-qscale:v", "2",
        os.path.join(out_img_path, "{}_%d.{}".format(image_name, extention)),
    ]
    return_code = subprocess.call(cmd)
    if return_code != 0:
        # Keep going (frames that are missing are filtered out later), but do
        # not hide the failure.
        print(f"ffmpeg exited with code {return_code} for {in_path}")

100%|██████████| 120/120 [06:44<00:00,  3.37s/it]


In [8]:
# Keep only labels whose frame image exists on disk, thin the data to every
# fourth frame, and reduce the table to the path plus box geometry.
target_imgs = glob(CFG["output_path"] + "/*/*.jpg")
frame_on_disk   = df_video_images.path.isin(target_imgs)
is_fourth_frame = df_video_images.frame % 4 == 0
df_video_images = (
    df_video_images[frame_on_disk & is_fourth_frame]
    .reset_index(drop=True)
    [["path","left","width","top","height"]]
)

print(df_video_images.shape, df_video_images.path.nunique())
df_video_images.head(2)

(235803, 5) 12991


Unnamed: 0,path,left,width,top,height
0,./train/57583_000082_Endzone/57583_000082_Endz...,153,29,318,31
1,./train/57583_000082_Endzone/57583_000082_Endz...,197,23,315,21


In [9]:
# Load the still-image labels, without the sideline helmets
df_images = pd.read_csv(CFG['label_img'])
not_sideline = df_images.label != "Helmet-Sideline"
df_images = df_images.loc[not_sideline].reset_index(drop=True)
df_images["path"] = CFG['input_img'] + df_images.image

# Randomly keep half of the distinct images (shuffle, then take the first half)
paths = df_images.path.unique()
random.shuffle(paths)
n_keep = int(len(paths)/2)
kept_paths = paths[:n_keep]
df_images = df_images.loc[df_images.path.isin(kept_paths), ["path","left","width","top","height"]]

print(df_images.shape, df_images.path.nunique())
df_images.head(2)

(89502, 5) 4965


Unnamed: 0,path,left,width,top,height
0,../input/nfl-health-and-safety-helmet-assignme...,1099,16,456,15
1,../input/nfl-health-and-safety-helmet-assignme...,1117,15,478,16


In [10]:
# Stack video-frame and still-image boxes into one table and derive the
# right/bottom edges and the integer box centre. Every box is a helmet, so
# the label columns are constant.
df_all_images = (
    pd.concat([df_video_images, df_images])
    .reset_index(drop=True)
    .assign(
        right    = lambda d: d.left + d.width,
        bottom   = lambda d: d.top  + d.height,
        x_center = lambda d: d.left + (d.width  / 2).astype(int),
        y_center = lambda d: d.top  + (d.height / 2).astype(int),
        label    = "Helmet",
        label_id = 0,
    )
)

print(df_all_images.shape, df_all_images.path.nunique())
df_all_images.head()

(325305, 11) 17956


Unnamed: 0,path,left,width,top,height,right,bottom,x_center,y_center,label,label_id
0,./train/57583_000082_Endzone/57583_000082_Endz...,153,29,318,31,182,349,167,333,Helmet,0
1,./train/57583_000082_Endzone/57583_000082_Endz...,197,23,315,21,220,336,208,325,Helmet,0
2,./train/57583_000082_Endzone/57583_000082_Endz...,359,15,0,19,374,19,366,9,Helmet,0
3,./train/57583_000082_Endzone/57583_000082_Endz...,440,23,296,29,463,325,451,310,Helmet,0
4,./train/57583_000082_Endzone/57583_000082_Endz...,467,23,294,22,490,316,478,305,Helmet,0


In [11]:
def check(l, r, t, b, frame_width=1280, frame_height=720):
    """Return True when the box lies completely inside the frame.

    Args:
        l, r: left and right edge of the box, in pixels.
        t, b: top and bottom edge of the box, in pixels.
        frame_width: frame width in pixels (default 1280).
        frame_height: frame height in pixels (default 720).

    Returns:
        False if the box crosses the left/right or top/bottom frame border,
        True otherwise. Boxes that touch the border exactly are accepted.
    """
    if l < 0 or frame_width < r:
        return False
    if t < 0 or frame_height < b:
        return False
    return True

if CFG["make_dataset"]:
    # Each entry is [path, left, right, top, bottom, label];
    # label 1 = helmet box, label 0 = shifted copy of a helmet box.
    train_bboxes = []
    # groupby(sort=False) visits the images in order of first appearance (the
    # same order as path.unique()) without re-scanning the whole table once
    # per image.
    for f, df in tqdm(df_all_images.groupby("path", sort=False)):
        # Use a random quarter of the boxes of each image.
        df = df.sample(int(df.shape[0]/4)).reset_index(drop=True)
        for l, w, t, h in df[["left","width","top","height"]].itertuples(index=False, name=None):
            r = l + w
            b = t + h
            # positive data
            if check(l, r, t, b):
                train_bboxes.append([f, l, r, t, b, 1])
            # negative data: the same box moved one box-height down, and half
            # a box-width to the left and to the right
            t_down,  b_down  = t + h, b + h
            l_left,  r_left  = l - int(w/2), r - int(w/2)
            l_right, r_right = l + int(w/2), r + int(w/2)
            if check(l, r, t_down, b_down):
                train_bboxes.append([f, l, r, t_down, b_down, 0])
            if check(l_left,  r_left,  t, b):
                train_bboxes.append([f, l_left,  r_left,  t, b, 0])
            if check(l_right, r_right, t, b):
                train_bboxes.append([f, l_right, r_right, t, b, 0])

In [12]:
if CFG["make_dataset"]:
    df_train_bbox = pd.DataFrame(train_bboxes, columns=["path","left","right","top","bottom","label"])
    # The confidence doubles as a class marker for the NMS step:
    # 1.0 for helmet boxes, 0.5 for the shifted negative boxes.
    df_train_bbox["conf"] = 0.5
    df_train_bbox.loc[df_train_bbox.label==1, "conf"] = 1
    # Scale pixel coordinates to [0, 1] using the 1280x720 frame size.
    df_train_bbox[["left","right"]] = df_train_bbox[["left","right"]] / 1280
    df_train_bbox[["top","bottom"]] = df_train_bbox[["top","bottom"]] / 720

    print(df_train_bbox.shape)
    # A bare expression inside an `if` body is not rendered by the notebook,
    # so the preview has to be displayed explicitly.
    display(df_train_bbox.head())

In [13]:
weights = None
iou_thr = 0.3

if CFG["make_dataset"]:
    # Run NMS per image over positives and negatives together (all boxes get
    # the same class label), using `conf` as the score.
    # The per-image results are collected in a list and concatenated once:
    # growing a DataFrame with .append() in a loop is quadratic, and
    # DataFrame.append no longer exists in pandas 2.
    nms_parts = []
    for p, df in tqdm(df_train_bbox.groupby("path", sort=False)):
        boxes  = [np.array(df[["left","top","right","bottom"]]).tolist()]
        scores = [list(np.array(df.conf))]
        labels = [list(np.ones(df.shape[0]))]
        boxes, scores, labels = nms(boxes, scores, labels, weights=weights, iou_thr=iou_thr)
        # Columns 0-3 are the surviving box, column 4 its score.
        df_nms = pd.DataFrame(np.hstack([boxes, scores.reshape(-1,1)]))
        df_nms["path"] = p
        nms_parts.append(df_nms)
    # pd.concat rejects an empty list, so fall back to an empty frame.
    df_train_bbox_removed = pd.concat(nms_parts) if nms_parts else pd.DataFrame()

In [14]:
if CFG["make_dataset"]:
    df_train = df_train_bbox_removed.reset_index(drop=True)
    df_train.columns = ["left","top","right","bottom","conf","path"]
    # Back to pixel coordinates. round() before the int cast: x / 1280 * 1280
    # can land a hair below the original integer, and a bare astype(int)
    # would then truncate it to the pixel before.
    df_train[["left","right"]] = (df_train[["left","right"]] * 1280).round().astype(int)
    df_train[["top","bottom"]] = (df_train[["top","bottom"]] *  720).round().astype(int)
    df_train["width"]  = df_train.right  - df_train.left
    df_train["height"] = df_train.bottom - df_train.top
    # .copy() so the assignments below write to an independent frame instead
    # of a slice of the previous one.
    df_train = df_train[["path","left","right","width","top","bottom","height","conf"]].copy()
    # Recover the class from the score: 0.5 marks a negative box.
    df_train["label"]  = 1
    df_train.loc[df_train.conf==0.5, "label"] = 0

    # Grow every box symmetrically so the crop is CFG["width"] x CFG["height"]
    # times the box size. The padding is computed once, before left/top move.
    pad_w = (df_train.width /2*(CFG["width"]  -1)).astype(int)
    pad_h = (df_train.height/2*(CFG["height"] -1)).astype(int)
    df_train["right"]  = df_train.left + df_train.width  + pad_w
    df_train["bottom"] = df_train.top  + df_train.height + pad_h
    df_train["left"]   = df_train.left - pad_w
    df_train["top"]    = df_train.top  - pad_h

    # Discard crops that would reach outside the 1280x720 frame.
    df_train = df_train[(0<=df_train.left)&(df_train.right<=1280)&(0<=df_train.top)&(df_train.bottom<=720)]
    df_train.to_csv("train.csv", index=False)
else:
    # Reuse the precomputed crop table and copy it next to the notebook outputs.
    df_train = pd.read_csv("../input/nfl-helmet-assignment-cnn-models/train.csv")
    df_train.to_csv("train.csv", index=False)

In [15]:
# Sanity check: print the size of the crop table and preview its first two rows.
print(df_train.shape)
df_train.head(2)

(143023, 9)


Unnamed: 0,path,left,right,width,top,bottom,height,conf,label
0,./train/57583_000082_Endzone/57583_000082_Endz...,163,254,23,284,367,21,1.0,1
1,./train/57583_000082_Endzone/57583_000082_Endz...,468,556,22,197,309,28,1.0,1


In [16]:
if CFG["save_zip"]:

    # Sorting by path lets the loop below decode each source image only once.
    df_train = df_train.sort_values("path").reset_index(drop=True)

    crop_dirs = ("./label0", "./label1")
    for crop_dir in crop_dirs:
        shutil.rmtree(crop_dir, ignore_errors=True)
        os.makedirs(crop_dir, exist_ok=True)

    p_old = None
    for row in tqdm(df_train.itertuples(index=False), total=len(df_train)):
        p = row.path
        if not os.path.isfile(p):
            continue
        if p_old != p:
            img   = get_img(p)
            p_old = p
        l, r, t, b = int(row.left), int(row.right), int(row.top), int(row.bottom)
        # One image yields several crops. Naming the file after the image
        # alone made every crop overwrite the previous one, so the crop
        # coordinates are part of the file name.
        stem, ext = os.path.splitext(os.path.basename(p))
        out_file  = f"./label{int(row.label)}/{stem}_{l}_{t}_{r}_{b}{ext}"
        # get_img returns RGB but cv2.imwrite expects BGR: flip the channel
        # axis back (and make the array contiguous) before writing, otherwise
        # red and blue are swapped in the saved file.
        cut_img = np.ascontiguousarray(img[t:b, l:r, ::-1])
        cv2.imwrite(out_file, cut_img)

    shutil.make_archive("label0", 'zip', root_dir="./label0")
    shutil.make_archive("label1", 'zip', root_dir="./label1")
    # Only the archives are kept; remove the loose crop files.
    for crop_dir in crop_dirs:
        shutil.rmtree(crop_dir, ignore_errors=True)

100%|██████████| 143023/143023 [05:50<00:00, 408.36it/s]


# Define model functions

In [17]:
class Model(nn.Module):
    """timm backbone whose classifier is replaced by a single-logit linear head."""

    def __init__(self, model_name, pretrained=True):
        """Create the backbone `model_name` and swap in a one-output classifier.

        Args:
            model_name: architecture name passed to `timm.create_model`.
            pretrained: forwarded to `timm.create_model`.
        """
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained, in_chans=3)
        # Input width of the original classifier = size of the pooled features.
        self.n_features = backbone.classifier.in_features
        backbone.classifier = nn.Linear(self.n_features, 1)
        self.model = backbone

    def forward(self, x):
        """Return the backbone output for the batch `x`."""
        return self.model(x)

In [18]:
class NFLDataset(Dataset):
    """Dataset that yields one image crop (and optionally its label) per row.

    Each row of `df` must provide `path`, `left`, `right`, `top` and `bottom`;
    `label` is also required when `output_label` is True.
    """

    def __init__(self, df, transforms=None, output_label=True):
        """Store a re-indexed copy of `df` and the sample options.

        Args:
            df: table with one crop per row.
            transforms: optional callable invoked as `transforms(image=img)`;
                its `'image'` entry replaces the crop.
            output_label: when True, items are `(image, label)` pairs,
                otherwise just the image.
        """
        super().__init__()
        self.df = df.reset_index(drop=True).copy()
        self.transforms   = transforms
        self.output_label = output_label

    def __len__(self):
        """Number of crops (rows of the table)."""
        return self.df.shape[0]

    def __getitem__(self, index: int):
        """Load the source image of row `index` and return its crop.

        Returns:
            `(image, label)` if `output_label` is set, else `image`. The label
            is a 0-dimensional tensor built from the row's `label` value.
        """
        # Fetch the row once; every `self.df.loc[index]` builds a new Series,
        # and this method runs for every sample of every epoch.
        row = self.df.loc[index]
        top, bottom = int(row["top"]),  int(row["bottom"])
        left, right = int(row["left"]), int(row["right"])
        img = get_img(row["path"])[top:bottom, left:right]
        if self.transforms:
            img = self.transforms(image=img)['image']
        if self.output_label:
            return img, torch.from_numpy(np.array(row["label"]))
        return img

In [19]:
def get_train_transforms():
    """Build the training pipeline: resize, random augmentations, normalise, tensor."""
    side = CFG['size']
    steps = [
        Resize(side, side),
        # geometric augmentations
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        RandomRotate90(p=0.5),
        # blur and colour augmentations
        MotionBlur(p=0.5),
        # NOTE(review): shift limits of 0.2 look very small for 8-bit images;
        # confirm against the albumentations docs that this is the intended strength.
        HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
        RGBShift(p=0.5),
        ChannelShuffle(p=0.5),
        RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
        # normalisation and conversion to a tensor
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return Compose(steps, p=1)

def get_valid_transforms():
    """Build the validation pipeline: resize, normalise, tensor (no augmentation)."""
    side = CFG['size']
    steps = [
        Resize(side, side),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return Compose(steps, p=1)

In [20]:
def prepare_dataloader(train, valid):
    """Wrap the train and validation tables in datasets and data loaders.

    Args:
        train: table of training crops.
        valid: table of validation crops.

    Returns:
        `(train_loader, val_loader)`. The training loader shuffles and uses
        the augmenting transforms; the validation loader keeps row order.
    """
    train_ds = NFLDataset(train, transforms=get_train_transforms(), output_label=True)
    valid_ds = NFLDataset(valid, transforms=get_valid_transforms(), output_label=True)

    # Settings shared by both loaders.
    common = dict(
        batch_size=CFG['batch_size'],
        pin_memory=False,
        num_workers=CFG['num_workers'],
    )
    train_loader = torch.utils.data.DataLoader(train_ds, drop_last=False, shuffle=True, **common)
    val_loader   = torch.utils.data.DataLoader(valid_ds, shuffle=False, **common)
    return train_loader, val_loader

In [21]:
def train_one_epoch(epoch, model, loss_fn, optimizer, train_loader, device, scheduler=None, schd_batch_update=False, grad_scaler=None):
    """Train `model` for one pass over `train_loader` with mixed precision.

    Args:
        epoch: epoch number, used only in the progress-bar text.
        model: network to train; switched to train mode.
        loss_fn: callable `loss_fn(predictions, targets)` returning a scalar.
        optimizer: optimizer stepped every `CFG['accum_iter']` batches and on
            the last batch.
        train_loader: iterable of `(images, labels)` batches supporting `len`.
        device: device the batches are moved to.
        scheduler: optional LR scheduler.
        schd_batch_update: step the scheduler after every optimizer step
            (True) or once at the end of the epoch (False).
        grad_scaler: AMP gradient scaler. When omitted, the notebook-level
            `scaler` variable is used, which must then exist.
    """
    # Resolve the scaler up front; the global is only touched as a fallback.
    amp_scaler = scaler if grad_scaler is None else grad_scaler
    accum_iter = CFG['accum_iter']
    n_steps    = len(train_loader)

    model.train()
    running_loss = None

    pbar = tqdm(enumerate(train_loader), total=n_steps)
    for step, (imgs, labels) in pbar:
        imgs = imgs.to(device).float()
        # Labels become an (N, 1) float column to line up with the model output.
        image_labels = labels.reshape(-1,1).to(device).float()
        is_last_step = (step + 1) == n_steps

        # Only the forward pass and the loss belong under autocast; the
        # backward pass and the optimizer step run outside of it.
        with autocast():
            image_preds = model(imgs)
            loss = loss_fn(image_preds, image_labels)

        # Divide by the accumulation length so the accumulated gradient is an
        # average rather than a sum (no effect when accum_iter == 1).
        amp_scaler.scale(loss / accum_iter).backward()

        # Exponential moving average of the batch loss, for display only.
        if running_loss is None:
            running_loss = loss.item()
        else:
            running_loss = running_loss * .99 + loss.item() * .01

        if ((step + 1) % accum_iter == 0) or is_last_step:
            # may unscale_ here if desired (e.g., to allow clipping unscaled gradients)
            amp_scaler.step(optimizer)
            amp_scaler.update()
            optimizer.zero_grad()
            if scheduler is not None and schd_batch_update:
                scheduler.step()

        if ((step + 1) % CFG['verbose_step'] == 0) or is_last_step:
            description = f'epoch {epoch} loss: {running_loss:.4f}'
            pbar.set_description(description)

    if scheduler is not None and not schd_batch_update:
        scheduler.step()
        
def valid_one_epoch(epoch, model, loss_fn, val_loader, device, scheduler=None, schd_loss_update=False):
    """Evaluate `model` on `val_loader` and return the accuracy.

    A sample counts as predicted positive when its logit is greater than 0.

    Args:
        epoch: epoch number, used only in the progress-bar text.
        model: network to evaluate; switched to eval mode.
        loss_fn: callable `loss_fn(predictions, targets)` returning a scalar.
        val_loader: iterable of `(images, labels)` batches supporting `len`.
        device: device the batches are moved to.
        scheduler: optional LR scheduler stepped once after evaluation.
        schd_loss_update: pass the mean validation loss to `scheduler.step`
            (True) or call it without arguments (False).

    Returns:
        Fraction of samples whose thresholded prediction equals the label.
    """
    model.eval()

    loss_sum   = 0
    sample_num = 0
    image_preds_all   = []
    image_targets_all = []

    pbar = tqdm(enumerate(val_loader), total=len(val_loader))
    # Disable autograd here so the function is safe to call on its own; this
    # is harmless when the caller already wraps the call in torch.no_grad().
    with torch.no_grad():
        for step, (imgs, labels) in pbar:
            # Cast to float, as the training loop does.
            imgs = imgs.to(device).float()
            image_labels = labels.reshape(-1,1).to(device).float()

            image_preds = model(imgs)
            # logit > 0  <=>  sigmoid(logit) > 0.5
            image_preds_all   += [np.where(image_preds.detach().cpu().numpy()>0, 1, 0).reshape(1,-1)[0]]
            image_targets_all += [np.array(labels)]

            loss = loss_fn(image_preds, image_labels)
            # Weight by batch size so the running value is a per-sample mean.
            loss_sum   += loss.item()*image_labels.shape[0]
            sample_num += image_labels.shape[0]

            if ((step + 1) % CFG['verbose_step'] == 0) or ((step + 1) == len(val_loader)):
                description = f'epoch {epoch} loss: {loss_sum/sample_num:.4f}'
                pbar.set_description(description)

    image_preds_all   = np.concatenate(image_preds_all)
    image_targets_all = np.concatenate(image_targets_all)
    acc = (image_preds_all==image_targets_all).mean()
    print('validation multi-class accuracy = {:.4f}'.format(acc))

    if scheduler is not None:
        if schd_loss_update:
            scheduler.step(loss_sum/sample_num)
        else:
            scheduler.step()
    return acc

# Run training

In [22]:
print(df_train.shape)
print(df_train.label.value_counts())

df_train_label_0 = df_train[df_train.label==0].reset_index(drop=True)
df_train_label_1 = df_train[df_train.label==1].reset_index(drop=True)

# Balance the classes by down-sampling to the size of the smaller one.
# Sampling a fixed "number of positives" from the negatives raises as soon as
# there are fewer negatives than positives, so the larger class (whichever it
# is) gets cut down. The explicit random_state makes the selection independent
# of how much global RNG state earlier cells consumed.
n_per_class = min(df_train_label_0.shape[0], df_train_label_1.shape[0])
df_train_label_0_sampled = df_train_label_0.sample(n_per_class, random_state=CFG["seed"])
if df_train_label_1.shape[0] > n_per_class:
    df_train_label_1 = df_train_label_1.sample(n_per_class, random_state=CFG["seed"]).reset_index(drop=True)

df_train = pd.concat([df_train_label_0_sampled, df_train_label_1]).reset_index(drop=True)

print(df_train.shape)
df_train.head(2)

(143023, 9)
0    71516
1    71507
Name: label, dtype: int64
(143014, 9)


Unnamed: 0,path,left,right,width,top,bottom,height,conf,label
0,./train/58048_000086_Sideline/58048_000086_Sid...,544,603,15,241,305,16,0.5,0
1,../input/nfl-health-and-safety-helmet-assignme...,846,897,13,81,169,22,0.5,0


In [23]:
if CFG["train_model"]:
    # NOTE(review): this is a random row-level split, so crops taken from the
    # same frame or video can end up in both train and valid. Consider a
    # grouped split (GroupKFold is imported) if the validation accuracy is
    # meant to estimate performance on unseen videos.
    train, valid = train_test_split(df_train, test_size=0.3, random_state=CFG["seed"])
    train = train.reset_index(drop=True)
    valid = valid.reset_index(drop=True)
    print(train.shape, valid.shape)
    train_loader, val_loader = prepare_dataloader(train, valid)

    not_improved_cnt = 0
    best_acc   = 0
    # Pre-bind so the save below cannot fail with a NameError when no epoch
    # ever beats the initial best_acc.
    best_model = None
    device   = torch.device(CFG['device'])
    model    = Model(CFG["model"])
    model.to(device)
    # `scaler` is read as a global by train_one_epoch.
    scaler    = GradScaler()
    optimizer = RAdam(model.parameters(), lr=CFG['lr'], weight_decay=CFG['weight_decay'])
    scheduler = None
    loss      = nn.BCEWithLogitsLoss().to(device)

    for epoch in range(CFG['epochs']):
        train_one_epoch(epoch, model, loss, optimizer, train_loader, device, scheduler=scheduler, schd_batch_update=False)

        with torch.no_grad():
            acc = valid_one_epoch(epoch, model, loss, val_loader, device, scheduler=None, schd_loss_update=False)

        if best_acc < acc:
            print('Best model will be saved to output path after completing this fold')
            best_model = copy.deepcopy(model)
            best_acc   = acc
            not_improved_cnt = 0
        else:
            # Count the stale epoch first, then compare, so training stops
            # after exactly CFG['early_stopping'] epochs without improvement
            # (not one epoch later).
            not_improved_cnt += 1
            if not_improved_cnt >= CFG['early_stopping']:
                print("Met early stopping.")
                break

    if best_model is None:
        # No epoch improved on the initial best_acc: save the final weights.
        best_model = model
    torch.save(best_model.state_dict(), 'helmet_cnn.pt')

    del model, optimizer, train_loader, val_loader, scaler
    torch.cuda.empty_cache()

In [24]:
# Remove the extracted video frames. Use the configured output directory
# rather than a hard-coded copy of it, so the clean-up follows CFG.
shutil.rmtree(CFG["output_path"], ignore_errors=True)