# Imports and configuration

In [1]:
import sys
# sys.path.insert(0, "timm-efficientdet-pytorch")
sys.path.insert(0, "../efficientdet-pytorch-master")
sys.path.insert(0, "../omegaconf")

import torch
import os
from datetime import datetime
import time
import random
import cv2
import pandas as pd
import numpy as np
import albumentations as A
import matplotlib.pyplot as plt
from albumentations.pytorch.transforms import ToTensorV2
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from glob import glob
import pandas as pd
from effdet import get_efficientdet_config, EfficientDet, DetBenchTrain, DetBenchPredict
from effdet.efficientdet import HeadNet
from tqdm import tqdm
from IPython.core.debugger import set_trace
import warnings
from scipy.optimize import linear_sum_assignment

warnings.filterwarnings("ignore")

SEED = 42

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(SEED)

# Network input resolution in pixels (height, width).
IMG_H = 1024
IMG_W = 1024

# Preferred accelerator is the second GPU. On hosts that do not have one,
# fall back to the first GPU and finally to the CPU, so that later
# `.to(device)` / `map_location=gpu_id` calls do not fail with an
# invalid-device error.
if torch.cuda.device_count() > 1:
    gpu_id = 'cuda:1'
elif torch.cuda.is_available():
    gpu_id = 'cuda:0'
else:
    gpu_id = 'cpu'
device = torch.device(gpu_id)
# DETECTION_THRESHOLD = 0.4
# DETECTOR_FILTERING_THRESHOLD = 0.3

# Load data

In [2]:
# Load the label table; missing values are replaced by 0.
video_labels = pd.read_csv('/home/thinh/nfl/train_labels.csv').fillna(0)
# Keep only rows with a non-zero frame index.
video_labels = video_labels[video_labels['frame'] != 0].reset_index(drop=True)
# First two underscore-separated fields of the video file name.
video_labels['video_name'] = video_labels['video'].str.split('_').str[:2].str.join('_')
# Per-frame image file name: "<video without .mp4>_<frame>.png".
# regex=False: '.mp4' must be removed literally; as a regular expression the
# leading '.' would match any character, and the default for `regex` differs
# between pandas versions.
video_labels['image_name'] = (
    video_labels['video'].str.replace('.mp4', '', regex=False)
    + '_' + video_labels['frame'].astype(str) + '.png'
)

# Plays held out for validation, identified by their `video_name` prefix.
video_valid = ['57583_000082', '57586_004152', '57911_000147', '57997_003691', '57680_002206', '58095_004022', '57906_000718', '58005_001254', '57679_003316', '58103_003494', '57998_002181', '58048_000086']
# Unique image file names belonging to the held-out plays.
images_valid = video_labels[video_labels['video_name'].isin(video_valid)]['image_name'].unique()

# Dataset

In [3]:
def get_valid_transforms():
    """Return the validation-time albumentations pipeline.

    The pipeline has no random augmentation: every image is resized to
    ``IMG_H`` x ``IMG_W`` and then passed through ``ToTensorV2``.
    """
    steps = [
        A.Resize(height=IMG_H, width=IMG_W, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps, p=1.0)

In [4]:
# Sub-directory (under /home/thinh/nfl) that holds the extracted frame images.
TRAIN_ROOT_PATH = 'train_images'


class DatasetRetriever(Dataset):
    """Dataset that loads frame images by file name for inference.

    Args:
        image_ids: Sequence (list, tuple or NumPy array) of image file names
            relative to the ``TRAIN_ROOT_PATH`` directory.
        transforms: Optional callable invoked as ``transforms(image=...)`` that
            returns a mapping with an ``'image'`` entry.
    """

    def __init__(self, image_ids, transforms=None):
        super().__init__()
        self.image_ids = image_ids
        self.transforms = transforms

    def __getitem__(self, index: int):
        """Return ``(image, image_id)`` for the sample at ``index``.

        The image is read with OpenCV, converted from BGR to RGB and scaled to
        float32 values in [0, 1] before the optional transforms are applied.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        image_id = self.image_ids[index]
        image_path = f'/home/thinh/nfl/{TRAIN_ROOT_PATH}/{image_id}'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals a missing/corrupt file by returning None
            # instead of raising; fail with the offending path.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        # The channel swap is exact, so it is done once on the raw image and
        # only then converted to float32.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        if self.transforms:
            image = self.transforms(image=image)['image']
        return image, image_id

    def __len__(self) -> int:
        """Number of images; works for any sized sequence, not only ndarrays."""
        return len(self.image_ids)

# Model

In [5]:
def load_net(checkpoint_path):
    """Build the EfficientDet-D6 predictor and load weights from a checkpoint.

    Args:
        checkpoint_path: Path to a file readable by ``torch.load`` whose
            content has a ``'model_state_dict'`` entry.

    Returns:
        The network wrapped in ``DetBenchPredict``, switched to eval mode and
        moved to the module-level ``device``.
    """
    config = get_efficientdet_config('tf_efficientdet_d6')

    # config.num_classes = 2
    # Network input size follows the notebook-wide IMG_H x IMG_W setting.
    config.image_size = [IMG_H, IMG_W]
    config.norm_kwargs=dict(eps=.001, momentum=.01)

    # No pretrained backbone is requested; weights are loaded from the
    # checkpoint below.
    net = EfficientDet(config, pretrained_backbone=False)
    # Checkpoint tensors are mapped onto `gpu_id` while deserializing.
    checkpoint = torch.load(checkpoint_path, map_location=gpu_id)
    
    # The classification head is rebuilt for 2 classes BEFORE the state dict is
    # loaded, so the order of the next three statements matters.
    net.reset_head(num_classes=2)
    # NOTE(review): this relies on `config.num_classes` already being 2 at this
    # point (presumably updated by reset_head above) - confirm against the
    # installed effdet version.
    net.class_net = HeadNet(config, num_outputs=config.num_classes)
    net.load_state_dict(checkpoint['model_state_dict'])
    
    # Wrap for inference, then switch to eval mode before moving to the device.
    net = DetBenchPredict(net)
    net.eval()
    return net.to(device)

In [6]:
# Inference dataset over the held-out validation images, using the
# validation-time transforms only.
dataset = DatasetRetriever(images_valid, transforms=get_valid_transforms())

def collate_fn(batch):
    """Transpose a batch of samples into one tuple per field.

    ``[(img0, id0), (img1, id1)]`` becomes ``((img0, img1), (id0, id1))``;
    nothing is stacked or converted.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Fixed-order loader (no shuffling, last partial batch kept); samples are
# grouped by `collate_fn` rather than the default collation.
data_loader = DataLoader(
    dataset=dataset,
    batch_size=16,
    num_workers=4,
    collate_fn=collate_fn,
    shuffle=False,
    drop_last=False,
)

In [7]:
def make_predictions(net, images, target_label=2):
    """Run the detector on a batch and keep detections of one label.

    Args:
        net: Callable invoked as ``net(images, info)``. Its result is indexed
            per image and each entry is read as a 2-D array whose columns
            0-3 are box coordinates, column 4 the score and column 5 the label.
        images: Sequence of equally-shaped image tensors; they are stacked
            into one batch and moved to the module-level ``device``.
        target_label: Label value to keep (default ``2``, the value the
            original implementation hard-coded).

    Returns:
        ``(box_list, score_list)``: two lists with one NumPy array per image,
        holding the boxes (``k x 4``) and scores (``k``) of the kept rows.
    """
    images = torch.stack(images).to(device).float()
    batch_size = images.shape[0]

    # Per-image side information handed to the network: unit scale and the
    # spatial size of the stacked batch (dims 2 and 3, in that order).
    # NOTE(review): confirm the (H, W) vs (W, H) order the installed effdet
    # expects before using non-square inputs.
    target_res = {
        'img_scale': torch.ones(batch_size, dtype=torch.float32, device=device),
        'img_size': torch.tensor(images.shape[2:]).repeat(batch_size, 1).to(device),
    }

    with torch.no_grad():
        det = net(images, target_res)

    box_list = []
    score_list = []
    for i in range(batch_size):
        # Copy each image's detections to host memory once, not per column.
        det_np = det[i].detach().cpu().numpy()
        # Using only rows whose label equals `target_label`.
        keep = det_np[:, 5] == target_label
        box_list.append(det_np[keep, :4])
        score_list.append(det_np[keep, 4])
    return box_list, score_list


# Predict

In [8]:
def _to_frame_ltwh(boxes, frame_width, frame_height):
    """Convert network-space corner boxes to integer [left, top, width, height] in frame pixels."""
    boxes = boxes.copy()
    # Rescale from the IMG_W x IMG_H network input to the video frame size.
    boxes[:, [0, 2]] = boxes[:, [0, 2]] * frame_width / IMG_W
    boxes[:, [1, 3]] = boxes[:, [1, 3]] * frame_height / IMG_H
    # Columns 2 and 3 become width and height.
    boxes[:, 2] -= boxes[:, 0]
    boxes[:, 3] -= boxes[:, 1]
    boxes = boxes.astype(np.int32)
    boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(min=0, max=frame_width - 1)
    boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(min=0, max=frame_height - 1)
    return boxes


def _add_video_metadata(test_df):
    """Derive gameKey/playID/view/frame/video from ``image_name`` and order the output columns."""
    # image_name layout: "<gameKey>_<playID>_<view>_<frame>.png"
    parts = test_df['image_name'].str.split('_')
    test_df['gameKey'] = parts.str[0].astype(int)
    test_df['playID'] = parts.str[1].astype(int)
    test_df['view'] = parts.str[2]
    # regex=False: strip the literal '.png' (as a regex, '.' matches any char).
    test_df['frame'] = parts.str[3].str.replace('.png', '', regex=False).astype(int)
    # `n` must be passed by keyword: it is keyword-only in pandas 2.x.
    test_df['video'] = test_df['image_name'].str.rsplit('_', n=1).str[0] + '.mp4'
    return test_df[["gameKey", "playID", "view", "video", "frame", "left", "width", "top", "height", "score"]]


def predict(model_name, frame_width=1280, frame_height=720):
    """Run a checkpoint over ``data_loader`` and write its detections to CSV.

    Loads ``<model_name>.bin`` from the tito-1024 model directory, predicts
    every batch of the module-level ``data_loader``, converts the boxes to
    integer ``left/top/width/height`` in frame pixels and saves one row per
    detection to ``<model_name>.csv`` in the same directory.

    Args:
        model_name: Checkpoint file name without the ``.bin`` extension.
        frame_width: Width in pixels of the original video frames.
        frame_height: Height in pixels of the original video frames.

    Returns:
        None. The shape of the raw detection table is printed and the final
        table is written to disk.
    """
    model_dir = '/home/thinh/nfl/effdet5-models/tito-1024'
    net = load_net(f'{model_dir}/{model_name}.bin')

    result_image_ids = []
    results_boxes = []
    results_scores = []
    for images, image_ids in tqdm(data_loader):
        box_list, score_list = make_predictions(net, images)
        for image_id, boxes, scores in zip(image_ids, box_list, score_list):
            boxes = _to_frame_ltwh(boxes, frame_width, frame_height)
            # One image_name entry per detection keeps the columns aligned.
            result_image_ids += [image_id] * len(boxes)
            results_boxes.append(boxes)
            results_scores.append(scores)

    box_df = pd.DataFrame(np.concatenate(results_boxes), columns=['left', 'top', 'width', 'height'])
    test_df = pd.DataFrame({'score': np.concatenate(results_scores), 'image_name': result_image_ids})
    test_df = pd.concat([test_df, box_df], axis=1)
    # test_df = test_df[test_df.scores > DETECTOR_FILTERING_THRESHOLD]
    print(test_df.shape)

    # Output schema: gameKey,playID,view,video,frame,left,width,top,height,score
    # e.g. 57590,3607,Endzone,57590_003607_Endzone.mp4,1,1,1,1,1
    test_df = _add_video_metadata(test_df)

    test_df.to_csv(f'{model_dir}/{model_name}.csv', index=False)

In [9]:
# Checkpoint to evaluate (file name without the .bin extension); predictions
# are written next to it as <model_name>.csv.
model_name = 'tito-checkpoint-D6-1024-A1-epoch030-fold0-gcp'
predict(model_name=model_name)

100%|██████████| 653/653 [35:36<00:00,  3.27s/it]


(230648, 6)


In [10]:
# for file in glob("/home/thinh/nfl/effdet5-models/tito-512/tito-checkpoint-512-deim-epoch*"):
#     model_name = os.path.basename(file).split(".")[0]
#     print(model_name)
#     predict(model_name)