In [1]:
import numpy as np
import pandas as pd
from scipy.optimize import linear_sum_assignment
import ast

# Height and width of the coordinate frame the boxes in val_data use
# (presumably the model input resolution — confirm). f1_calc divides by these
# values when it rescales boxes to 1280x720.
IMG_H = 512
IMG_W = 512

In [32]:
def iou(bbox1, bbox2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Each box is a 4-element sequence ``(x0, y0, x1, y1)``; every element is
    converted with ``float`` before use.

    Returns the integer ``0`` when the boxes do not share a region of positive
    width and height, otherwise the intersection area divided by the area of
    the union.
    """
    first = tuple(float(v) for v in bbox1)
    second = tuple(float(v) for v in bbox2)
    ax0, ay0, ax1, ay1 = first
    bx0, by0, bx1, by1 = second

    # Extent of the shared rectangle along each axis; non-positive means the
    # boxes are disjoint (or merely touching) on that axis.
    inter_w = min(ax1, bx1) - max(ax0, bx0)
    inter_h = min(ay1, by1) - max(ay0, by0)
    if inter_w <= 0 or inter_h <= 0:
        return 0

    area_a = (ax1 - ax0) * (ay1 - ay0)
    area_b = (bx1 - bx0) * (by1 - by0)
    inter_area = inter_w * inter_h

    return inter_area / (area_a + area_b - inter_area)

def precision_calc(gt_boxes, pred_boxes, max_frame_gap=4, min_iou=0.35):
    """Count true positives, false positives and false negatives for one video.

    Ground-truth and predicted boxes are paired one-to-one with the Hungarian
    algorithm. A pair is admissible when the frame numbers differ by at most
    ``max_frame_gap`` and the IoU of the two rectangles is not below
    ``min_iou``.

    Args:
        gt_boxes: Sequence of ``[frame, x0, y0, x1, y1]`` ground-truth boxes.
        pred_boxes: Sequence of ``[frame, x0, y0, x1, y1]`` predicted boxes.
        max_frame_gap: Largest allowed absolute frame difference (default 4).
        min_iou: Smallest IoU accepted as a match (default 0.35).

    Returns:
        Tuple ``(tp, fp, fn)`` of plain ``int`` counts.
    """
    n_gt, n_pred = len(gt_boxes), len(pred_boxes)

    # Nothing can be matched when either side is empty; answer directly
    # instead of relying on how the solver treats a zero-sized matrix.
    if n_gt == 0 or n_pred == 0:
        return 0, n_pred, n_gt

    # Cost 0 marks an admissible pair and cost 1 an inadmissible one, so the
    # minimum-cost assignment maximises the number of admissible matches.
    cost_matrix = np.ones((n_gt, n_pred))
    for i, gt_box in enumerate(gt_boxes):
        for j, pred_box in enumerate(pred_boxes):
            if abs(gt_box[0] - pred_box[0]) > max_frame_gap:
                continue
            iou_score = iou(gt_box[1:], pred_box[1:])
            if iou_score < min_iou:
                continue
            cost_matrix[i, j] = 0

    row_ind, col_ind = linear_sum_assignment(cost_matrix)
    tp = sum(1 for i, j in zip(row_ind, col_ind) if cost_matrix[i, j] == 0)

    # Every prediction / ground truth that is not part of an admissible pair
    # (whether left unassigned or assigned at cost 1) is an FP / FN.
    fp = n_pred - tp
    fn = n_gt - tp
    return tp, fp, fn

def f1_calc(val_data, impact_images):
    """Compute the F1 score of predicted boxes against ground-truth boxes.

    Boxes are grouped per video, rescaled, filtered with ``post_processing``
    and scored per video with ``precision_calc``; the per-video counts are
    summed before precision, recall and F1 are derived.

    Args:
        val_data: Mapping from image name to a pair whose first item is the
            list of ground-truth boxes and whose second item is the list of
            predicted boxes. Image names are parsed as
            ``"<video>_<frame>.<ext>"``. Each box must support ``.copy()``
            and list concatenation (i.e. be a plain list of 4 numbers).
        impact_images: Iterable of image names; ground-truth boxes are only
            used for images listed here. Predictions are used for every image.

    Returns:
        The F1 score. A summary line is also printed.
    """
    # Target frame size of the rescaled boxes (presumably the native video
    # resolution — confirm).
    video_w, video_h = 1280, 720

    # One hash lookup per image instead of a linear scan of the whole
    # collection (a numpy array in this notebook) for every key.
    impact_image_set = set(impact_images)

    def _to_video_coords(box):
        # Rescale in place from the IMG_W x IMG_H frame to video_w x video_h.
        box[0] = box[0] * video_w / IMG_W
        box[1] = box[1] * video_h / IMG_H
        box[2] = box[2] * video_w / IMG_W
        box[3] = box[3] * video_h / IMG_H
        return box

    data_dict = {}
    for key, value in val_data.items():
        stem_parts = key.split(".")[0].split("_")
        video = "_".join(stem_parts[:-1])
        frame = int(stem_parts[-1])

        gt_boxes, pred_boxes = data_dict.setdefault(video, ([], []))

        if key in impact_image_set:
            for ori_box in value[0]:
                box = ori_box.copy()
                # Ground-truth boxes have each coordinate pair swapped before
                # scaling (presumably stored as (y0, x0, y1, x1) — confirm);
                # predictions are used in the order given.
                box[0], box[1] = box[1], box[0]
                box[2], box[3] = box[3], box[2]
                gt_boxes.append([frame] + _to_video_coords(box))

        for ori_box in value[1]:
            pred_boxes.append([frame] + _to_video_coords(ori_box.copy()))

    data_dict = post_processing(data_dict)
    ftp, ffp, ffn = [], [], []

    for gt_boxes, pred_boxes in data_dict.values():
        tp, fp, fn = precision_calc(gt_boxes, pred_boxes)
        ftp.append(tp)
        ffp.append(fp)
        ffn.append(fn)

    tp = np.sum(ftp)
    fp = np.sum(ffp)
    fn = np.sum(ffn)
    # The small epsilon keeps the ratios defined when a denominator is zero.
    precision = tp / (tp + fp + 1e-6)
    recall = tp / (tp + fn + 1e-6)
    f1_score = 2 * (precision * recall) / (precision + recall + 1e-6)

    print(f'TP: {tp}, FP: {fp}, FN: {fn}, PRECISION: {precision:.4f}, RECALL: {recall:.4f}, F1 SCORE: {f1_score}')
    return f1_score

In [35]:
def post_processing(data_dict):
    """Drop predictions that have no nearby prediction in the other camera view.

    ``data_dict`` maps ``"<a>_<b>_<view>"`` video names to
    ``(gt_boxes, pred_boxes)`` pairs, where the first element of every box is
    its frame number. For each video the counterpart key is built by swapping
    the view ("Endzone" becomes "Sideline", anything else becomes "Endzone").
    A prediction is kept only if the counterpart has at least one prediction
    whose frame number is within 4 frames of it.

    Returns a new dict with the same keys; ground-truth lists are passed
    through unchanged and prediction lists are replaced by filtered copies.
    """
    filtered = {}

    for video, (gt_boxes, pred_boxes) in data_dict.items():
        name_parts = video.split("_")
        play_name = "_".join(name_parts[:2])
        other_view = "Sideline" if name_parts[2] == "Endzone" else 'Endzone'
        counterpart_key = f'{play_name}_{other_view}'

        # The counterpart is looked up per prediction, so a video without
        # predictions never touches data_dict[counterpart_key].
        kept_boxes = [
            box
            for box in pred_boxes
            if any(
                abs(box[0] - other_box[0]) <= 4
                for other_box in data_dict[counterpart_key][1]
            )
        ]

        filtered[video] = (gt_boxes, kept_boxes)

    return filtered

In [4]:
# Load the label table; missing values are replaced by 0.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
video_labels = pd.read_csv('/home/thinh/nfl/train_labels.csv').fillna(0)

# Build "<video without .mp4>_<frame>.png". regex=False makes '.mp4' a literal
# substring on every pandas version; under the old regex default the '.' would
# match any character and pandas emits a FutureWarning.
video_labels['image_name'] = (
    video_labels['video'].str.replace('.mp4', '', regex=False)
    + '_'
    + video_labels['frame'].astype(str)
    + '.png'
)

# Keep every row of any image whose summed 'impact' is positive. Filtering the
# rows on impact > 0 directly would also drop the impact == 0 rows that share
# an image with an impact.
video_labels = video_labels[video_labels.groupby('image_name')['impact'].transform("sum") > 0].reset_index(drop=True)

video_labels.head()

Unnamed: 0,gameKey,playID,view,video,frame,label,left,width,top,height,impact,impactType,confidence,visibility,image_name
0,57583,82,Endzone,57583_000082_Endzone.mp4,38,V87,960,20,356,15,0.0,0,0.0,0.0,57583_000082_Endzone_38.png
1,57583,82,Endzone,57583_000082_Endzone.mp4,38,V86,871,20,352,17,0.0,0,0.0,0.0,57583_000082_Endzone_38.png
2,57583,82,Endzone,57583_000082_Endzone.mp4,38,V74,766,22,319,16,0.0,0,0.0,0.0,57583_000082_Endzone_38.png
3,57583,82,Endzone,57583_000082_Endzone.mp4,38,V34,560,24,463,23,0.0,0,0.0,0.0,57583_000082_Endzone_38.png
4,57583,82,Endzone,57583_000082_Endzone.mp4,38,H97,407,21,312,28,0.0,0,0.0,0.0,57583_000082_Endzone_38.png


In [5]:
# Number of distinct images left after the filter (a missing name, if any, counts as one value).
video_labels['image_name'].nunique(dropna=False)

1472

In [37]:
# Read the dumped predictions and strip the numpy repr wrappers so the
# remaining text can be parsed as a plain Python literal.
with open('val_data_epoch2.txt') as file:
    line = " ".join(file.readlines())
    for wrapper in ('array(', ', dtype=float32)', ', shape=(0, 4)'):
        line = line.replace(wrapper, '')
    val_data = ast.literal_eval(line)

In [43]:
# Score the predictions parsed from val_data_epoch2.txt; ground truth is only
# used for images that still appear in video_labels after the impact filter.
f1_calc(val_data, video_labels['image_name'].unique())

TP: 6, FP: 39, FN: 156, PRECISION: 0.1333, RECALL: 0.0370, F1 SCORE: 0.057970673669991035


0.057970673669991035

In [None]:
# TP: 128, FP: 3273, FN: 321, PRECISION: 0.0376, RECALL: 0.2851, F1 SCORE: 0.0664933004148733
# 0.0664933004148733

# TP: 128, FP: 3273, FN: 321, PRECISION: 0.0376, RECALL: 0.2851, F1 SCORE: 0.0664933004148733
# 0.0664933004148733