In [27]:
from types import SimpleNamespace

import numpy as np
import pandas as pd
import scipy.stats as stats
from IPython.display import HTML
from pandarallel import pandarallel
pandarallel.initialize()

# Show up to 100 columns and 100 rows before pandas truncates a displayed frame.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, 100)


INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


In [2]:
# Template for files in the local Big Data Bowl 2021 download; '{}' is the file name.
path_shared = '~/Downloads/nfl-big-data-bowl-2021/{}'

# Read the four input tables in one pass; the order of names matches the order of files.
games_df, plays_df, players_df, track_df = (
    pd.read_csv(path_shared.format(csv_name))
    for csv_name in ('games.csv', 'plays.csv', 'players.csv', 'week1_norm.csv')
)

In [3]:
# Model constants, kept together in a plain attribute namespace.
# (A SimpleNamespace replaces the previous `lambda: None` attribute-bag hack;
# attribute access and `vars(params)` behave exactly as before.)
params = SimpleNamespace()
params.a_max = 8
params.v_max = 9
# Time needed to go from rest to v_max when accelerating at a_max.
params.reax_t = params.v_max / params.a_max
params.avg_ball_speed = 19.5
params.tti_sigma = 0.45
vars(params)

{'a_max': 8,
 'v_max': 9,
 'reax_t': 1.125,
 'avg_ball_speed': 19.5,
 'tti_sigma': 0.45}

In [4]:
# Play under inspection. Another candidate tried earlier: game 2018122314, play 4239.
# (A random pick via `random.choice(plays)` was also considered.)
game_id = 2018090905
play_id = 2062

# Keep only the tracking rows of that play, ordered by frame number.
is_selected_play = track_df.playId.eq(play_id) & track_df.gameId.eq(game_id)
play_df = track_df.loc[is_selected_play].sort_values(by='frameId')
play_df.head()

Unnamed: 0,gameId,playId,frameId,event,nflId,displayName,jerseyNumber,position,team,team_pos,teamAbbr,route,time,los,x,y,dis,o,s,s_dir,s_dir_rad,v_x,v_y,v_theta,v_mag,a_old,a_x,a_y,a_theta,a_mag
317421,2018090905,2062,1,,496735.0,Kareem Jackson,25.0,CB,away,DEF,HOU,,2018-09-09T18:28:11.700Z,40,54.51,31.08,0.08,260.44,0.83,108.4,1.89,0.79,-0.26,-0.32,0.83,0.38,0.0,0.0,0.0,0.0
317434,2018090905,2062,1,,0.0,Football,,,football,FTBL,FTBL,,2018-09-09T18:28:11.700Z,40,40.18,29.52,0.0,,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
317433,2018090905,2062,1,,2558094.0,Zach Cunningham,41.0,ILB,away,DEF,HOU,,2018-09-09T18:28:11.700Z,40,44.36,32.36,0.05,263.88,0.49,262.25,4.58,-0.49,-0.07,0.14,0.49,0.72,0.0,0.0,0.0,0.0
317432,2018090905,2062,1,,2552490.0,Benardrick McKinney,55.0,ILB,away,DEF,HOU,,2018-09-09T18:28:11.700Z,40,41.88,23.53,0.1,220.61,0.95,298.75,5.21,-0.83,0.46,-0.5,0.95,0.03,0.0,0.0,0.0,0.0
317430,2018090905,2062,1,,2552261.0,Kevin Johnson,30.0,CB,away,DEF,HOU,,2018-09-09T18:28:11.700Z,40,48.4,17.56,0.05,135.6,0.5,27.09,0.47,0.23,0.45,1.1,0.5,0.46,0.0,0.0,0.0,0.0


In [5]:
import time
from scipy.spatial import distance

def get_L_given_T(play_frame_group):
    """Estimate P(L | T): where on the field the ball is thrown, per throw duration.

    Every eligible receiver is projected forward (constant acceleration) for each
    candidate throw duration, a bivariate Gaussian is centred on that projected
    spot, and the receivers' densities are summed and normalised over the field.

    Parameters
    ----------
    play_frame_group : pandas.DataFrame
        Tracking rows of a single frame. Columns read: ``nflId``, ``position``,
        ``team_pos``, ``x``, ``y``, ``los``, ``v_x``, ``v_y``, ``v_theta``,
        ``v_mag``, ``a_x``, ``a_y``. Needs at least one ``position == 'QB'`` row.

    Returns
    -------
    numpy.ndarray
        Array of shape (F, T) with F = 55 * 120 field cells and T = 40 throw
        durations (0.1 .. 4.0). Each column sums to 1 over the field cells.
        Cell ``k`` corresponds to ``(iy, ix) = divmod(k, 120)``.

    Raises
    ------
    IndexError
        If the frame contains no QB row.
    """
    # Drop the football (nflId == 0). The mask is built from the group itself so
    # the function does not depend on a global `play_df` being defined/aligned.
    frame_df = play_frame_group.loc[play_frame_group.nflId != 0]

    # The QB's (rounded) position stands in for where the ball starts.
    qb_xy = frame_df.loc[frame_df.position == 'QB', ['x', 'y']].iloc[0].round(0)
    ball_x, ball_y = qb_xy['x'], qb_xy['y']  # label access, not positional
    frame_df = frame_df.loc[frame_df.position != 'QB']
    pocket_width = 10
    half_pocket = pocket_width // 2

    # Field grid: 120 cell centres along x, 55 along y (first y nudged to -0.2).
    x = np.linspace(0.5, 119.5, 120)
    y = np.linspace(-0.5, 53.5, 55)
    y[0] = -0.2
    field_locs = np.stack(np.meshgrid(x, y)).reshape(2, -1).T  # (F, 2)

    # Candidate throw durations.
    t = np.linspace(0.1, 4, 40)  # (T,)

    # Receivers: offensive players past the line of scrimmage or outside the pocket.
    is_receiver = (frame_df.team_pos == 'OFF') & (
        (frame_df.x > frame_df.los)
        | (frame_df.y > ball_y + half_pocket)
        | (frame_df.y < ball_y - half_pocket)
    )
    receivers_df = frame_df.loc[is_receiver,
                                ['x', 'y', 'v_x', 'v_y', 'v_theta', 'v_mag', 'los', 'a_x', 'a_y']]

    rec_x = receivers_df.x.to_numpy()[:, None]      # (R, 1)
    rec_y = receivers_df.y.to_numpy()[:, None]
    rec_v_x = receivers_df.v_x.to_numpy()[:, None]
    rec_v_y = receivers_df.v_y.to_numpy()[:, None]
    rec_a_x = receivers_df.a_x.to_numpy()[:, None]
    rec_a_y = receivers_df.a_y.to_numpy()[:, None]
    rec_v_theta = receivers_df.v_theta.to_numpy()   # (R,)

    dist_from_ball = np.hypot(rec_x[:, 0] - ball_x, rec_y[:, 0] - ball_y)  # (R,)

    # Spot the QB would aim at: lead each receiver along their current motion
    # for the duration of the throw.
    target_x = rec_x + rec_v_x * t + 0.5 * rec_a_x * t ** 2  # (R, T)
    target_y = rec_y + rec_v_y * t + 0.5 * rec_a_y * t ** 2  # (R, T)
    target_rec_locs = np.dstack((target_x, target_y))        # (R, T, 2)

    # Covariance scale grows with distance from the ball (capped at 10) and is
    # stretched along / squeezed across the direction of travel with speed.
    dist_infl = np.minimum(10, 4 + (dist_from_ball ** 2) / 54)
    # NOTE(review): v_mag > 11.3 makes (1 - speed_infl) negative, so Sigma stops
    # being positive definite and the normaliser below becomes NaN.
    speed_infl = (receivers_df.v_mag.to_numpy() / 11.3) ** 2

    # (F, R, T, 2): vector from each receiver's target spot to each field cell.
    reach_vecs = field_locs[:, None, None, :] - target_rec_locs
    # TODO likely speedup: ignore field cells more than ~20 away from a target.

    n_rec = len(receivers_df)
    Sigma_no_rot = np.zeros((n_rec, 2, 2))
    Sigma_no_rot[:, 0, 0] = dist_infl * (1 + speed_infl)
    Sigma_no_rot[:, 1, 1] = dist_infl * (1 - speed_infl)

    # v_theta is assumed to be the angle of the velocity axis from +x in radians
    # (TODO confirm against the normalisation step that produced the column).
    cos_t, sin_t = np.cos(rec_v_theta), np.sin(rec_v_theta)
    rot = np.stack([np.stack([cos_t, -sin_t], axis=-1),
                    np.stack([sin_t, cos_t], axis=-1)], axis=-2)  # (R, 2, 2)

    # Sigma = R S R^T puts the long axis along (cos theta, sin theta).
    # (The earlier R^T S R form mirrored the ellipse across the x-axis.)
    Sigma = rot @ Sigma_no_rot @ np.swapaxes(rot, -1, -2)  # (R, 2, 2)
    Sigma_inv = np.linalg.inv(Sigma)

    # (F, R, T): reach_vec^T Sigma^-1 reach_vec for every cell/receiver/duration.
    gauss_top = np.einsum('frti,rij,frtj->frt', reach_vecs, Sigma_inv, reach_vecs)
    # Bivariate Gaussian normaliser; the covariance is assumed constant over time,
    # so an (R, 1) column broadcasts across all T.
    gauss_bottom = 2 * np.pi * np.sqrt(np.linalg.det(Sigma))[:, None]
    # gauss_pdf[i, j, k]: density of throwing to cell i given receiver j is
    # targeted on a throw lasting t[k].
    gauss_pdf = np.exp(-gauss_top / 2) / gauss_bottom

    # Assumption: every receiver is equally likely to be targeted (a separation-
    # based heuristic, or a percentile cutoff on the result, could refine this).
    target_prob = gauss_pdf.sum(axis=1) / gauss_pdf.sum(axis=(0, 1))  # (F, T)
    return target_prob

The code below is still rough — treat it as pseudocode.

In [35]:
# Precomputed inputs consumed by get_L_T_given_t.
# One row per frame `t`; the remaining two columns are unpacked as gamma (alpha, theta).
T_given_ts = pd.read_csv(filepath_or_buffer='T_given_ts.csv')
# (100, F, T) ; Rishav's output
T_given_Ls = np.load(file='T_given_Ls.npy')
# 1, 2, ..., 40: ten times the 0.1 .. 4.0 throw-duration grid used in get_L_given_T.
tenT = np.linspace(start=1, stop=40, num=40)

def get_L_T_given_t(play_frame_group):
    """Combine throw-location and throw-duration models for one frame.

    Intended for ``groupby(['gameId', 'playId', 'frameId']).apply(...)``: the
    frame id is read from the third element of the group's ``name``.

    Reads the module-level ``T_given_ts``, ``T_given_Ls`` and ``tenT`` and calls
    ``get_L_given_T``.

    Parameters
    ----------
    play_frame_group : pandas.DataFrame
        Tracking rows of a single frame; must contain a ``position == 'QB'`` row.

    Returns
    -------
    numpy.ndarray or None
        ``None`` for frames with id below 21; otherwise an (F, T) array equal to
        ``P(L | t) * T_given_Ls[qb_x]``.

    Raises
    ------
    IndexError
        If ``T_given_ts`` has no row for this frame or the frame has no QB.
    """
    frame_id = play_frame_group.name[2]
    # Frames before 21 are skipped (returns None for them).
    if frame_id < 21:
        return

    # Row layout is (<ignored>, alpha, theta): gamma shape and scale for this frame.
    _, alpha, theta = T_given_ts.loc[T_given_ts.t == frame_id].iloc[0]
    T_given_t = stats.gamma.pdf(tenT, a=alpha, loc=0, scale=theta)  # (T,)

    # Locate the QB using the group's own columns rather than the global
    # `play_df`, so the function works on any frame it is handed.
    is_qb = (play_frame_group.nflId != 0) & (play_frame_group.position == 'QB')
    ball_start = play_frame_group.loc[is_qb, ['x', 'y']].iloc[0].round(0)
    # Label access: positional `ball_start[0]` is deprecated on a labelled Series.
    T_given_L = T_given_Ls[int(ball_start['x'])]  # (F, T)

    L_given_T = get_L_given_T(play_frame_group)  # (F, T)
    # P(L|t) = \sum_x P(L|T=x) P(T=x|t)
    L_given_t = (L_given_T * T_given_t).sum(axis=1, keepdims=True)  # (F, 1)

    L_T_given_t = L_given_t * T_given_L  # (F, T)
    return L_T_given_t

In [36]:
%%time
# Frame at which the first 'pass_forward' event is recorded for this play.
pass_frame_id = play_df.loc[play_df.event == 'pass_forward', 'frameId'].iloc[0]
# Only frames up to and including the release are evaluated.
pre_pass_df = play_df.loc[play_df.frameId <= pass_frame_id]
# One result per frame: None for skipped frames, otherwise an (F, T) array.
field_dfs = pre_pass_df.groupby(['gameId', 'playId', 'frameId']).apply(get_L_T_given_t)
field_dfs

CPU times: user 1.86 s, sys: 382 ms, total: 2.24 s
Wall time: 2.34 s


gameId      playId  frameId
2018090905  2062    1                                                       None
                    2                                                       None
                    3                                                       None
                    4                                                       None
                    5                                                       None
                    6                                                       None
                    7                                                       None
                    8                                                       None
                    9                                                       None
                    10                                                      None
                    11                                                      None
                    12                                                      None


In [8]:
from visualize import AnimatePlay
# Needs `field_dfs` from the groupby/apply cell; run the notebook top to bottom.
# NOTE(review): the meaning of the literal 20 is defined by AnimatePlay — confirm there.
animated_play = AnimatePlay(play_df, 20, field_dfs)
play_animation_html = animated_play.ani.to_jshtml()
HTML(play_animation_html)

NameError: name 'field_dfs' is not defined