In [3]:
import numpy as np
import pandas as pd

from pandarallel import pandarallel
pandarallel.initialize(nb_workers=3)

from IPython.display import HTML

pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)


INFO: Pandarallel will run on 4 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [4]:
path_shared = '../data/{}'

games_df = pd.read_csv(path_shared.format('games.csv'))
plays_df = pd.read_csv(path_shared.format('plays.csv'))
players_df = pd.read_csv(path_shared.format('players.csv'))
track_df = pd.read_csv(path_shared.format('week1_norm.csv', usecols=['nflId', 'displayName', 'position', 'team_pos', 'x', 'y', 'v_x', 'v_y', 'v_mag', 'v_theta', 'a_x', 'a_y', 'a_mag', 'a_theta']))

In [5]:
params = lambda: None # create an empty object to add params
params.a_max = 6
params.s_max = 9
# params.reax_t = params.s_max/params.a_max
params.reax_t = 0.2
params.tti_sigma = 0.45
params.cell_length = 1
params.alpha = 1.05
vars(params)

{'a_max': 6,
 's_max': 9,
 'reax_t': 0.2,
 'tti_sigma': 0.45,
 'cell_length': 1,
 'alpha': 1.05}

In [6]:
# deep lockett vs chiefs (16)
# game_id = 2018122314
# play_id = 4239

# deep dissly vs bears (2)
# game_id = 2018091700
# play_id = 3936

# deep middle insane gronk (1) 
game_id = 2018090905
play_id = 2062

# game_id = 2018091700
# play_id = 1959

# detroit red zone vs jets (1)
game_id = 2018091000
play_id = 3016

# mullens to kittle, def by wagner (13)
# game_id = 2018120213
# play_id = 398																									

# mullens to bobby, 98yd pick6 (13)
# game_id = 2018120213
# play_id = 3952	

# mahomes to tyreek, KJ defense (16)
# game_id = 2018122314
# play_id = 73																									

# game_id, play_id = random.choice(plays)

play_df = track_df[(track_df.playId == play_id) & (track_df.gameId == game_id)].sort_values(by = 'frameId')
play_df.head()

Unnamed: 0,gameId,playId,frameId,event,nflId,displayName,jerseyNumber,position,position_general,team,team_pos,teamAbbr,route,time,los,x,y,dis,o,s,s_dir,s_dir_rad,v_x,v_y,v_theta,v_mag,a_old,a_x,a_y,a_theta,a_mag
889671,2018091000,3016,1,,79860.0,Matthew Stafford,9.0,QB,QB,home,OFF,DET,,2018-09-11T01:46:06.900Z,82.88,78.56,30.01,0.0,83.25,0.0,99.57,1.74,0.0,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
889685,2018091000,3016,1,,0.0,Football,,,,football,FTBL,FTBL,,2018-09-11T01:46:06.900Z,82.88,83.43,29.86,0.0,,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
889684,2018091000,3016,1,,2561366.0,Frankie Luvu,50.0,LB,LB,away,DEF,NYJ,,2018-09-11T01:46:06.900Z,82.88,84.46,27.04,0.02,290.89,0.09,231.11,4.03,-0.07,-0.06,0.68,0.09,0.02,0.0,0.0,0.0,0.0
889683,2018091000,3016,1,,2558052.0,Kenny Golladay,19.0,WR,WR,home,OFF,DET,IN,2018-09-11T01:46:06.900Z,82.88,82.86,11.83,0.0,81.11,0.0,0.43,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
889682,2018091000,3016,1,,2557979.0,Jamal Adams,33.0,SS,S,away,DEF,NYJ,,2018-09-11T01:46:06.900Z,82.88,87.96,35.89,0.04,236.49,0.17,188.67,3.29,-0.03,-0.17,1.42,0.17,0.14,0.0,0.0,0.0,0.0


In [7]:
dt = np.float32

In [8]:
# input: tracking frame
# output: (all, off, def, no) int probability of each pass (F, T)
def comp_prob(play_frame_group):
    # remove ball and qb from df
    frame_df = play_frame_group.loc[(play_df.nflId!=0)]
    ball_start = frame_df.loc[frame_df.position=='QB'][['x', 'y']].iloc[0].round().to_numpy(dtype=dt)
    frame_df = frame_df.loc[frame_df.position!='QB']
    
    # project motion by reaction time
    frame_df['v_x_r'] = frame_df.a_x*params.reax_t+frame_df.v_x
    frame_df['v_y_r'] = frame_df.a_y*params.reax_t+frame_df.v_y
    frame_df['v_r_mag'] = np.linalg.norm(np.array([frame_df.v_x_r, frame_df.v_y_r]), axis=0)
    frame_df['v_r_theta'] = np.arctan(frame_df.v_y_r/frame_df.v_x_r).fillna(0)
    frame_df['x_r'] = frame_df.x + frame_df.v_x*params.reax_t + 0.5*frame_df.a_x*params.reax_t**2
    frame_df['y_r'] = frame_df.y + frame_df.v_y*params.reax_t + 0.5*frame_df.a_y*params.reax_t**2

    # generate pass arrays
    x = np.linspace(0.5, 119.5, 120, dtype=dt)
    y = np.linspace(-0.5, 53.5, 55, dtype=dt)
    y[0] = -0.2
    xx, yy = np.meshgrid(x, y)
    field_locs = np.stack((xx, yy)).reshape(2, -1).T  # (F, 2)
    T = np.linspace(0.1, 4, 40, dtype=dt) # (T,)

    player_teams = frame_df['team_pos'].to_numpy() # J,
    player_ids = frame_df['nflId'].to_numpy()
    reaction_player_locs = frame_df[['x_r', 'y_r']].to_numpy(dtype=dt) # (J, 2)
    reaction_player_vels = frame_df[['v_x_r', 'v_y_r']].to_numpy(dtype=dt) #(J,2)

    # intercept vector between each player and field location
    int_d_vec = field_locs[:, None, :] - reaction_player_locs #F, J, 2
    int_d_mag = np.linalg.norm(int_d_vec, axis=2) # F, J
    #projecting player velocity on d_vec to get initial speed along d_vec
    int_s0 = np.clip(np.sum(int_d_vec*reaction_player_vels, axis=2)/int_d_mag, -params.s_max, params.s_max) #F, J,

    # calculate time to int based on phys model
    t_lt_smax = (params.s_max-int_s0)/params.a_max  #F, J,
    d_lt_smax = t_lt_smax*((int_s0+params.s_max)/2) #F, J,
    d_at_smax = int_d_mag - d_lt_smax               #F, J,
    t_at_smax = d_at_smax/params.s_max              #F, J,
    t_tot = t_lt_smax+t_at_smax+params.reax_t       #F, J,

    # int success if T-t_tot = dT <  0. Put through sigmoid to add temporal uncertainty around 
    int_dT = T[None,:,None] - t_tot[:,None,:]         #F, T, J
    p_int = (1/(1. + np.exp( -np.pi/np.sqrt(3.0)/params.tti_sigma * int_dT, dtype=dt) )) #F, T, J
    # use p_int as memoized values for integration

    # trajectory integration
    g = 10.72468 #y/s/s
    reach_vecs = ball_start - field_locs  # (F, 2)
    dx = -reach_vecs[:, 0] #F
    dy = -reach_vecs[:, 1] #F
    vx = dx[:, None]/T[None, :]   #F, T
    vy = dy[:, None]/T[None, :]   #F, T
    vz_0 = (T*g)/2                #T

    # note that idx (i, j, k) into below arrays is invalid when j < k
    traj_ts = np.tile(T, (len(field_locs), len(T), 1)) #(F, T, T)
    traj_locs_x_idx = np.rint(np.clip((ball_start[0]+vx[:, :, None]*T), 0, len(x)-1)).astype(int) # F, T, T
    traj_locs_y_idx = np.rint(np.clip((ball_start[1]+vy[:, :, None]*T), 0, len(y)-1)).astype(int) # F, T, T
    traj_locs_z = 2.0+vz_0[None, :, None]*traj_ts-0.5*g*traj_ts*traj_ts #F, T, T
    path_idxs = np.ravel_multi_index(np.stack((traj_locs_y_idx, traj_locs_x_idx)).reshape(2, -1), xx.shape)  # (F*T*T,)
    traj_t_idxs = np.rint(10*traj_ts - 1).flatten().astype(int)  # (F, T, T)
    p_int_traj = p_int[path_idxs, traj_t_idxs] # F*T*T, J
    p_int_traj = p_int_traj.reshape((*traj_locs_x_idx.shape, len(reaction_player_locs)))  # F, T, T, J
    
    # account for ball height on traj and normalize each locations int probability
    lambda_z = np.where((traj_locs_z<3)&(traj_locs_z>0), 1, 0) #F, T, T # maybe change this to a normal distribution 
    p_int_traj = p_int_traj * lambda_z[:, :, :, None]
    norm_factor = np.maximum(1., p_int_traj.sum(axis=-1))  #F, T, T
    p_int_traj_norm = (p_int_traj/norm_factor[..., None])  #F, T, T, J

    # independent int probs at each point on trajectory
    all_p_int_traj = np.sum(p_int_traj_norm, axis=-1)  # F, T, T
    off_p_int_traj = np.sum(p_int_traj_norm, axis=-1, where=(player_teams=='OFF'))
    def_p_int_traj = np.sum(p_int_traj_norm, axis=-1, where=(player_teams=='DEF'))
    ind_p_int_traj = p_int_traj_norm #use for analyzing specific players

    # calc decaying residual probs after you take away p_int on earlier times in the traj 
    compl_all_p_int_traj = 1-all_p_int_traj  # F, T, T
    remaining_compl_p_int_traj = np.cumprod(compl_all_p_int_traj, axis=-1)  # F, T, T
    # maximum 0 because if it goes negative the pass has been caught by then and theres no residual probability
    shift_compl_cumsum = np.roll(remaining_compl_p_int_traj, 1, axis=-1)  # F, T, T
    shift_compl_cumsum[:, :, 0] = 1
    
    # multiply residual prob by p_int at that location
    all_completion_prob_dt = shift_compl_cumsum * all_p_int_traj  # F, T, T
    off_completion_prob_dt = shift_compl_cumsum * off_p_int_traj  # F, T, T
    def_completion_prob_dt = shift_compl_cumsum * def_p_int_traj  # F, T, T
    
    # now accumulate values over total traj for each team and take at T=t
    all_completion_prob = np.cumsum(all_completion_prob_dt, axis=-1)  # F, T, T
    off_completion_prob = np.cumsum(off_completion_prob_dt, axis=-1)  # F, T, T
    def_completion_prob = np.cumsum(def_completion_prob_dt, axis=-1)  # F, T, T

        #     #### Toy example
#         all_p_int_traj = [0, 0, 0.1, 0.2, 0.8, 0.8]
#         c_all_p_int_traj=[1, 1, 0.9, 0.8, 0.2, 0.2]
#         rem_compl_p_int_traj = [1, 1, 0.9, 0.72, 0.144, 0.0288]
#         0.1 + 0.9*0.2 + 0.72 * 0.8 + 0.144*0.8 = 0.9712
#         adjust_compl_prob =        [0, 0, 0.1, 0.28, 0.84, 0.84]


    # this einsum takes the diagonal values over the last two axes where T = t
    # this takes care of the t > T issue.
    all_p_int_pass = np.einsum('ijj->ij', all_completion_prob)  # F, T
    off_p_int_pass = np.einsum('ijj->ij', off_completion_prob)  # F, T
    def_p_int_pass = np.einsum('ijj->ij', def_completion_prob)  # F, T
    no_p_int_pass = 1-all_p_int_pass

    assert np.allclose(all_p_int_pass, off_p_int_pass + def_p_int_pass, atol=0.01)
    return (off_p_int_pass, def_p_int_pass)

    # below gets cutoff for combined model
    # this is only for viz (in reality want to keep F, T above and mult by value/transition prob)
    field_p_int_all = all_p_int_pass.mean(axis=1)  # F,
    field_p_int_off = off_p_int_pass.mean(axis=1)  # F, 
    field_p_int_def = def_p_int_pass.mean(axis=1)  # F,
    field_p_no_int = 1-field_p_int_all
    assert np.allclose(field_p_int_all, field_p_int_off + field_p_int_def, atol=0.01)
    assert np.all(field_p_int_all <= 1.01) and np.all(field_p_int_all >= -0.01)
    assert np.all(field_p_int_off <= 1.01) and np.all(field_p_int_off >= -0.01)
    assert np.all(field_p_int_def <= 1.01) and np.all(field_p_int_def >= -0.01)

    field_df = pd.DataFrame({
        'ball_start_x': ball_start[0],
        'ball_start_y': ball_start[1], 
        'ball_end_x': field_locs[:,0],
        'ball_end_y': field_locs[:,1],
        'p_mass_1': (((field_p_int_off-field_p_int_def)+1.)/2.).round(3),
        'p_mass_2': field_p_no_int.round(3),
        # 'p_mass_players': p_int_norm,
    })

    return field_df

In [9]:
T = np.linspace(0.1, 4, 40)
L_given_ts = np.load('in/L_given_t.npy')
T_given_Ls_df = pd.read_pickle('in/T_given_L.pkl')
x = np.linspace(0.5, 119.5, 120, dtype=dt)
y = np.linspace(-0.5, 53.5, 55, dtype=dt)
y[0] = -0.2
xx, yy = np.meshgrid(x, y)
field_locs = np.stack((xx, yy)).reshape(2, -1).T  # (F, 2)
# from L_given_t in historical notebook
x_min, x_max = -9, 70
y_min, y_max = -39, 40

# input: qb_loc (2,), t=frames_after_snap (int)
# output: (P(L,T)|t) int probability of each pass (F, T)
def hist_trans_prob(ball_start, t):
    # frame_df = play_frame_group.loc[(play_df.nflId!=0)]
    # ball_start = frame_df.loc[frame_df.position=='QB', ['x', 'y']].iloc[0].round(0).to_numpy(dtype=int)
    reach_vecs = np.rint(field_locs - ball_start).astype(int)  # (F, 2)
    reach_dist = np.rint(np.linalg.norm(reach_vecs, axis=1)).astype(int)  # (F,)
    # t = play_frame_group.name[2] - 11  # adjust to frames after snap
    
    """ P(L|t) """
    # mask for zeroing out parts of the field that are too far to be thrown to per the L_given_t model
    L_t_mask = np.zeros_like(xx, dtype=dt)  # (Y, X)
    L_t_mask[max(ball_start[1]+y_min,0):min(ball_start[1]+y_max,len(y)-1),\
             max(ball_start[0]+x_min,0):min(ball_start[0]+x_max,len(x)-1)] = 1.
    L_t_mask = L_t_mask.flatten()  # (F,)
    # we clip reach vecs to be used to index into L_given_t.
    # eg if qb is far right, then the left field will be clipped to y=-39 and later zeroed out
    clipped_reach_vecs = np.stack((np.clip(reach_vecs[:,0], x_min, x_max),
                                  np.clip(reach_vecs[:,1], y_min, y_max)))  # (2, F)
    L_given_t = L_given_ts[t, clipped_reach_vecs[1], clipped_reach_vecs[0]] * L_t_mask  # (F,) ; index with y and then x
    L_given_t /= L_given_t.sum()  # renormalize since part of L|t may have been off field
    
    """ P(T|L) """
    # we find T|L for sufficiently close spots (1 < L <= 60)
    reach_dist_in_bounds_idx = (reach_dist > 1) & (reach_dist <= 60)
    reach_dist_in_bounds = reach_dist[reach_dist_in_bounds_idx]
    T_given_L_subset = T_given_Ls_df.set_index('dist').loc[reach_dist_in_bounds, 'p'].to_numpy()\
        .reshape(len(reach_dist_in_bounds), -1)  # (F~, T) ; F~ is subset of F that is in [1, 60] yds from ball
    T_given_L = np.zeros((len(field_locs), len(T)))  # (F, T)
    # fill in the subset of values computed above
    T_given_L[reach_dist_in_bounds_idx] = T_given_L_subset
    
    L_T_given_t = L_given_t[:,None] * T_given_L #(F, T)
    return L_T_given_t

In [10]:
# input: tracking frame
# output: frame_eppa (F, T)... (writes intermediate F,T to disk)
def frame_eppa(play_frame_group):
    ball_start = play_frame_group.loc[play_frame_group.position=='QB', ['x', 'y']].iloc[0].round(0).to_numpy(dtype=int)
    t = max(play_frame_group.name[2] - 11, 0)
    
    (ppc_off, ppc_def) = comp_prob(play_frame_group)
    h_trans_prob = hist_trans_prob(ball_start, t)
    assert(h_trans_prob.shape == ppc_off.shape)
    
    ppc_old = (ppc_off-ppc_def+1)/2
    ppc = ppc_off
    ppc_no = 1-ppc_off-ppc_def
    trans_prob = h_trans_prob * np.power(ppc, params.alpha)
 
    eppa = ppc*trans_prob

    x = np.linspace(0.5, 119.5, 120, dtype=dt)
    y = np.linspace(-0.5, 53.5, 55, dtype=dt)
    y[0] = -0.2
    field_locs = np.stack(np.meshgrid(x, y)).reshape(2, -1).T  # (F, 2)
         
    field_df = pd.DataFrame({
        'ball_end_x': field_locs[:,0],
        'ball_end_y': field_locs[:,1],
        'eppa': eppa.sum(axis=1),
        'ppcf': ppc.sum(axis=1),
        'ppcf_no': ppc_no.sum(axis=1),
        'hist_trans': h_trans_prob.sum(axis=1),
        'trans': trans_prob.sum(axis=1),
        # 'ppc_off': ppc_off.sum(axis=1),
        'ppcf_def': ppc_def.sum(axis=1),
        'ppcf_old': ppc_old.sum(axis=1),
        # 't_ppc_off': (trans_prob*ppc_off).sum(axis=1),
        # 't_ppc_def': (trans_prob*ppc_def).sum(axis=1),
        # 'txoff': (trans_prob*throw_int_prob_off).sum(axis=1),
        # 'txdef': (trans_prob*throw_int_prob_def).sum(axis=1),
    })
    return field_df
    

In [12]:
%%time
field_dfs = play_df.groupby(['gameId', 'playId', 'frameId']).apply(frame_eppa)
field_dfs = field_dfs.reset_index(3, drop=True).reset_index()
field_dfs.head()

CPU times: user 3min 2s, sys: 1min 1s, total: 4min 4s
Wall time: 4min 4s


Unnamed: 0,gameId,playId,frameId,ball_end_x,ball_end_y,eppa,ppcf,ppcf_no,hist_trans,trans,ppcf_def,ppcf_old
0,2018091000,3016,1,0.5,-0.2,0.0,0.021577,39.97125,0.0,0.0,0.007173,20.007202
1,2018091000,3016,1,1.5,-0.2,0.0,0.02201,39.970625,0.0,0.0,0.007365,20.007322
2,2018091000,3016,1,2.5,-0.2,0.0,0.022672,39.969683,0.0,0.0,0.007644,20.007514
3,2018091000,3016,1,3.5,-0.2,0.0,0.023818,39.967986,0.0,0.0,0.008196,20.007811
4,2018091000,3016,1,4.5,-0.2,0.0,0.024166,39.967506,0.0,0.0,0.008328,20.007919


In [14]:
from visualize import AnimatePlay
animated_play = AnimatePlay(play_df, 20, field_dfs.rename(columns={'eppa': 'p_mass_1'}))
HTML(animated_play.ani.to_jshtml())

# Testing P(L,T|t).

In [15]:
animated_play = AnimatePlay(play_df, 20, field_dfs.rename(columns={'hist_trans': 'p_mass_1'}))
HTML(animated_play.ani.to_jshtml())

In [16]:
animated_play = AnimatePlay(good_df, 20, field_dfs.rename(columns={'trans': 'p_mass_1'}))
HTML(animated_play.ani.to_jshtml())

NameError: name 'good_df' is not defined

In [None]:
animated_play = AnimatePlay(good_df, 20, field_dfs.rename(columns={'txoff': 'p_mass_1'}))
HTML(animated_play.ani.to_jshtml())

In [30]:
import time
import traceback, sys, code

g = play_df.groupby(['gameId', 'playId', 'frameId'])
for name, group in list(g)[40:]:
    start = time.time()
    print(f'{name} started')
    try: get_field_df(group)
    except:
        type, value, tb = sys.exc_info()
        traceback.print_exc()
        last_frame = lambda tb=tb: last_frame(tb.tb_next) if tb.tb_next else tb
        frame = last_frame().tb_frame
        ns = dict(frame.f_globals)
        ns.update(frame.f_locals)
        code.interact(local=ns)
        # breakpoint()
    print(f'{name} ended {time.time()-start} ')


NameError: name 'play_df' is not defined

In [26]:
animated_play = AnimatePlay(good_df, 20, field_dfs.rename(columns={'throw_int_def': 'p_mass_1'}))
HTML(animated_play.ani.to_jshtml())

NameError: name 'good_df' is not defined

In [None]:
animated_play = AnimatePlay(good_df, 20, field_dfs.rename(columns={'txdef': 'p_mass_1'}))
HTML(animated_play.ani.to_jshtml())

In [28]:
animated_play = AnimatePlay(good_df, 20, field_dfs.rename(columns={'throw_no_int': 'p_mass_1'}))
HTML(animated_play.ani.to_jshtml())

NameError: name 'good_df' is not defined

In [18]:
# tenT = np.linspace(1, 40, 40)
# import scipy.stats as stats
# def get_L_T_given_t(play_frame_group):
#     frame_id = play_frame_group.name[2]
#     if frame_id < 21 or frame_id>74:
#         return
#     alpha, theta = T_given_ts.loc[T_given_ts.index == frame_id].iloc[0]
#     T_given_t = stats.gamma.pdf(tenT, a=alpha, loc=0, scale=theta)
#     L_given_T = get_L_given_T(play_frame_group)  # (F, T)
#     L_given_t = (L_given_T * T_given_t).sum(axis=1, keepdims=True)  # (F, 1) ; P(L|t) = \sum_x P(L|T=x)P(T=x|t)
#     T_given_L = T_given_Ls[np.round(play_frame_group.los.iloc[0]).astype(int)]  # (F, T)
#     L_T_given_t = L_given_t * T_given_L  # (F, T)
#     return L_T_given_t

In [29]:
field_dfs['throw_int_off_def'] = ((field_dfs.throw_int_off-field_dfs.throw_int_def)+1.)/2.
animated_play = AnimatePlay(good_df, 20, field_dfs.rename(columns={'throw_int_off_def': 'p_mass_1'}))
HTML(animated_play.ani.to_jshtml())

NameError: name 'field_dfs' is not defined

In [14]:
def get_L_given_T_old(play_frame_group):
    frame_df = play_frame_group.loc[(play_df.nflId!=0)]
    ball_start = frame_df.loc[frame_df.position=='QB', ['x', 'y']].iloc[0].round(0)
    frame_df = frame_df.loc[frame_df.position!='QB']
    pocket_width = 10

    x = np.linspace(0.5, 119.5, 120, dtype=dt)
    y = np.linspace(-0.5, 53.5, 55, dtype=dt)
    y[0] = -0.2
    field_locs = np.stack(np.meshgrid(x, y)).reshape(2, -1).T  # (F, 2)
    
    t = np.linspace(0.1, 4, 40)
    receivers_df = frame_df.loc[(frame_df.team_pos == 'OFF') &
                                 ((frame_df.x > frame_df.los) |
                                  (frame_df.y > ball_start[1] + pocket_width // 2) |
                                  (frame_df.y < ball_start[1] - pocket_width // 2)),
                                 ['x', 'y', 'v_x', 'v_y', 'v_theta', 'v_mag', 'los', 'a_x', 'a_y']]
    dist_from_ball_np = np.linalg.norm((receivers_df.x - ball_start[0],
                                                     receivers_df.y - ball_start[1]), axis=0)
    # find the spot the qb would aim at, leading the receiver in their current dir by the ball time
    rec_x_np = receivers_df.x.to_numpy(dtype=dt)[:,None]
    rec_y_np = receivers_df.y.to_numpy(dtype=dt)[:,None]
    rec_v_x_np = receivers_df.v_x.to_numpy(dtype=dt)[:,None]
    rec_v_y_np = receivers_df.v_y.to_numpy(dtype=dt)[:,None]
    rec_a_x_np = receivers_df.a_x.to_numpy(dtype=dt)[:,None]
    rec_a_y_np = receivers_df.a_y.to_numpy(dtype=dt)[:,None]
    rec_v_theta_np = receivers_df.v_theta.to_numpy(dtype=dt)[:,None]
    target_x = rec_x_np+rec_v_x_np*t+0.5*rec_a_x_np*t**2  # (R, T)
    target_y = rec_y_np+rec_v_y_np*t+0.5*rec_a_y_np*t**2  # (R, T)
    # receivers_df['target_x'] = receivers_df.x+receivers_df.v_x*receivers_df.dist_from_ball/params.avg_ball_speed
    # receivers_df['target_y'] = receivers_df.y+receivers_df.v_y*receivers_df.dist_from_ball/params.avg_ball_speed

    target_rec_locs = np.dstack((target_x, target_y))  # (R, T, 2)
    dist_infl = np.minimum(10, 4+(dist_from_ball_np**2)/54)
    speed_infl = (receivers_df.v_mag.to_numpy(dtype=dt)/11.3)**2
    # (F, R, T, 2) showing the vec diff btwn Fth spot on field and Rth rec target spot
    reach_vecs = field_locs[:,None,None,:] - target_rec_locs
    # TODO uncomment below for likely speedup
    # reach_vecs[np.linalg.norm(reach_vecs, axis=-1) > 20] = 0
    Sigma_no_rot = np.array([[dist_infl * (1 + speed_infl), np.zeros_like(dist_infl)],
                             [np.zeros_like(dist_infl), dist_infl * (1 - speed_infl)]]).transpose(2, 0, 1)  # (R, 2, 2)
    rot = np.array([[np.cos(rec_v_theta_np), -np.sin(rec_v_theta_np)],
                    [np.sin(rec_v_theta_np), np.cos(rec_v_theta_np)]])[..., 0].transpose(2, 0, 1)  # (R, 2, 2)
    Sigma = np.einsum('...ji,...jk,...kl', rot, Sigma_no_rot, rot)  # (R, 2, 2)
    
    # (F, R, T) vectorized calc of reach_vec.T @ Sigma.inv @ reach_vec
    gauss_top = np.einsum('...i,...ij,...j', reach_vecs.transpose(0, 2, 1, 3), np.linalg.inv(Sigma), reach_vecs.transpose(0, 2, 1, 3)).transpose(0, 2, 1)
    # assumption that cov mtx is const over time
    gauss_bottom = np.broadcast_to(2*np.pi*np.sqrt(np.linalg.det(Sigma))[:,None], (len(rec_x_np), len(t)))  # (R, T)  bivariate Gaussian normalizer
    # (F, R, T). (i, j, k) element is probability that spot i on field is thrown to given receiver j is targeted on a throw taking k seconds
    gauss_pdf = np.exp(-gauss_top/2) / gauss_bottom
    
    # assumption: each rec has uniform prob of being targeted. can use heuristic based on separation later
    target_prob = gauss_pdf.sum(axis=1) / gauss_pdf.sum(axis=(0, 1))  # (F, T)
    # 90%ile is a guess at this point. can tune it or use a heuristic of each rec getting a 50 sq yd window
    #cutoff = np.percentile(target_prob, 90)
    #target_prob[target_prob < cutoff] = 0
#     target_prob_mesh = target_prob.reshape(len(y), len(x), len(t))
#     field_df = pd.DataFrame({
#         'ball_start_x': ball_start[0],
#         'ball_start_y': ball_start[1],
#         'ball_end_x': field_locs[:,0],
#         'ball_end_y': field_locs[:,1],
#         'cp_off': target_prob.mean(axis=1)
#     })
    return target_prob

In [31]:
field_dfs.loc[(field_dfs.frameId==46)&(field_dfs.ball_end_x>60)&(field_dfs.ball_end_x<70)&(field_dfs.ball_end_y>10)&(field_dfs.ball_end_y<20)].sort_values('p_mass_1', ascending=False).head()

NameError: name 'field_dfs' is not defined

In [32]:
# USELESS JUNK BELOW

In [33]:
    # # field_df = pd.DataFrame({'ball_start_x': ball_start[0], 'ball_start_y': ball_start[1], 'ball_end_x': ball_end[0], 'ball_end_y': ball_end[1]})
    # field_df['ball_tof'] = np.sqrt((field_df.ball_start_x-field_df.ball_end_x)**2+(field_df.ball_start_y-field_df.ball_end_y)**2)/params.avg_ball_speed

    # def getPitchControl(field_row):
    #     frame_df['int_d_x'] = field_row['ball_end_x']-frame_df.x_r
    #     frame_df['int_d_y'] = field_row['ball_end_y']-frame_df.y_r
    #     frame_df['int_d_mag'] = np.linalg.norm(np.array([frame_df.int_d_x, frame_df.int_d_y]), axis=0)
    #     frame_df['int_d_theta'] = np.arctan(frame_df.int_d_y/frame_df.int_d_x).fillna(0)

    #     if frame_df['int_d_mag'].min() > field_row.ball_tof*params.v_max*1.2:
    #         field_row['cp_off'], field_row['cp_def'], field_row['cp_dead'] = 0, 0, 1
    #         return field_row

    #     frame_df['int_v0'] = (frame_df['v_x_r']*frame_df['int_d_x'] + frame_df['v_y_r']*frame_df['int_d_y'])/frame_df['int_d_mag']

    #     frame_df['d_lt_vm'] = (params.v_max**2-frame_df.int_v0**2)/(2*params.a_max)
    #     frame_df['t_lt_vm'] = (params.v_max-frame_df.int_v0)/(params.a_max)
    #     frame_df['d_at_vm'] = frame_df.int_d_mag-frame_df.d_lt_vm
    #     frame_df['t_at_vm'] = frame_df.d_at_vm/params.v_max
    #     frame_df['t_tot'] = frame_df.t_lt_vm+frame_df.t_at_vm
    #     frame_df['int_dT'] = field_row['ball_tof'] - frame_df.t_tot
    #     frame_df['P_int_T_j'] = (1/(1. + np.exp( -np.pi/np.sqrt(3.0)/params.tti_sigma * (frame_df.int_dT) ) )).round(2)

    #     # topOff = frame_df.loc[frame_df.team_pos=="OFF"].sort_values('P_int_T_j', ascending=False)['P_int_T_j'].iloc[0]
    #     # topDef = frame_df.loc[frame_df.team_pos=="DEF"].sort_values('P_int_T_j', ascending=False)['P_int_T_j'].iloc[0]
    #     # total_p_int = 
    #     frame_df['norm_factor'] = np.minimum(1.0, 1.0/frame_df['P_int_T_j'].sum())
    #     frame_df['P_int_T_j_norm'] = frame_df.P_int_T_j*frame_df.norm_factor.round(2)
    #     field_row['cp_dead'] = 1.0-frame_df['P_int_T_j_norm'].sum()
    #     field_row['cp_off'] = frame_df.loc[frame_df.team_pos=='OFF']['P_int_T_j_norm'].sum()
    #     field_row['cp_def'] = frame_df.loc[frame_df.team_pos=='DEF']['P_int_T_j_norm'].sum()

    #     return field_row

    # # field_df.iloc[2278:2281].apply(getPitchControl, axis=1)
    # field_df = field_df.apply(getPitchControl, axis=1)
    # # frame_df[['displayName', 'teamAbbr', 't_tot', 'int_dT', 'P_int_T_j', 'norm_factor', 'P_int_T_j_norm', 'dead_ball', 'cp']].sort_values('P_int_T_j', ascending=False)
    # print(f'{frame_df.iloc[0].frameId}', end=' ')
    # return field_df

In [34]:
# Select the game and play that you wish to see in week 1
import random

plays = list(track_df.groupby(['gameId', 'playId'], as_index=False).first()[['gameId', 'playId']].to_records(index=False))

game_id = 2018122314
play_id = 4239

# game_id, play_id = random.choice(plays)

play_df = track_df[(track_df.playId == play_id) & (track_df.gameId == game_id)].sort_values(by = 'time')
play_df.head()

Unnamed: 0,gameId,playId,frameId,event,nflId,displayName,jerseyNumber,position,position_general,team,team_pos,teamAbbr,route,time,los,x,y,dis,o,s,s_dir,s_dir_rad,v_x,v_y,v_theta,v_mag,a_old,a_x,a_y,a_theta,a_mag


In [35]:
frame = 40
frame_df = play_df[(play_df.nflId!=0)&(play_df.frameId == 40)][['nflId', 'displayName', 'position', 'team_pos', 'x', 'y', 'v_x', 'v_y', 'v_mag', 'v_theta', 'a_x', 'a_y', 'a_mag', 'a_theta']]

In [36]:
frame_df.head()

Unnamed: 0,nflId,displayName,position,team_pos,x,y,v_x,v_y,v_mag,v_theta,a_x,a_y,a_mag,a_theta


In [37]:
a_max = 8
v_max = 9
reax_t = v_max/a_max

frame_df['v_x_r'] = frame_df.a_x*reax_t+frame_df.v_x
frame_df['v_y_r'] = frame_df.a_y*reax_t+frame_df.v_y
frame_df['v_r_mag'] = np.linalg.norm(np.array([frame_df.v_x_r, frame_df.v_y_r]), axis=0)
frame_df['v_r_theta'] = np.arctan(frame_df.v_y_r/frame_df.v_x_r).fillna(0)

frame_df['x_r'] = frame_df.x + frame_df.v_x*reax_t - 0.5*frame_df.a_x*reax_t**2
frame_df['y_r'] = frame_df.y + frame_df.v_y*reax_t - 0.5*frame_df.a_y*reax_t**2
frame_df

Unnamed: 0,nflId,displayName,position,team_pos,x,y,v_x,v_y,v_mag,v_theta,a_x,a_y,a_mag,a_theta,v_x_r,v_y_r,v_r_mag,v_r_theta,x_r,y_r


In [None]:
frame_df['T_b'] = 2.7
frame_df['x_b'] = 75
frame_df['y_b'] = 20

frame_df['int_d_x'] = frame_df.x_b-frame_df.x_r
frame_df['int_d_y'] = frame_df.y_b-frame_df.y_r
frame_df['int_d_mag'] = np.linalg.norm(np.array([frame_df.int_d_x, frame_df.int_d_y]), axis=0)
frame_df['int_d_theta'] = np.arctan(frame_df.int_d_y/frame_df.int_d_x).fillna(0)

frame_df

In [None]:
frame_df['int_v0'] = (frame_df['v_x_r']*frame_df['int_d_x'] + frame_df['v_y_r']*frame_df['int_d_y'])/frame_df['int_d_mag']

frame_df['d_lt_vm'] = (v_max**2-frame_df.int_v0**2)/(2*a_max)
frame_df['t_lt_vm'] = (v_max-frame_df.int_v0)/(a_max)
frame_df['d_at_vm'] = frame_df.int_d_mag-frame_df.d_lt_vm
frame_df['t_at_vm'] = frame_df.d_at_vm/v_max
frame_df['t_tot'] = frame_df.t_lt_vm+frame_df.t_at_vm
frame_df['int_dT'] = frame_df.T_b - frame_df.t_tot

frame_df.sort_values('t_tot', ascending=True)


In [None]:
tti_sigma = 0.45
frame_df['P_int_T_j'] = (1/(1. + np.exp( -np.pi/np.sqrt(3.0)/tti_sigma * (frame_df.int_dT) ) )).round(2) 

frame_df[['displayName', 'team_pos', 'P_int_T_j']].sort_values('P_int_T_j', ascending=False)

In [None]:
animated_play = AnimatePlay(play_df, 20)
HTML(animated_play.ani.to_jshtml())

In [None]:


frame = 40
frame_df = play_df[(play_df.nflId!=0)&(play_df.frameId == 40)][['nflId', 'displayName', 'position', 'team_pos', 'x', 'y', 'v_x', 'v_y', 'v_mag', 'v_theta', 'a_x', 'a_y', 'a_mag', 'a_theta']]

frame_df = frame_df.copy()
frame_df = frame_df[['nflId', 'displayName', 'position', 'team_pos', 'x', 'y', 'v_x', 'v_y', 'v_mag', 'v_theta', 'a_x', 'a_y', 'a_mag', 'a_theta']]

start_loc = frame_df.loc[frame_df.position=='QB'][['x', 'y']].iloc[0]
frame_df = frame_df.loc[frame_df.position!='QB']

target_locs = np.array([np.array([x, y]) for y in range(52, -1, -1) for x in range(int(start_loc[0]), 120) ])
target_locs = target_locs + 0.5

start_locs = np.full_like(target_locs, start_loc)
tofs = np.linalg.norm(start_locs - target_locs, axis=1)/params.avg_ball_speed

frame_df['v_x_r'] = frame_df.a_x*reax_t+frame_df.v_x
frame_df['v_y_r'] = frame_df.a_y*reax_t+frame_df.v_y
frame_df['v_r_mag'] = np.linalg.norm(np.array([frame_df.v_x_r, frame_df.v_y_r]), axis=0)
frame_df['v_r_theta'] = np.arctan(frame_df.v_y_r/frame_df.v_x_r).fillna(0)

frame_df['x_r'] = frame_df.x + frame_df.v_x*reax_t - 0.5*frame_df.a_x*reax_t**2
frame_df['y_r'] = frame_df.y + frame_df.v_y*reax_t - 0.5*frame_df.a_y*reax_t**2

frame_df

In [None]:
PCS = dict()

for i, (ball_x, ball_y) in enumerate(target_locs):
    # i, (ball_x, ball_y) = random.choice(list(enumerate(target_locs)))
    # print(i, ball_x, ball_y, tofs[i])

    df = frame_df.copy()
    df['int_d_x'] = ball_x-df.x_r
    df['int_d_y'] = ball_y-df.y_r
    df['int_d_mag'] = np.linalg.norm(np.array([df.int_d_x, df.int_d_y]), axis=0)
    df['int_d_theta'] = np.arctan(df.int_d_y/df.int_d_x).fillna(0)

    df['int_v0'] = (df['v_x_r']*df['int_d_x'] + df['v_y_r']*df['int_d_y'])/df['int_d_mag']

    df['d_lt_vm'] = (params.v_max**2-df.int_v0**2)/(2*params.a_max)
    df['t_lt_vm'] = (params.v_max-df.int_v0)/(params.a_max)
    df['d_at_vm'] = df.int_d_mag-df.d_lt_vm
    df['t_at_vm'] = df.d_at_vm/v_max
    df['t_tot'] = df.t_lt_vm+df.t_at_vm
    df['int_dT'] = tofs[i] - df.t_tot
    df['P_int_T_j'] = (1/(1. + np.exp( -np.pi/np.sqrt(3.0)/params.tti_sigma * (df.int_dT) ) )).round(2)

    topOff = df.loc[df.team_pos=="OFF"].sort_values('P_int_T_j', ascending=False)['P_int_T_j'].iloc[0]
    topDef = df.loc[df.team_pos=="DEF"].sort_values('P_int_T_j', ascending=False)['P_int_T_j'].iloc[0]

    # topOff-topDef
    PCS[(ball_x, ball_y)] = round(topOff-topDef, 2)

