In [1]:
import numpy as np
import pandas as pd
import dask.dataframe as dd

from pandarallel import pandarallel
pandarallel.initialize(nb_workers=16)

from IPython.display import HTML

pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)


INFO: Pandarallel will run on 16 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [2]:
path_shared = '../data/{}'

games_df = pd.read_csv(path_shared.format('games.csv'))
plays_df = pd.read_csv(path_shared.format('plays.csv'))
players_df = pd.read_csv(path_shared.format('players.csv'))
coverage_df = pd.read_csv(path_shared.format('coverages_week1.csv'))
track_df = pd.read_csv(path_shared.format('week1_norm.csv', usecols=['nflId', 'displayName', 'position', 'team_pos', 'x', 'y', 'v_x', 'v_y', 'v_mag', 'v_theta', 'a_x', 'a_y', 'a_mag', 'a_theta']))

plays_df = pd.merge(plays_df, coverage_df, left_on=['gameId', 'playId'], right_on=['gameId', 'playId'])

In [3]:

plays_df[plays_df.coverage.str.contains('Zone')].sort_values('offensePlayResult', ascending=False).head()

Unnamed: 0,gameId,playId,playDescription,quarter,down,yardsToGo,possessionTeam,playType,yardlineSide,yardlineNumber,offenseFormation,personnelO,defendersInTheBox,numberOfPassRushers,personnelD,typeDropback,preSnapVisitorScore,preSnapHomeScore,gameClock,absoluteYardlineNumber,penaltyCodes,penaltyJerseyNumbers,passResult,offensePlayResult,playResult,epa,isDefensivePI,coverage
307,2018090903,3264,(10:33) (No Huddle) R.Tannehill pass deep midd...,4,1,10,MIA,play_type_pass,MIA,25,SINGLEBACK,"1 RB, 2 TE, 2 WR",6.0,4.0,"3 DL, 4 LB, 4 DB",TRADITIONAL,10.0,17.0,10:33:00,35.0,,,C,75,75,5.759313,False,Cover 6 Zone
119,2018090901,2324,(9:59) (Shotgun) B.Roethlisberger pass short r...,3,1,10,PIT,play_type_pass,PIT,26,SHOTGUN,"1 RB, 1 TE, 3 WR",7.0,4.0,"4 DL, 2 LB, 5 DB",TRADITIONAL,7.0,7.0,09:59:00,36.0,,,C,67,67,4.14681,False,Cover 3 Zone
557,2018090906,354,(9:51) R.Fitzpatrick pass deep right to D.Jack...,1,1,10,TB,play_type_pass,TB,42,SINGLEBACK,"1 RB, 1 TE, 3 WR",6.0,4.0,"4 DL, 2 LB, 5 DB",TRADITIONAL,0.0,7.0,09:51:00,52.0,,,C,58,58,4.733419,False,Cover 3 Zone
914,2018091001,124,(13:25) (Shotgun) D.Carr pass short middle to ...,1,1,10,OAK,play_type_pass,OAK,35,EMPTY,"1 RB, 2 TE, 2 WR",5.0,5.0,"3 DL, 4 LB, 4 DB",TRADITIONAL,0.0,0.0,13:25:00,45.0,,,C,45,45,2.638069,False,Cover 3 Zone
868,2018091000,1672,(2:00) (Shotgun) S.Darnold pass deep left to R...,2,3,2,NYJ,play_type_pass,DET,41,SHOTGUN,"1 RB, 1 TE, 3 WR",6.0,3.0,"3 DL, 3 LB, 5 DB",TRADITIONAL,10.0,7.0,02:00:00,69.0,,,C,41,41,4.850242,False,Cover 2 Zone


In [4]:
params = lambda: None # create an empty object to add params
params.a_max = 7
params.s_max = 9
params.reax_t = params.s_max/params.a_max
params.avg_ball_speed = 20
params.tti_sigma = 0.45
params.cell_length = 1
vars(params)

{'a_max': 7,
 's_max': 9,
 'reax_t': 1.2857142857142858,
 'avg_ball_speed': 20,
 'tti_sigma': 0.45,
 'cell_length': 1}

In [5]:
# deep lockett vs chiefs
# game_id = 2018122314
# play_id = 4239

# deep dissly vs bears
# game_id = 2018091700
# play_id = 3936

# deep middle insane gronk 
game_id = 2018090905
play_id = 2062

# game_id, play_id = random.choice(plays)

play_df = track_df[(track_df.playId == play_id) & (track_df.gameId == game_id)].sort_values(by = 'frameId')
play_df.head()

Unnamed: 0,gameId,playId,frameId,event,nflId,displayName,jerseyNumber,position,position_general,team,team_pos,teamAbbr,route,time,los,x,y,dis,o,s,s_dir,s_dir_rad,v_x,v_y,v_theta,v_mag,a_old,a_x,a_y,a_theta,a_mag
317421,2018090905,2062,1,,496735.0,Kareem Jackson,25.0,CB,DB,away,DEF,HOU,,2018-09-09T18:28:11.700Z,50.91,65.42,31.08,0.08,260.44,0.83,108.4,1.89,0.79,-0.26,-0.32,0.83,0.38,0.0,0.0,0.0,0.0
317434,2018090905,2062,1,,0.0,Football,,,,football,FTBL,FTBL,,2018-09-09T18:28:11.700Z,50.91,51.09,29.52,0.0,,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
317433,2018090905,2062,1,,2558094.0,Zach Cunningham,41.0,ILB,LB,away,DEF,HOU,,2018-09-09T18:28:11.700Z,50.91,55.27,32.36,0.05,263.88,0.49,262.25,4.58,-0.49,-0.07,0.14,0.49,0.72,0.0,0.0,0.0,0.0
317432,2018090905,2062,1,,2552490.0,Benardrick McKinney,55.0,ILB,LB,away,DEF,HOU,,2018-09-09T18:28:11.700Z,50.91,52.79,23.53,0.1,220.61,0.95,298.75,5.21,-0.83,0.46,-0.5,0.95,0.03,0.0,0.0,0.0,0.0
317430,2018090905,2062,1,,2552261.0,Kevin Johnson,30.0,CB,DB,away,DEF,HOU,,2018-09-09T18:28:11.700Z,50.91,59.31,17.56,0.05,135.6,0.5,27.09,0.47,0.23,0.45,1.1,0.5,0.46,0.0,0.0,0.0,0.0


In [6]:
plays_df.loc[(plays_df.gameId == game_id) & (plays_df.playId == play_id)]

Unnamed: 0,gameId,playId,playDescription,quarter,down,yardsToGo,possessionTeam,playType,yardlineSide,yardlineNumber,offenseFormation,personnelO,defendersInTheBox,numberOfPassRushers,personnelD,typeDropback,preSnapVisitorScore,preSnapHomeScore,gameClock,absoluteYardlineNumber,penaltyCodes,penaltyJerseyNumbers,passResult,offensePlayResult,playResult,epa,isDefensivePI,coverage
438,2018090905,2062,(1:05) (Shotgun) T.Brady pass deep middle to R...,2,1,10,NE,play_type_pass,NE,41,SHOTGUN,"2 RB, 1 TE, 2 WR",6.0,4.0,"4 DL, 2 LB, 5 DB",TRADITIONAL,6.0,14.0,01:05:00,69.0,,,C,28,28,1.248695,False,Cover 3 Zone


In [7]:
games_df[games_df.gameId==game_id]

Unnamed: 0,gameId,gameDate,gameTimeEastern,homeTeamAbbr,visitorTeamAbbr,week
5,2018090905,09/09/2018,13:00:00,NE,HOU,1


In [8]:
%%time

def get_field_df(play_frame_group):
    frame_df = play_frame_group.loc[(play_df.nflId!=0)]
    # frame_df.set_index('nflId')
    ball_start = frame_df.loc[frame_df.position=='QB'][['x', 'y']].iloc[0].round().to_numpy()
    frame_df = frame_df.loc[frame_df.position!='QB']
    frame_df['v_x_r'] = frame_df.a_x*params.reax_t+frame_df.v_x
    frame_df['v_y_r'] = frame_df.a_y*params.reax_t+frame_df.v_y
    frame_df['v_r_mag'] = np.linalg.norm(np.array([frame_df.v_x_r, frame_df.v_y_r]), axis=0)
    frame_df['v_r_theta'] = np.arctan(frame_df.v_y_r/frame_df.v_x_r).fillna(0)

    frame_df['x_r'] = frame_df.x + frame_df.v_x*params.reax_t + 0.5*frame_df.a_x*params.reax_t**2
    frame_df['y_r'] = frame_df.y + frame_df.v_y*params.reax_t + 0.5*frame_df.a_y*params.reax_t**2
    
    x = np.linspace(0.5, 119.5, 120)
    y = np.linspace(-0.5, 53.5, 55)
    y[0] = -0.2
    field_locs = np.stack(np.meshgrid(x, y)).reshape(2, -1).T  # (F, 2)
    T = np.linspace(0.1, 4, 40) # (T,)

    player_teams = frame_df['team_pos'].to_numpy() # J,
    reaction_player_locs = frame_df[['x_r', 'y_r']].to_numpy() # (J, 2)
    reaction_player_vels = frame_df[['v_x_r', 'v_y_r']].to_numpy() #(J,2)

    # tofs = np.linalg.norm(field_locs-ball_start, axis=1)/params.avg_ball_speed # (F,)

    int_d_vec = field_locs[:, None, :] - reaction_player_locs #F, J, 2
    int_d_mag = np.linalg.norm(int_d_vec, axis=2) # F, J
    int_s0 = np.clip(np.sum(int_d_vec*reaction_player_vels, axis=2)/int_d_mag, -params.s_max, params.s_max) #F, J,  #taking norm between vectors int_d and player velocity

    t_lt_smax = (params.s_max-int_s0)/params.a_max  #F, J,
    d_lt_smax = t_lt_smax*((int_s0+params.s_max)/2) #F, J,
    d_at_smax = int_d_mag - d_lt_smax               #F, J,
    t_at_smax = d_at_smax/params.s_max              #F, J,
    t_tot = t_lt_smax+t_at_smax                     #F, J,

    # int_dT = tofs[:, None] - t_tot                  #F, J, T
    # int_dT = int_d_mag - T[:, None, None]           #T, F, J
    int_dT = np.subtract.outer(t_tot, T)                 #F, J, T
    p_int = (1/(1. + np.exp( -np.pi/np.sqrt(3.0)/params.tti_sigma * int_dT) )) #F, J, T
    p_int = np.mean(p_int, axis=2) #F, J

    # F, T 
    # T, 3 (t, x, y )
    # np.take (p_int, (t, x, y))

    norm_factor = np.maximum(1., p_int.sum(axis=1))     #F,
    p_int_norm = (p_int/norm_factor[:, None])  #F, J

    p_no_int = 1-p_int_norm.sum(axis=1)                 #F
    
    p_int_off = p_int_norm.sum(axis=1, where=(player_teams=='OFF')) #F
    p_int_def = np.sum(p_int_norm, axis=1, where=(player_teams=='DEF')) #F
    field_df = pd.DataFrame({
        'ball_start_x': ball_start[0],
        'ball_start_y': ball_start[1], 
        'ball_end_x': field_locs[:,0],
        'ball_end_y': field_locs[:,1],
        'p_mass_1': (((p_int_off-p_int_def)+1.)/2.).round(3),
        'p_mass_2': p_no_int.round(3),
        # 'p_mass_players': p_int_norm,
    })

    return field_df
# play_df.loc[play_df.frameId==1]
# play_df.loc[play_df.frameId==1].apply(get_field_df)
# get_field_df(play_df.loc[play_df.frameId==1])
field_dfs = play_df.groupby(['gameId', 'playId', 'frameId']).parallel_apply(get_field_df)
field_dfs = field_dfs.reset_index(3, drop=True).reset_index()
field_dfs

CPU times: user 153 ms, sys: 144 ms, total: 297 ms
Wall time: 1.98 s


Unnamed: 0,gameId,playId,frameId,ball_start_x,ball_start_y,ball_end_x,ball_end_y,p_mass_1,p_mass_2
0,2018090905,2062,1,46.0,30.0,0.5,-0.2,0.417,0.0
1,2018090905,2062,1,46.0,30.0,1.5,-0.2,0.417,0.0
2,2018090905,2062,1,46.0,30.0,2.5,-0.2,0.417,0.0
3,2018090905,2062,1,46.0,30.0,3.5,-0.2,0.417,0.0
4,2018090905,2062,1,46.0,30.0,4.5,-0.2,0.417,-0.0
...,...,...,...,...,...,...,...,...,...
455395,2018090905,2062,69,42.0,32.0,115.5,53.5,0.417,0.0
455396,2018090905,2062,69,42.0,32.0,116.5,53.5,0.417,0.0
455397,2018090905,2062,69,42.0,32.0,117.5,53.5,0.417,0.0
455398,2018090905,2062,69,42.0,32.0,118.5,53.5,0.417,0.0


In [9]:
from visualize import AnimatePlay
animated_play = AnimatePlay(play_df, 20, field_dfs)
HTML(animated_play.ani.to_jshtml())

In [10]:
field_dfs.loc[(field_dfs.frameId==46)&(field_dfs.ball_end_x>60)&(field_dfs.ball_end_x<70)&(field_dfs.ball_end_y>10)&(field_dfs.ball_end_y<20)].sort_values('p_mass_1', ascending=False).head()

Unnamed: 0,gameId,playId,frameId,ball_start_x,ball_start_y,ball_end_x,ball_end_y,p_mass_1,p_mass_2
298389,2018090905,2062,46,42.0,30.0,69.5,10.5,0.445,0.0
299109,2018090905,2062,46,42.0,30.0,69.5,16.5,0.442,0.0
298388,2018090905,2062,46,42.0,30.0,68.5,10.5,0.441,-0.0
298509,2018090905,2062,46,42.0,30.0,69.5,11.5,0.441,-0.0
299469,2018090905,2062,46,42.0,30.0,69.5,19.5,0.44,-0.0


In [None]:
# USELESS JUNK BELOW

In [None]:
    # # field_df = pd.DataFrame({'ball_start_x': ball_start[0], 'ball_start_y': ball_start[1], 'ball_end_x': ball_end[0], 'ball_end_y': ball_end[1]})
    # field_df['ball_tof'] = np.sqrt((field_df.ball_start_x-field_df.ball_end_x)**2+(field_df.ball_start_y-field_df.ball_end_y)**2)/params.avg_ball_speed

    # def getPitchControl(field_row):
    #     frame_df['int_d_x'] = field_row['ball_end_x']-frame_df.x_r
    #     frame_df['int_d_y'] = field_row['ball_end_y']-frame_df.y_r
    #     frame_df['int_d_mag'] = np.linalg.norm(np.array([frame_df.int_d_x, frame_df.int_d_y]), axis=0)
    #     frame_df['int_d_theta'] = np.arctan(frame_df.int_d_y/frame_df.int_d_x).fillna(0)

    #     if frame_df['int_d_mag'].min() > field_row.ball_tof*params.v_max*1.2:
    #         field_row['cp_off'], field_row['cp_def'], field_row['cp_dead'] = 0, 0, 1
    #         return field_row

    #     frame_df['int_v0'] = (frame_df['v_x_r']*frame_df['int_d_x'] + frame_df['v_y_r']*frame_df['int_d_y'])/frame_df['int_d_mag']

    #     frame_df['d_lt_vm'] = (params.v_max**2-frame_df.int_v0**2)/(2*params.a_max)
    #     frame_df['t_lt_vm'] = (params.v_max-frame_df.int_v0)/(params.a_max)
    #     frame_df['d_at_vm'] = frame_df.int_d_mag-frame_df.d_lt_vm
    #     frame_df['t_at_vm'] = frame_df.d_at_vm/params.v_max
    #     frame_df['t_tot'] = frame_df.t_lt_vm+frame_df.t_at_vm
    #     frame_df['int_dT'] = field_row['ball_tof'] - frame_df.t_tot
    #     frame_df['P_int_T_j'] = (1/(1. + np.exp( -np.pi/np.sqrt(3.0)/params.tti_sigma * (frame_df.int_dT) ) )).round(2)

    #     # topOff = frame_df.loc[frame_df.team_pos=="OFF"].sort_values('P_int_T_j', ascending=False)['P_int_T_j'].iloc[0]
    #     # topDef = frame_df.loc[frame_df.team_pos=="DEF"].sort_values('P_int_T_j', ascending=False)['P_int_T_j'].iloc[0]
    #     # total_p_int = 
    #     frame_df['norm_factor'] = np.minimum(1.0, 1.0/frame_df['P_int_T_j'].sum())
    #     frame_df['P_int_T_j_norm'] = frame_df.P_int_T_j*frame_df.norm_factor.round(2)
    #     field_row['cp_dead'] = 1.0-frame_df['P_int_T_j_norm'].sum()
    #     field_row['cp_off'] = frame_df.loc[frame_df.team_pos=='OFF']['P_int_T_j_norm'].sum()
    #     field_row['cp_def'] = frame_df.loc[frame_df.team_pos=='DEF']['P_int_T_j_norm'].sum()

    #     return field_row

    # # field_df.iloc[2278:2281].apply(getPitchControl, axis=1)
    # field_df = field_df.apply(getPitchControl, axis=1)
    # # frame_df[['displayName', 'teamAbbr', 't_tot', 'int_dT', 'P_int_T_j', 'norm_factor', 'P_int_T_j_norm', 'dead_ball', 'cp']].sort_values('P_int_T_j', ascending=False)
    # print(f'{frame_df.iloc[0].frameId}', end=' ')
    # return field_df

In [None]:
# Select the game and play that you wish to see in week 1
import random

plays = list(track_df.groupby(['gameId', 'playId'], as_index=False).first()[['gameId', 'playId']].to_records(index=False))

game_id = 2018122314
play_id = 4239

# game_id, play_id = random.choice(plays)

play_df = track_df[(track_df.playId == play_id) & (track_df.gameId == game_id)].sort_values(by = 'time')
play_df.head()

In [None]:
frame = 40
frame_df = play_df[(play_df.nflId!=0)&(play_df.frameId == 40)][['nflId', 'displayName', 'position', 'team_pos', 'x', 'y', 'v_x', 'v_y', 'v_mag', 'v_theta', 'a_x', 'a_y', 'a_mag', 'a_theta']]

In [None]:
frame_df.head()

In [None]:
a_max = 8
v_max = 9
reax_t = v_max/a_max

frame_df['v_x_r'] = frame_df.a_x*reax_t+frame_df.v_x
frame_df['v_y_r'] = frame_df.a_y*reax_t+frame_df.v_y
frame_df['v_r_mag'] = np.linalg.norm(np.array([frame_df.v_x_r, frame_df.v_y_r]), axis=0)
frame_df['v_r_theta'] = np.arctan(frame_df.v_y_r/frame_df.v_x_r).fillna(0)

frame_df['x_r'] = frame_df.x + frame_df.v_x*reax_t - 0.5*frame_df.a_x*reax_t**2
frame_df['y_r'] = frame_df.y + frame_df.v_y*reax_t - 0.5*frame_df.a_y*reax_t**2
frame_df

In [None]:
frame_df['T_b'] = 2.7
frame_df['x_b'] = 75
frame_df['y_b'] = 20

frame_df['int_d_x'] = frame_df.x_b-frame_df.x_r
frame_df['int_d_y'] = frame_df.y_b-frame_df.y_r
frame_df['int_d_mag'] = np.linalg.norm(np.array([frame_df.int_d_x, frame_df.int_d_y]), axis=0)
frame_df['int_d_theta'] = np.arctan(frame_df.int_d_y/frame_df.int_d_x).fillna(0)

frame_df

In [None]:
frame_df['int_v0'] = (frame_df['v_x_r']*frame_df['int_d_x'] + frame_df['v_y_r']*frame_df['int_d_y'])/frame_df['int_d_mag']

frame_df['d_lt_vm'] = (v_max**2-frame_df.int_v0**2)/(2*a_max)
frame_df['t_lt_vm'] = (v_max-frame_df.int_v0)/(a_max)
frame_df['d_at_vm'] = frame_df.int_d_mag-frame_df.d_lt_vm
frame_df['t_at_vm'] = frame_df.d_at_vm/v_max
frame_df['t_tot'] = frame_df.t_lt_vm+frame_df.t_at_vm
frame_df['int_dT'] = frame_df.T_b - frame_df.t_tot

frame_df.sort_values('t_tot', ascending=True)


In [None]:
tti_sigma = 0.45
frame_df['P_int_T_j'] = (1/(1. + np.exp( -np.pi/np.sqrt(3.0)/tti_sigma * (frame_df.int_dT) ) )).round(2) 

frame_df[['displayName', 'team_pos', 'P_int_T_j']].sort_values('P_int_T_j', ascending=False)

In [None]:
animated_play = AnimatePlay(play_df, 20)
HTML(animated_play.ani.to_jshtml())

In [None]:


frame = 40
frame_df = play_df[(play_df.nflId!=0)&(play_df.frameId == 40)][['nflId', 'displayName', 'position', 'team_pos', 'x', 'y', 'v_x', 'v_y', 'v_mag', 'v_theta', 'a_x', 'a_y', 'a_mag', 'a_theta']]

frame_df = frame_df.copy()
frame_df = frame_df[['nflId', 'displayName', 'position', 'team_pos', 'x', 'y', 'v_x', 'v_y', 'v_mag', 'v_theta', 'a_x', 'a_y', 'a_mag', 'a_theta']]

start_loc = frame_df.loc[frame_df.position=='QB'][['x', 'y']].iloc[0]
frame_df = frame_df.loc[frame_df.position!='QB']

target_locs = np.array([np.array([x, y]) for y in range(52, -1, -1) for x in range(int(start_loc[0]), 120) ])
target_locs = target_locs + 0.5

start_locs = np.full_like(target_locs, start_loc)
tofs = np.linalg.norm(start_locs - target_locs, axis=1)/params.avg_ball_speed

frame_df['v_x_r'] = frame_df.a_x*reax_t+frame_df.v_x
frame_df['v_y_r'] = frame_df.a_y*reax_t+frame_df.v_y
frame_df['v_r_mag'] = np.linalg.norm(np.array([frame_df.v_x_r, frame_df.v_y_r]), axis=0)
frame_df['v_r_theta'] = np.arctan(frame_df.v_y_r/frame_df.v_x_r).fillna(0)

frame_df['x_r'] = frame_df.x + frame_df.v_x*reax_t - 0.5*frame_df.a_x*reax_t**2
frame_df['y_r'] = frame_df.y + frame_df.v_y*reax_t - 0.5*frame_df.a_y*reax_t**2

frame_df

In [None]:
PCS = dict()

for i, (ball_x, ball_y) in enumerate(target_locs):
    # i, (ball_x, ball_y) = random.choice(list(enumerate(target_locs)))
    # print(i, ball_x, ball_y, tofs[i])

    df = frame_df.copy()
    df['int_d_x'] = ball_x-df.x_r
    df['int_d_y'] = ball_y-df.y_r
    df['int_d_mag'] = np.linalg.norm(np.array([df.int_d_x, df.int_d_y]), axis=0)
    df['int_d_theta'] = np.arctan(df.int_d_y/df.int_d_x).fillna(0)

    df['int_v0'] = (df['v_x_r']*df['int_d_x'] + df['v_y_r']*df['int_d_y'])/df['int_d_mag']

    df['d_lt_vm'] = (params.v_max**2-df.int_v0**2)/(2*params.a_max)
    df['t_lt_vm'] = (params.v_max-df.int_v0)/(params.a_max)
    df['d_at_vm'] = df.int_d_mag-df.d_lt_vm
    df['t_at_vm'] = df.d_at_vm/v_max
    df['t_tot'] = df.t_lt_vm+df.t_at_vm
    df['int_dT'] = tofs[i] - df.t_tot
    df['P_int_T_j'] = (1/(1. + np.exp( -np.pi/np.sqrt(3.0)/params.tti_sigma * (df.int_dT) ) )).round(2)

    topOff = df.loc[df.team_pos=="OFF"].sort_values('P_int_T_j', ascending=False)['P_int_T_j'].iloc[0]
    topDef = df.loc[df.team_pos=="DEF"].sort_values('P_int_T_j', ascending=False)['P_int_T_j'].iloc[0]

    # topOff-topDef
    PCS[(ball_x, ball_y)] = round(topOff-topDef, 2)

