In [5]:
import numpy as np
import pandas as pd
import dask.dataframe as dd

from pandarallel import pandarallel
pandarallel.initialize(nb_workers=16)

from IPython.display import HTML

pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)


INFO: Pandarallel will run on 16 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [6]:
path_shared = '../data/{}'

games_df = pd.read_csv(path_shared.format('games.csv'))
plays_df = pd.read_csv(path_shared.format('plays.csv'))
players_df = pd.read_csv(path_shared.format('players.csv'))
coverage_df = pd.read_csv(path_shared.format('coverages_week1.csv'))
track_df = pd.read_csv(path_shared.format('week1_norm.csv', usecols=['nflId', 'displayName', 'position', 'team_pos', 'x', 'y', 'v_x', 'v_y', 'v_mag', 'v_theta', 'a_x', 'a_y', 'a_mag', 'a_theta']))

plays_df = pd.merge(plays_df, coverage_df, left_on=['gameId', 'playId'], right_on=['gameId', 'playId'])

In [7]:

plays_df[plays_df.coverage.str.contains('Zone')].sort_values('offensePlayResult', ascending=False).head()

Unnamed: 0,gameId,playId,playDescription,quarter,down,yardsToGo,possessionTeam,playType,yardlineSide,yardlineNumber,offenseFormation,personnelO,defendersInTheBox,numberOfPassRushers,personnelD,typeDropback,preSnapVisitorScore,preSnapHomeScore,gameClock,absoluteYardlineNumber,penaltyCodes,penaltyJerseyNumbers,passResult,offensePlayResult,playResult,epa,isDefensivePI,coverage
307,2018090903,3264,(10:33) (No Huddle) R.Tannehill pass deep midd...,4,1,10,MIA,play_type_pass,MIA,25,SINGLEBACK,"1 RB, 2 TE, 2 WR",6.0,4.0,"3 DL, 4 LB, 4 DB",TRADITIONAL,10.0,17.0,10:33:00,35.0,,,C,75,75,5.759313,False,Cover 6 Zone
119,2018090901,2324,(9:59) (Shotgun) B.Roethlisberger pass short r...,3,1,10,PIT,play_type_pass,PIT,26,SHOTGUN,"1 RB, 1 TE, 3 WR",7.0,4.0,"4 DL, 2 LB, 5 DB",TRADITIONAL,7.0,7.0,09:59:00,36.0,,,C,67,67,4.14681,False,Cover 3 Zone
557,2018090906,354,(9:51) R.Fitzpatrick pass deep right to D.Jack...,1,1,10,TB,play_type_pass,TB,42,SINGLEBACK,"1 RB, 1 TE, 3 WR",6.0,4.0,"4 DL, 2 LB, 5 DB",TRADITIONAL,0.0,7.0,09:51:00,52.0,,,C,58,58,4.733419,False,Cover 3 Zone
914,2018091001,124,(13:25) (Shotgun) D.Carr pass short middle to ...,1,1,10,OAK,play_type_pass,OAK,35,EMPTY,"1 RB, 2 TE, 2 WR",5.0,5.0,"3 DL, 4 LB, 4 DB",TRADITIONAL,0.0,0.0,13:25:00,45.0,,,C,45,45,2.638069,False,Cover 3 Zone
868,2018091000,1672,(2:00) (Shotgun) S.Darnold pass deep left to R...,2,3,2,NYJ,play_type_pass,DET,41,SHOTGUN,"1 RB, 1 TE, 3 WR",6.0,3.0,"3 DL, 3 LB, 5 DB",TRADITIONAL,10.0,7.0,02:00:00,69.0,,,C,41,41,4.850242,False,Cover 2 Zone


In [8]:
params = lambda: None # create an empty object to add params
params.a_max = 7
params.v_max = 9
params.reax_t = params.v_max/params.a_max
params.avg_ball_speed = 20
params.tti_sigma = 0.45
params.cell_length = 1
vars(params)

{'a_max': 7,
 'v_max': 9,
 'reax_t': 1.2857142857142858,
 'avg_ball_speed': 20,
 'tti_sigma': 0.45,
 'cell_length': 1}

In [9]:
track_df['v_x_r'] = track_df.a_x*params.reax_t+track_df.v_x
track_df['v_y_r'] = track_df.a_y*params.reax_t+track_df.v_y
track_df['v_r_mag'] = np.linalg.norm(np.array([track_df.v_x_r, track_df.v_y_r]), axis=0)
track_df['v_r_theta'] = np.arctan(track_df.v_y_r/track_df.v_x_r).fillna(0)

track_df['x_r'] = track_df.x + track_df.v_x*params.reax_t - 0.5*track_df.a_x*params.reax_t**2
track_df['y_r'] = track_df.y + track_df.v_y*params.reax_t - 0.5*track_df.a_y*params.reax_t**2
track_df.head()

Unnamed: 0,gameId,playId,frameId,event,nflId,displayName,jerseyNumber,position,position_general,team,team_pos,teamAbbr,route,time,los,x,y,dis,o,s,s_dir,s_dir_rad,v_x,v_y,v_theta,v_mag,a_old,a_x,a_y,a_theta,a_mag,v_x_r,v_y_r,v_r_mag,v_r_theta,x_r,y_r
0,2018090600,75,1,,310.0,Matt Ryan,2.0,QB,QB,away,OFF,ATL,,2018-09-07T01:07:14.599Z,40,38.38,26.63,0.02,109.57,0.0,60.93,1.06,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,38.38,26.63
1,2018090600,75,1,,79848.0,Malcolm Jenkins,27.0,SS,S,home,DEF,PHI,,2018-09-07T01:07:14.599Z,40,41.22,16.83,0.01,285.63,0.01,246.66,4.31,-0.01,-0.0,0.41,0.01,0.01,0.0,0.0,0.0,0.0,-0.01,0.0,0.01,-0.0,41.207143,16.83
2,2018090600,75,1,,2495454.0,Julio Jones,11.0,WR,WR,away,OFF,ATL,HITCH,2018-09-07T01:07:14.599Z,40,38.76,9.14,0.01,110.45,0.02,196.86,3.44,-0.01,-0.02,1.28,0.02,0.03,0.0,0.0,0.0,0.0,-0.01,-0.02,0.022361,1.107149,38.747143,9.114286
3,2018090600,75,1,,2495613.0,Corey Graham,24.0,FS,S,home,DEF,PHI,,2018-09-07T01:07:14.599Z,40,43.8,31.29,0.01,250.12,0.09,348.91,6.09,-0.02,0.09,-1.38,0.09,0.42,0.0,0.0,0.0,0.0,-0.02,0.09,0.092195,-1.352127,43.774286,31.405714
4,2018090600,75,1,,2533040.0,Mohamed Sanu,12.0,WR,WR,away,OFF,ATL,HITCH,2018-09-07T01:07:14.599Z,40,39.33,17.15,0.0,77.61,0.0,13.97,0.24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,39.33,17.15


In [10]:
# deep lockett vs chiefs
# game_id = 2018122314
# play_id = 4239

# deep dissly vs bears
# game_id = 2018091700
# play_id = 3936

game_id = 2018090905
play_id = 2062

# game_id, play_id = random.choice(plays)

play_df = track_df[(track_df.playId == play_id) & (track_df.gameId == game_id)].sort_values(by = 'frameId')
play_df.head()

Unnamed: 0,gameId,playId,frameId,event,nflId,displayName,jerseyNumber,position,position_general,team,team_pos,teamAbbr,route,time,los,x,y,dis,o,s,s_dir,s_dir_rad,v_x,v_y,v_theta,v_mag,a_old,a_x,a_y,a_theta,a_mag,v_x_r,v_y_r,v_r_mag,v_r_theta,x_r,y_r
317421,2018090905,2062,1,,496735.0,Kareem Jackson,25.0,CB,DB,away,DEF,HOU,,2018-09-09T18:28:11.700Z,40,54.51,31.08,0.08,260.44,0.83,108.4,1.89,0.79,-0.26,-0.32,0.83,0.38,0.0,0.0,0.0,0.0,0.79,-0.26,0.831685,-0.317948,55.525714,30.745714
317434,2018090905,2062,1,,0.0,Football,,,,football,FTBL,FTBL,,2018-09-09T18:28:11.700Z,40,40.18,29.52,0.0,,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.18,29.52
317433,2018090905,2062,1,,2558094.0,Zach Cunningham,41.0,ILB,LB,away,DEF,HOU,,2018-09-09T18:28:11.700Z,40,44.36,32.36,0.05,263.88,0.49,262.25,4.58,-0.49,-0.07,0.14,0.49,0.72,0.0,0.0,0.0,0.0,-0.49,-0.07,0.494975,0.141897,43.73,32.27
317432,2018090905,2062,1,,2552490.0,Benardrick McKinney,55.0,ILB,LB,away,DEF,HOU,,2018-09-09T18:28:11.700Z,40,41.88,23.53,0.1,220.61,0.95,298.75,5.21,-0.83,0.46,-0.5,0.95,0.03,0.0,0.0,0.0,0.0,-0.83,0.46,0.948947,-0.506075,40.812857,24.121429
317430,2018090905,2062,1,,2552261.0,Kevin Johnson,30.0,CB,DB,away,DEF,HOU,,2018-09-09T18:28:11.700Z,40,48.4,17.56,0.05,135.6,0.5,27.09,0.47,0.23,0.45,1.1,0.5,0.46,0.0,0.0,0.0,0.0,0.23,0.45,0.505371,1.098299,48.695714,18.138571


In [11]:
plays_df.loc[(plays_df.gameId == game_id) & (plays_df.playId == play_id)]

Unnamed: 0,gameId,playId,playDescription,quarter,down,yardsToGo,possessionTeam,playType,yardlineSide,yardlineNumber,offenseFormation,personnelO,defendersInTheBox,numberOfPassRushers,personnelD,typeDropback,preSnapVisitorScore,preSnapHomeScore,gameClock,absoluteYardlineNumber,penaltyCodes,penaltyJerseyNumbers,passResult,offensePlayResult,playResult,epa,isDefensivePI,coverage
438,2018090905,2062,(1:05) (Shotgun) T.Brady pass deep middle to R...,2,1,10,NE,play_type_pass,NE,41,SHOTGUN,"2 RB, 1 TE, 2 WR",6.0,4.0,"4 DL, 2 LB, 5 DB",TRADITIONAL,6.0,14.0,01:05:00,69.0,,,C,28,28,1.248695,False,Cover 3 Zone


In [12]:
games_df[games_df.gameId==game_id]

Unnamed: 0,gameId,gameDate,gameTimeEastern,homeTeamAbbr,visitorTeamAbbr,week
5,2018090905,09/09/2018,13:00:00,NE,HOU,1


In [13]:
%%time

def get_field_df(play_frame_group):
    frame_df = play_frame_group.loc[(play_df.nflId!=0)]

    ball_start = frame_df.loc[frame_df.position=='QB'][['x', 'y']].iloc[0].round(0)
    frame_df = frame_df.loc[frame_df.position!='QB']

    x = np.arange(ball_start[0]+0.5, 120, params.cell_length)
    y = np.append(np.arange(0+0.5, 53.3, params.cell_length), 53.3)
    xx, yy = np.meshgrid(x, y)
    ball_end = np.stack((xx, yy)).reshape(2,-1)

    field_df = pd.DataFrame({'ball_start_x': ball_start[0], 'ball_start_y': ball_start[1], 'ball_end_x': ball_end[0], 'ball_end_y': ball_end[1]})
    field_df['ball_tof'] = np.sqrt((field_df.ball_start_x-field_df.ball_end_x)**2+(field_df.ball_start_y-field_df.ball_end_y)**2)/params.avg_ball_speed

    def getPitchControl(field_row):
        frame_df['int_d_x'] = field_row['ball_end_x']-frame_df.x_r
        frame_df['int_d_y'] = field_row['ball_end_y']-frame_df.y_r
        frame_df['int_d_mag'] = np.linalg.norm(np.array([frame_df.int_d_x, frame_df.int_d_y]), axis=0)
        frame_df['int_d_theta'] = np.arctan(frame_df.int_d_y/frame_df.int_d_x).fillna(0)

        if frame_df['int_d_mag'].min() > field_row.ball_tof*params.v_max*1.2:
            field_row['cp_off'], field_row['cp_def'], field_row['cp_dead'] = 0, 0, 1
            return field_row

        frame_df['int_v0'] = (frame_df['v_x_r']*frame_df['int_d_x'] + frame_df['v_y_r']*frame_df['int_d_y'])/frame_df['int_d_mag']

        frame_df['d_lt_vm'] = (params.v_max**2-frame_df.int_v0**2)/(2*params.a_max)
        frame_df['t_lt_vm'] = (params.v_max-frame_df.int_v0)/(params.a_max)
        frame_df['d_at_vm'] = frame_df.int_d_mag-frame_df.d_lt_vm
        frame_df['t_at_vm'] = frame_df.d_at_vm/params.v_max
        frame_df['t_tot'] = frame_df.t_lt_vm+frame_df.t_at_vm
        frame_df['int_dT'] = field_row['ball_tof'] - frame_df.t_tot
        frame_df['P_int_T_j'] = (1/(1. + np.exp( -np.pi/np.sqrt(3.0)/params.tti_sigma * (frame_df.int_dT) ) )).round(2)

        # topOff = frame_df.loc[frame_df.team_pos=="OFF"].sort_values('P_int_T_j', ascending=False)['P_int_T_j'].iloc[0]
        # topDef = frame_df.loc[frame_df.team_pos=="DEF"].sort_values('P_int_T_j', ascending=False)['P_int_T_j'].iloc[0]
        # total_p_int = 
        frame_df['norm_factor'] = np.minimum(1.0, 1.0/frame_df['P_int_T_j'].sum())
        frame_df['P_int_T_j_norm'] = frame_df.P_int_T_j*frame_df.norm_factor.round(2)
        field_row['cp_dead'] = 1.0-frame_df['P_int_T_j_norm'].sum()
        field_row['cp_off'] = frame_df.loc[frame_df.team_pos=='OFF']['P_int_T_j_norm'].sum()
        field_row['cp_def'] = frame_df.loc[frame_df.team_pos=='DEF']['P_int_T_j_norm'].sum()

        return field_row

    # field_df.iloc[2278:2281].apply(getPitchControl, axis=1)
    field_df = field_df.parallel_apply(getPitchControl, axis=1)
    # frame_df[['displayName', 'teamAbbr', 't_tot', 'int_dT', 'P_int_T_j', 'norm_factor', 'P_int_T_j_norm', 'dead_ball', 'cp']].sort_values('P_int_T_j', ascending=False)
    print(f'{frame_df.iloc[0].frameId}', end=' ')
    return field_df

field_dfs = play_df.loc[play_df.frameId <= play_df.loc[play_df.event=='pass_forward'].frameId.iloc[0]].groupby(['gameId', 'playId', 'frameId']).apply(get_field_df)
field_dfs = field_dfs.reset_index(3, drop=True).reset_index()
field_dfs.head()

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 CPU times: user 5.06 s, sys: 5.91 s, total: 11 s
Wall time: 4min 13s


Unnamed: 0,gameId,playId,frameId,ball_end_x,ball_end_y,ball_start_x,ball_start_y,ball_tof,cp_dead,cp_def,cp_off
0,2018090905,2062,1,35.5,0.5,35.0,30.0,1.475212,1.0,0.0,0.0
1,2018090905,2062,1,36.5,0.5,35.0,30.0,1.476906,1.0,0.0,0.0
2,2018090905,2062,1,37.5,0.5,35.0,30.0,1.480287,1.0,0.0,0.0
3,2018090905,2062,1,38.5,0.5,35.0,30.0,1.485345,1.0,0.0,0.0
4,2018090905,2062,1,39.5,0.5,35.0,30.0,1.492062,1.0,0.0,0.0


In [15]:
animated_play = AnimatePlay(play_df, 20, field_dfs)
HTML(animated_play.ani.to_jshtml())

In [None]:
# USELESS JUNK BELOW

In [None]:
# Select the game and play that you wish to see in week 1
import random

plays = list(track_df.groupby(['gameId', 'playId'], as_index=False).first()[['gameId', 'playId']].to_records(index=False))

game_id = 2018122314
play_id = 4239

# game_id, play_id = random.choice(plays)

play_df = track_df[(track_df.playId == play_id) & (track_df.gameId == game_id)].sort_values(by = 'time')
play_df.head()

In [None]:
frame = 40
frame_df = play_df[(play_df.nflId!=0)&(play_df.frameId == 40)][['nflId', 'displayName', 'position', 'team_pos', 'x', 'y', 'v_x', 'v_y', 'v_mag', 'v_theta', 'a_x', 'a_y', 'a_mag', 'a_theta']]

In [None]:
frame_df.head()

In [None]:
a_max = 8
v_max = 9
reax_t = v_max/a_max

frame_df['v_x_r'] = frame_df.a_x*reax_t+frame_df.v_x
frame_df['v_y_r'] = frame_df.a_y*reax_t+frame_df.v_y
frame_df['v_r_mag'] = np.linalg.norm(np.array([frame_df.v_x_r, frame_df.v_y_r]), axis=0)
frame_df['v_r_theta'] = np.arctan(frame_df.v_y_r/frame_df.v_x_r).fillna(0)

frame_df['x_r'] = frame_df.x + frame_df.v_x*reax_t - 0.5*frame_df.a_x*reax_t**2
frame_df['y_r'] = frame_df.y + frame_df.v_y*reax_t - 0.5*frame_df.a_y*reax_t**2
frame_df

In [None]:
frame_df['T_b'] = 2.7
frame_df['x_b'] = 75
frame_df['y_b'] = 20

frame_df['int_d_x'] = frame_df.x_b-frame_df.x_r
frame_df['int_d_y'] = frame_df.y_b-frame_df.y_r
frame_df['int_d_mag'] = np.linalg.norm(np.array([frame_df.int_d_x, frame_df.int_d_y]), axis=0)
frame_df['int_d_theta'] = np.arctan(frame_df.int_d_y/frame_df.int_d_x).fillna(0)

frame_df

In [None]:
frame_df['int_v0'] = (frame_df['v_x_r']*frame_df['int_d_x'] + frame_df['v_y_r']*frame_df['int_d_y'])/frame_df['int_d_mag']

frame_df['d_lt_vm'] = (v_max**2-frame_df.int_v0**2)/(2*a_max)
frame_df['t_lt_vm'] = (v_max-frame_df.int_v0)/(a_max)
frame_df['d_at_vm'] = frame_df.int_d_mag-frame_df.d_lt_vm
frame_df['t_at_vm'] = frame_df.d_at_vm/v_max
frame_df['t_tot'] = frame_df.t_lt_vm+frame_df.t_at_vm
frame_df['int_dT'] = frame_df.T_b - frame_df.t_tot

frame_df.sort_values('t_tot', ascending=True)


In [None]:
tti_sigma = 0.45
frame_df['P_int_T_j'] = (1/(1. + np.exp( -np.pi/np.sqrt(3.0)/tti_sigma * (frame_df.int_dT) ) )).round(2) 

frame_df[['displayName', 'team_pos', 'P_int_T_j']].sort_values('P_int_T_j', ascending=False)

In [None]:
animated_play = AnimatePlay(play_df, 20)
HTML(animated_play.ani.to_jshtml())

In [None]:


frame = 40
frame_df = play_df[(play_df.nflId!=0)&(play_df.frameId == 40)][['nflId', 'displayName', 'position', 'team_pos', 'x', 'y', 'v_x', 'v_y', 'v_mag', 'v_theta', 'a_x', 'a_y', 'a_mag', 'a_theta']]

frame_df = frame_df.copy()
frame_df = frame_df[['nflId', 'displayName', 'position', 'team_pos', 'x', 'y', 'v_x', 'v_y', 'v_mag', 'v_theta', 'a_x', 'a_y', 'a_mag', 'a_theta']]

start_loc = frame_df.loc[frame_df.position=='QB'][['x', 'y']].iloc[0]
frame_df = frame_df.loc[frame_df.position!='QB']

target_locs = np.array([np.array([x, y]) for y in range(52, -1, -1) for x in range(int(start_loc[0]), 120) ])
target_locs = target_locs + 0.5

start_locs = np.full_like(target_locs, start_loc)
tofs = np.linalg.norm(start_locs - target_locs, axis=1)/params.avg_ball_speed

frame_df['v_x_r'] = frame_df.a_x*reax_t+frame_df.v_x
frame_df['v_y_r'] = frame_df.a_y*reax_t+frame_df.v_y
frame_df['v_r_mag'] = np.linalg.norm(np.array([frame_df.v_x_r, frame_df.v_y_r]), axis=0)
frame_df['v_r_theta'] = np.arctan(frame_df.v_y_r/frame_df.v_x_r).fillna(0)

frame_df['x_r'] = frame_df.x + frame_df.v_x*reax_t - 0.5*frame_df.a_x*reax_t**2
frame_df['y_r'] = frame_df.y + frame_df.v_y*reax_t - 0.5*frame_df.a_y*reax_t**2

frame_df

In [None]:
PCS = dict()

for i, (ball_x, ball_y) in enumerate(target_locs):
    # i, (ball_x, ball_y) = random.choice(list(enumerate(target_locs)))
    # print(i, ball_x, ball_y, tofs[i])

    df = frame_df.copy()
    df['int_d_x'] = ball_x-df.x_r
    df['int_d_y'] = ball_y-df.y_r
    df['int_d_mag'] = np.linalg.norm(np.array([df.int_d_x, df.int_d_y]), axis=0)
    df['int_d_theta'] = np.arctan(df.int_d_y/df.int_d_x).fillna(0)

    df['int_v0'] = (df['v_x_r']*df['int_d_x'] + df['v_y_r']*df['int_d_y'])/df['int_d_mag']

    df['d_lt_vm'] = (params.v_max**2-df.int_v0**2)/(2*params.a_max)
    df['t_lt_vm'] = (params.v_max-df.int_v0)/(params.a_max)
    df['d_at_vm'] = df.int_d_mag-df.d_lt_vm
    df['t_at_vm'] = df.d_at_vm/v_max
    df['t_tot'] = df.t_lt_vm+df.t_at_vm
    df['int_dT'] = tofs[i] - df.t_tot
    df['P_int_T_j'] = (1/(1. + np.exp( -np.pi/np.sqrt(3.0)/params.tti_sigma * (df.int_dT) ) )).round(2)

    topOff = df.loc[df.team_pos=="OFF"].sort_values('P_int_T_j', ascending=False)['P_int_T_j'].iloc[0]
    topDef = df.loc[df.team_pos=="DEF"].sort_values('P_int_T_j', ascending=False)['P_int_T_j'].iloc[0]

    # topOff-topDef
    PCS[(ball_x, ball_y)] = round(topOff-topDef, 2)

