In [33]:
import numpy as np
import pandas as pd
from IPython.display import HTML
from pandarallel import pandarallel
pandarallel.initialize()

pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


In [34]:
path_shared = '~/Downloads/nfl-big-data-bowl-2021/{}'

games_df = pd.read_csv(path_shared.format('games.csv'))
plays_df = pd.read_csv(path_shared.format('plays.csv'))
players_df = pd.read_csv(path_shared.format('players.csv'))
track_df = pd.read_csv(path_shared.format('week1_norm.csv'))

In [35]:
params = lambda: None # create an empty object to add params
params.a_max = 7
params.s_max = 9
params.reax_t = params.s_max/params.a_max
params.avg_ball_speed = 20
params.tti_sigma = 0.45
params.cell_length = 1
vars(params)


{'a_max': 7,
 's_max': 9,
 'reax_t': 1.2857142857142858,
 'avg_ball_speed': 20,
 'tti_sigma': 0.45,
 'cell_length': 1}

In [36]:
# game_id = 2018122314
# play_id = 4239
game_id = 2018090905
play_id = 2062
# game_id, play_id = random.choice(plays)

play_df = track_df[(track_df.playId == play_id) & (track_df.gameId == game_id)].sort_values(by = 'frameId')
play_df.head()

Unnamed: 0,gameId,playId,frameId,event,nflId,displayName,jerseyNumber,position,team,team_pos,teamAbbr,route,time,los,x,y,dis,o,s,s_dir,s_dir_rad,v_x,v_y,v_theta,v_mag,a_old,a_x,a_y,a_theta,a_mag
317421,2018090905,2062,1,,496735.0,Kareem Jackson,25.0,CB,away,DEF,HOU,,2018-09-09T18:28:11.700Z,40,54.51,31.08,0.08,260.44,0.83,108.4,1.89,0.79,-0.26,-0.32,0.83,0.38,0.0,0.0,0.0,0.0
317434,2018090905,2062,1,,0.0,Football,,,football,FTBL,FTBL,,2018-09-09T18:28:11.700Z,40,40.18,29.52,0.0,,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
317433,2018090905,2062,1,,2558094.0,Zach Cunningham,41.0,ILB,away,DEF,HOU,,2018-09-09T18:28:11.700Z,40,44.36,32.36,0.05,263.88,0.49,262.25,4.58,-0.49,-0.07,0.14,0.49,0.72,0.0,0.0,0.0,0.0
317432,2018090905,2062,1,,2552490.0,Benardrick McKinney,55.0,ILB,away,DEF,HOU,,2018-09-09T18:28:11.700Z,40,41.88,23.53,0.1,220.61,0.95,298.75,5.21,-0.83,0.46,-0.5,0.95,0.03,0.0,0.0,0.0,0.0
317430,2018090905,2062,1,,2552261.0,Kevin Johnson,30.0,CB,away,DEF,HOU,,2018-09-09T18:28:11.700Z,40,48.4,17.56,0.05,135.6,0.5,27.09,0.47,0.23,0.45,1.1,0.5,0.46,0.0,0.0,0.0,0.0


In [1]:
import xgboost as xgb
import joblib
bst = joblib.load("xyac_model.model")
xgb.plot_importance(bst)
scores = bst.get_score(importance_type='gain')
print(scores.keys())
cols_when_model_builds = bst.feature_names

dict_keys(['4-closest-defender-x', '1-closest-defender-speed', '2-closest-defender-speed', '4-closest-defender-speed', '1-closest-defender-distance', '3-closest-defender-x', 'y', '2-closest-defender-distance', '3-closest-defender-y', '5-closest-defender-x', '4-closest-defender-y', '1-closest-defender-y', '2-closest-defender-y', '5-closest-defender-distance', '5-closest-defender-speed', '1-closest-defender-x', '4-closest-defender-distance', '2-closest-defender-x', '5-closest-defender-y', '3-closest-defender-distance', '3-closest-defender-speed'])


In [69]:
%%time
import time
from scipy.spatial import distance
from ipdb import launch_ipdb_on_exception 

def get_field_df(play_frame_group):
    frame_df = play_frame_group.loc[(play_df.nflId!=0)]
    ball_start = frame_df.loc[frame_df.position=='QB', ['x', 'y']].iloc[0].round(0)
    frame_df = frame_df.loc[frame_df.position!='QB']
    #print(frame_df.columns)
    current_frame = min(frame_df.frameId)
    pocket_width = 10

    x = np.linspace(0.5, 119.5, 120)
    y = np.linspace(-0.5, 53.5, 55)
    y[0] = -0.2
    field_locs = np.stack(np.meshgrid(x, y)).reshape(2, -1).T  # (F, 2)
    
    T = np.linspace(0.1, 4, 40)
    receivers_df = frame_df.loc[frame_df.team_pos == 'DEF',['x', 'y', 'v_x', 'v_y', 'v_theta', 'v_mag', 'los', 'a_x', 'a_y']]
    dist_from_ball_np = np.linalg.norm((receivers_df.x - ball_start[0],
                                                     receivers_df.y - ball_start[1]), axis=0)
    # find the spot the qb would aim at, leading the receiver in their current dir by the ball time
    rec_x_np = receivers_df.x.to_numpy()[:,None]
    rec_y_np = receivers_df.y.to_numpy()[:,None]
    rec_v_x_np = receivers_df.v_x.to_numpy()[:,None]
    rec_v_y_np = receivers_df.v_y.to_numpy()[:,None]
    rec_a_x_np = receivers_df.a_x.to_numpy()[:,None]
    rec_a_y_np = receivers_df.a_y.to_numpy()[:,None]
    rec_v_theta_np = receivers_df.v_theta.to_numpy()[:,None]
    rec_v_x_r = rec_a_x_np*params.reax_t+rec_v_x_np
    rec_v_y_r = rec_a_y_np*params.reax_t+rec_v_y_np
    rec_v_r_mag = np.linalg.norm(np.array([rec_v_x_r, rec_v_y_r]), axis=0)
    rec_v_r_theta = np.arctan(rec_v_y_r/rec_v_x_r)
    
    #target_x = rec_x_np+rec_v_x_np*t+0.5*rec_a_x_np*t**2  # (R, T)
    #target_y = rec_y_np+rec_v_y_np*t+0.5*rec_a_y_np*t**2  # (R, T)
    
    x_r = rec_x_np + rec_v_x_np*params.reax_t - 0.5*rec_a_x_np*params.reax_t**2 #(R, 1)
    y_r = rec_y_np + rec_v_y_np*params.reax_t - 0.5*rec_a_y_np*params.reax_t**2 #(R, 1)
    
    reaction_player_locs = np.hstack((x_r, y_r)) # (R,2)
    reaction_player_vels = np.hstack((rec_v_x_r, rec_v_y_r)) # (R,2)
    
    int_d_vec = field_locs[:, None, :] - reaction_player_locs #(F, R, 2)
    int_d_mag = np.linalg.norm(int_d_vec, axis=2) # F, R
    int_theta = np.arctan(int_d_vec[:,:,1]/int_d_vec[:,:,0]) #this could be a problem
    
    
    int_s0 = np.clip(np.sum(int_d_vec*reaction_player_vels, axis=2)/int_d_mag, -params.s_max, params.s_max) #F, R,  #taking norm between vectors int_d and player velocity
        
    t_lt_smax = (params.s_max-int_s0)/params.a_max  #F, J,
    d_lt_smax = t_lt_smax*((int_s0+params.s_max)/2) #F, J,
    d_at_smax = int_d_mag - d_lt_smax               #F, J,
    t_at_smax = d_at_smax/params.s_max              #F, J,
    t_tot = t_lt_smax+t_at_smax                     #F, J,

    a1 =  np.broadcast_to(t_lt_smax[:,:,None],(*t_lt_smax.shape, len(T))) # F, J, T THIS IS TIME SPENT LT SMAX
    a2 =  np.broadcast_to(T[None,None,:],(*t_lt_smax.shape, len(T))) # F, J, T
    
    time_lt_smax = np.where(a1 > a2, a2, a1) # F, J, T  THIS IS TIME LESS THAN MAX
    time_at_smax = a2 - time_lt_smax # F, J, T THIS IS TIME MORE THAN MAX
    
    
    d = time_at_smax * params.s_max + int_s0[:,:,None]*time_lt_smax + 0.5*params.a_max*np.square(time_lt_smax) # F, J, T
    ## d = Time at max speed * max speed + int_s0* time at lt_smax + 1/2 params.a_max (time at lt_smax * time at lt_smax) 
    #d = 
    v_proj = int_s0[:,:,None] + params.a_max*np.square(time_lt_smax) # F, J, T
    v_proj = np.where(v_proj > params.s_max, params.s_max, v_proj) # F, J, T
    x_proj = x_r + d* np.cos(int_theta[:,:,None]) # F, J, T
    y_proj = y_r + d* np.sin(int_theta[:,:,None]) # F, J, T
    
    
    projected_locations = np.stack((x_proj, y_proj), axis =3)  # F, J, T, 2
    
    distances_to_ball = projected_locations - field_locs[:,None,None,:] # F, J, T, 2
    distance_mags = np.linalg.norm(distances_to_ball, axis = 3) # F, J, T
    
    sorted_indices = np.argsort(distance_mags, axis = 1) # F, J, T

    distance_mags = np.take_along_axis(distance_mags,sorted_indices, axis = 1)
    x_proj_sorted = np.take_along_axis(x_proj,sorted_indices, axis = 1) # F, J, T
    y_proj_sorted = np.take_along_axis(y_proj,sorted_indices, axis = 1) # F, J, T
    v_proj_sorted = np.take_along_axis(v_proj,sorted_indices, axis = 1) # F, J, T
    
    just_top_5_distances = distance_mags[:,0:5,:].transpose((0,2,1)) #F, T, 5
    just_top_5_x_proj = x_proj_sorted[:,0:5,:].transpose((0,2,1)) #F, T, 5
    just_top_5_y_proj = y_proj_sorted[:,0:5,:].transpose((0,2,1))  #F, T, 5
    just_top_5_v_proj = v_proj_sorted[:,0:5,:].transpose((0,2,1))  #F, T, 5
    
    
    just_top_5_distances = np.reshape(just_top_5_distances, (just_top_5_distances.shape[0]*just_top_5_distances.shape[1],just_top_5_distances.shape[2]))
    just_top_5_x_proj  = np.reshape(just_top_5_x_proj, just_top_5_distances.shape)
    just_top_5_y_proj  = np.reshape(just_top_5_y_proj, just_top_5_distances.shape)
    just_top_5_v_proj  = np.reshape(just_top_5_v_proj, just_top_5_distances.shape)

    
    endpoints = np.repeat(field_locs, repeats = len(T), axis = 0) # FxT, 2
    times = np.repeat(T[None, :], repeats = len(field_locs), axis = 0)
    times_shaped = times.reshape((times.shape[0]*times.shape[1]))# FxT, 1

    field_df = pd.DataFrame({
        'pass_endpoint_x': endpoints[:,0],
        'pass_endpoint_y': endpoints[:,1],
        'frame_thrown' : current_frame,
        'time_of_flight' : times_shaped,
        '1-closest-defender-distance' : just_top_5_distances[:,0],
        '2-closest-defender-distance' : just_top_5_distances[:,1],
        '3-closest-defender-distance' : just_top_5_distances[:,2],
        '4-closest-defender-distance' : just_top_5_distances[:,3],
        '5-closest-defender-distance' : just_top_5_distances[:,4],
        '1-closest-defender-x' : just_top_5_x_proj[:,0],
        '2-closest-defender-x' : just_top_5_x_proj[:,1],
        '3-closest-defender-x' : just_top_5_x_proj[:,2],
        '4-closest-defender-x' : just_top_5_x_proj[:,3],
        '5-closest-defender-x' : just_top_5_x_proj[:,4],
        '1-closest-defender-y' : just_top_5_y_proj[:,0], 
        '2-closest-defender-y': just_top_5_y_proj[:,1], 
        '3-closest-defender-y': just_top_5_y_proj[:,2], 
        '4-closest-defender-y': just_top_5_y_proj[:,3], 
        '5-closest-defender-y': just_top_5_y_proj[:,4], 
        '1-closest-defender-speed' : just_top_5_v_proj[:,0],
        '2-closest-defender-speed': just_top_5_v_proj[:,1],
        '3-closest-defender-speed': just_top_5_v_proj[:,2],
        '4-closest-defender-speed': just_top_5_v_proj[:,3],
        '5-closest-defender-speed': just_top_5_v_proj[:,4], 
        "y" : endpoints[:,1]
        
     })
    
    dtest = xgb.DMatrix(field_df[cols_when_model_builds])
    ypred = bst.predict(dtest)
    
    return field_df

field_dfs = play_df.loc[play_df.frameId <= play_df.loc[play_df.event=='pass_forward'].frameId.iloc[0]].groupby(['gameId', 'playId', 'frameId']).parallel_apply(get_field_df)
field_dfs = field_dfs.reset_index(3, drop=True).reset_index()
field_dfs

CPU times: user 11.5 s, sys: 47.5 s, total: 59 s
Wall time: 1min 34s


In [57]:

display(field_dfs.reset_index())

Unnamed: 0,index,gameId,playId,frameId,pass_endpoint_x,pass_endpoint_y,frame_thrown,time_of_flight,1-closest-defender-distance,2-closest-defender-distance,3-closest-defender-distance,4-closest-defender-distance,5-closest-defender-distance,1-closest-defender-x,2-closest-defender-x,3-closest-defender-x,4-closest-defender-x,5-closest-defender-x,1-closest-defender-y,2-closest-defender-y,3-closest-defender-y,4-closest-defender-y,5-closest-defender-y,1-closest-defender-speed,2-closest-defender-speed,3-closest-defender-speed,4-closest-defender-speed
0,0,2018090905,2062,1,0.5,-0.2,1,0.1,45.935152,47.163706,51.564254,54.144397,54.344799,41.700989,40.883330,48.693378,43.792674,49.415861,20.110508,24.163946,18.137682,32.317074,23.476902,0.829998,-0.304997,0.503833,0.435835
1,1,2018090905,2062,1,0.5,-0.2,1,0.2,46.116152,47.316011,51.631754,54.292780,54.486382,41.863334,41.013739,48.756466,43.911318,49.543301,20.190538,24.242624,18.161687,32.406187,23.538587,1.039998,-0.094997,0.713833,0.645835
2,2,2018090905,2062,1,0.5,-0.2,1,0.3,46.367152,47.538316,51.769255,54.511163,54.697966,42.088466,41.204085,48.884977,44.085932,49.733747,20.301519,24.357463,18.210586,32.537340,23.630769,1.389998,0.255003,1.063833,0.995835
3,3,2018090905,2062,1,0.5,-0.2,1,0.4,46.688152,47.830621,51.976755,54.799546,54.979549,42.376383,41.454367,49.078913,44.316517,49.987201,20.443451,24.508462,18.284379,32.710532,23.753449,1.879998,0.745003,1.553833,1.485835
4,4,2018090905,2062,1,0.5,-0.2,1,0.5,47.079151,48.192926,52.254255,55.157930,55.331132,42.727085,41.764586,49.338272,44.603072,50.303662,20.616334,24.695623,18.383066,32.925764,23.906627,2.509998,1.375003,2.183833,2.115835
5,5,2018090905,2062,1,0.5,-0.2,1,0.6,47.540151,48.625231,52.601755,55.586313,55.752716,43.140574,42.134742,49.663056,44.945598,50.683130,20.820168,24.918944,18.506647,33.183034,24.090302,3.279998,2.145003,2.953833,2.885835
6,6,2018090905,2062,1,0.5,-0.2,1,0.7,48.071151,49.127536,53.019256,56.084696,56.244299,43.616848,42.564834,50.053263,45.344095,51.125605,21.054953,25.178426,18.655122,33.482344,24.304475,4.189998,3.055003,3.863833,3.795835
7,7,2018090905,2062,1,0.5,-0.2,1,0.8,48.672151,49.699840,53.506756,56.653079,56.805883,44.155907,43.054863,50.508894,45.798561,51.631088,21.320689,25.474069,18.828490,33.823694,24.549145,5.239998,4.105003,4.913833,4.845835
8,8,2018090905,2062,1,0.5,-0.2,1,0.9,49.343151,50.342145,54.064256,57.291463,57.437466,44.757753,43.604828,51.029949,46.308999,52.199577,21.617375,25.805872,19.026753,34.207083,24.824313,6.429998,5.295003,6.103833,6.035835
9,9,2018090905,2062,1,0.5,-0.2,1,1.0,50.084151,51.054450,54.691757,57.999846,58.139050,45.422384,44.214730,51.616429,46.875406,52.831074,21.945013,26.173836,19.249909,34.632511,25.129978,7.759998,6.625003,7.433833,7.365835


In [None]:
a = ypred

In [None]:
print("lol")