In [None]:
import numpy as np
import pandas as pd
import os
import dask.dataframe as dd
import torch
from enum import Enum
from torch.autograd import Variable
from torch.nn.parameter import Parameter

# NOTE(review): pandarallel is initialised with 6 workers here, but no
# parallel_apply call appears in the cells reviewed - confirm it is still needed.
from pandarallel import pandarallel
pandarallel.initialize(nb_workers=6)

# HTML is used further down to embed the play animation in the notebook output.
from IPython.display import HTML

# Raise pandas' display limits so wide / long tracking frames are not truncated.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

In [None]:
class TuningParam(Enum):
    """Selects which model parameter this notebook run is tuning."""
    # sigma mode: the dataset labels each player with `close_to_ball`
    sigma = 1
    # lambda mode: the dataset keeps only close players and labels them with `control_ball`
    lamb = 2
# Active tuning mode. Read as a module-level global by PlaysDataset.__getitem__,
# CompProbModel.forward and the dataset-setup cell, so change it before running them.
TUNING = TuningParam.lamb

## Plays Dataset

In [None]:
class PlaysDataset(torch.utils.data.Dataset):
    """Per-play player features and labels for tuning the completion-probability model.

    Only plays that contain both a 'pass_forward' and a 'pass_arrived' event, and
    whose ball position at 'pass_arrived' lies inside the field bounds, are kept.
    Rows for the ball (nflId == 0) and for quarterbacks are dropped.

    Args:
        data_dir: directory holding the ``week<N>_norm.csv`` tracking files.
        all_weeks: if True, load weeks 5-9; otherwise load week 1 only.
        event_filter: if not None, every item keeps only the rows of that event
            ('pass_forward' for sigma tuning, 'pass_arrived' for lambda tuning).

    Attributes:
        all_plays: filtered tracking rows merged with the per-play ball start/end.
        player_reached: one row per player at 'pass_arrived', carrying the
            ``close_to_ball`` and ``control_ball`` labels.
        play_list: array of unique (gameId, playId) pairs, one dataset item each.
        max_num: number of player rows every item is padded up to.
    """

    # Column order of the feature tensor returned by __getitem__.
    FEATURE_COLUMNS = ['nflId', 'x', 'y', 'v_x', 'v_y', 'a_x', 'a_y', 'team_pos',
                       'ball_end_x', 'ball_end_y', 'ball_start_x', 'ball_start_y', 'tof']
    # A player within this many yards of the ball at 'pass_arrived' counts as having reached it.
    REACH_RADIUS = 1.5

    def __init__(self, data_dir, all_weeks=False, event_filter=None):
        # event_filter should be 'pass_forward' for sigma, 'pass_arrived' for lambda
        self.event_filter = event_filter
        keys = ['gameId', 'playId']

        if all_weeks:
            tracking_df = pd.concat(
                [pd.read_csv(os.path.join(data_dir, 'week%d_norm.csv' % week)) for week in range(5, 10)]
            )
        else:
            tracking_df = pd.read_csv(os.path.join(data_dir, 'week1_norm.csv'))

        # keep only plays that have both a throw and an arrival event
        pass_forward_plays = tracking_df[tracking_df['event'] == 'pass_forward'][keys].drop_duplicates()
        pass_attempted_plays = tracking_df[tracking_df['event'] == 'pass_arrived'][keys].drop_duplicates()
        tracking_df = pass_forward_plays.merge(pass_attempted_plays.merge(tracking_df, on=keys), on=keys)

        # ball position when the pass arrives / when it is thrown
        is_ball = tracking_df.nflId == 0
        ball_end = tracking_df[is_ball & (tracking_df.event == 'pass_arrived')][keys + ['x', 'y']]
        ball_end = ball_end.rename(columns={'x': 'ball_end_x', 'y': 'ball_end_y'})
        ball_start = tracking_df[is_ball & (tracking_df.event == 'pass_forward')][keys + ['x', 'y']]
        ball_start = ball_start.rename(columns={'x': 'ball_start_x', 'y': 'ball_start_y'})

        ball_start_end = ball_end.merge(ball_start, on=keys)
        # remove plays where ball is thrown out of bounds
        in_bounds = ball_start_end.ball_end_x.between(0.5, 119.5) & ball_start_end.ball_end_y.between(-0.5, 53.5)
        ball_start_end = ball_start_end[in_bounds]

        # drop QB and ball rows, then drop plays with more than 17 distinct remaining players
        tracking_df = tracking_df[(tracking_df.position != 'QB') & (tracking_df.nflId != 0)]
        tracking_df = tracking_df.groupby(keys).filter(lambda x: len(x.nflId.unique()) <= 17)

        # attach ball start/end to every remaining player row (inner merge drops out-of-bounds plays)
        tracking_df = tracking_df[tracking_df.nflId != 0].merge(ball_start_end, on=keys)

        # for each player, label whether they reached the ball.
        # .copy() makes this an independent frame so the column assignments below
        # do not trigger pandas' SettingWithCopyWarning.
        reached = tracking_df[tracking_df.event == 'pass_arrived'][
            ['gameId', 'playId', 'nflId', 'team_pos', 'event', 'x', 'y',
             'ball_end_x', 'ball_end_y', 'ball_start_x', 'ball_start_y']
        ].copy()
        dist_to_ball = np.hypot(reached.x.values - reached.ball_end_x.values,
                                reached.y.values - reached.ball_end_y.values)
        reached['close_to_ball'] = (dist_to_ball <= self.REACH_RADIUS).astype(int)

        # control is given by (player is on offense) XOR (ball is caught)
        # NOTE(review): every row in `reached` has event == 'pass_arrived', so the
        # isin(...) term below is always False and the label currently reduces to
        # "player is on offense".  Behaviour is deliberately left unchanged: making
        # the label depend on the pass outcome needs the outcome joined per play
        # from the full tracking data, and the intended polarity (XOR vs. XNOR)
        # should be confirmed first.
        reached['control_ball'] = (
            (reached['team_pos'] == 'OFF')
            ^ reached['event'].isin(['pass_outcome_caught', 'pass_outcome_touchdown'])
        ).astype(int)
        self.player_reached = reached

        # store tracking_df
        self.all_plays = tracking_df

        # turn play list into np array
        self.play_list = tracking_df[keys].drop_duplicates().values

        # max number of players per play
        self.max_num = 17

    def __len__(self):
        """Number of plays (unique gameId/playId pairs) in the dataset."""
        return len(self.play_list)

    def __getitem__(self, idx):
        """Return ``(data, label)`` for play ``idx``.

        ``data`` is a float tensor whose columns follow ``FEATURE_COLUMNS`` and
        ``label`` a long tensor with one entry per row.  In lambda mode only
        players with ``close_to_ball == 1`` are kept and the label is
        ``control_ball``; in sigma mode all rows are kept and the label is
        ``close_to_ball``.  Items with fewer than ``max_num`` rows are padded with
        all-ones feature rows and zero labels.  Items are not truncated, so with
        ``event_filter=None`` (several frames per player) they can be longer.

        Raises:
            IndexError: the play has no 'pass_arrived' / 'pass_forward' rows.
            ValueError: the global TUNING is not a known TuningParam member.
        """
        gameId = self.play_list[idx, 0]
        playId = self.play_list[idx, 1]

        # rows of this play, computed once instead of re-filtering for every lookup
        frame = self.all_plays[(self.all_plays.gameId == gameId) & (self.all_plays.playId == playId)]
        reached = self.player_reached[
            (self.player_reached.gameId == gameId) & (self.player_reached.playId == playId)
        ]
        if reached.empty:
            raise IndexError('no pass_arrived player rows for gameId=%s playId=%s' % (gameId, playId))

        sigma_lambda_label = reached[['nflId', 'close_to_ball', 'control_ball']]
        ball_end = reached[['ball_end_x', 'ball_end_y']].iloc[0].values
        ball_start = reached[['ball_start_x', 'ball_start_y']].iloc[0].values

        # clean up frame (remove QB, merge with sigma_lambda_label, encode team as 1/0)
        frame = frame.loc[frame.position != 'QB'].merge(sigma_lambda_label, on='nflId')
        frame = frame.replace('OFF', 1)
        frame = frame.replace('DEF', 0)

        # time of flight in seconds: arrival timestamp minus throw timestamp
        arrived_at = frame.loc[frame.event == 'pass_arrived', 'time']
        thrown_at = frame.loc[frame.event == 'pass_forward', 'time']
        if arrived_at.empty or thrown_at.empty:
            raise IndexError(
                'missing pass_arrived/pass_forward rows for gameId=%s playId=%s' % (gameId, playId)
            )
        frame['tof'] = (pd.to_datetime(arrived_at.iloc[0]) - pd.to_datetime(thrown_at.iloc[0])).total_seconds()

        frame['ball_end_x'] = ball_end[0]
        frame['ball_end_y'] = ball_end[1]
        frame['ball_start_x'] = ball_start[0]
        frame['ball_start_y'] = ball_start[1]
        if self.event_filter is not None:
            frame = frame[frame.event == self.event_filter]

        # generate data, label, fill missing data
        if TUNING == TuningParam.lamb:
            frame = frame[frame.close_to_ball == 1]
            label_column = 'control_ball'
        elif TUNING == TuningParam.sigma:
            label_column = 'close_to_ball'
        else:
            raise ValueError('unsupported tuning mode: %r' % (TUNING,))

        # astype(float) guarantees a numeric array even if team_pos stayed object-typed
        data = torch.tensor(frame[self.FEATURE_COLUMNS].astype(float).values).float()
        label = torch.tensor(frame[label_column].values)

        n_pad = self.max_num - data.size(0)
        if n_pad > 0:
            data = torch.cat([data, torch.ones([n_pad, data.size(1)])], dim=0)
            # pad with the label's own dtype so torch.cat never mixes int and float
            label = torch.cat([label, torch.zeros([n_pad], dtype=label.dtype)], dim=0)

        return data, label.long()

## Completion Probability Model

In [None]:
class CompProbModel(torch.nn.Module):
    """Physics-style model of which players can reach and control a pass.

    For every field cell and candidate time of flight the model computes each
    player's arrival-time based probability of reaching the cell (a logistic in
    ``T - t_arrival`` with scale ``tti_sigma``).  In sigma mode that probability is
    read out at the actual ball landing cell / time of flight.  Otherwise the
    probabilities are integrated along every candidate ball trajectory using the
    control rates ``tti_lambda_off`` / ``tti_lambda_def``.

    Input layout: ``frame`` is ``(B, J, 13)`` with the feature order produced by
    ``PlaysDataset.__getitem__`` (see the ``COL_*`` constants).  Per-play values
    (ball start/end, time of flight) are read from player row 0.

    NOTE(review): which parameters are trainable is hard-coded in ``__init__``
    (lambdas trainable, sigma frozen); flip the flags by hand for sigma tuning.
    NOTE(review): padded player rows (all ones) are treated as real offensive
    players at (1, 1); confirm whether they should be masked out.
    """

    # Column indices of the per-player feature vector.
    COL_X, COL_Y = 1, 2
    COL_VX, COL_VY = 3, 4
    COL_AX, COL_AY = 5, 6
    COL_TEAM = 7
    COL_BALL_END = slice(8, 10)
    COL_BALL_START = slice(10, 12)
    COL_TOF = 12

    def __init__(self, a_max=7.0, s_max=9.0, avg_ball_speed=20.0, tti_sigma=0.5, tti_lambda_off=1.0, tti_lambda_def=1.0):
        super().__init__()

        def _scalar(value, trainable=False):
            # Build the float32 tensor first: calling .float() on a Parameter can
            # return a plain tensor (when a cast happens) that is not registered.
            return Parameter(torch.tensor([value], dtype=torch.float32), requires_grad=trainable)

        # define parameters and whether or not to optimize
        self.tti_sigma = _scalar(tti_sigma)
        self.tti_lambda_off = _scalar(tti_lambda_off, trainable=True)
        self.tti_lambda_def = _scalar(tti_lambda_def, trainable=True)
        self.a_max = _scalar(a_max)
        self.s_max = _scalar(s_max)
        self.reax_t = Parameter(self.s_max / self.a_max, requires_grad=False)
        self.avg_ball_speed = _scalar(avg_ball_speed)
        self.g = _scalar(10.72468)  # y/s/s
        self.z_max = _scalar(3.)
        self.z_min = _scalar(0.)

        # define field grid
        self.x = torch.linspace(0.5, 119.5, 120)
        self.y = torch.linspace(-0.5, 53.5, 55)
        self.y[0] = -0.2
        # The grid is laid out y-major, shape (len(y), len(x)), so that the flat
        # index of a cell is y_idx * len(x) + x_idx - the formula used everywhere
        # below.  The previous meshgrid(x, y) layout flattened x-major and made
        # those indices point at the wrong cells.
        self.xx = self.x.unsqueeze(0).expand(self.y.numel(), -1)
        self.yy = self.y.unsqueeze(1).expand(-1, self.x.numel())
        self.field_locs = Parameter(torch.flatten(torch.stack((self.xx, self.yy), dim=-1), end_dim=-2), requires_grad=False)  # (F, 2)
        self.T = Parameter(torch.linspace(0.1, 4, 40), requires_grad=False)  # (T,)

    def forward(self, frame):
        """Run the model on a batch of plays.

        Args:
            frame: float tensor ``(B, J, 13)``.

        Returns:
            Sigma mode (``TUNING == TuningParam.sigma``): tensor ``(B, J)`` with
            each player's probability of reaching the ball landing cell at the
            actual time of flight - the quantity the sigma labels are compared to.
            Otherwise: tuple ``(off_p_int_pass, def_p_int_pass, ind_p_int_pass)``
            of shapes ``(B, F, T)``, ``(B, F, T)`` and ``(B, F, T, J)``.

        NOTE(review): the non-sigma path materialises several ``(F, T, T, J)``
        tensors per play (F=6600, T=40), which is very memory hungry, and its
        tuple output still has to be reduced to per-player probabilities before
        it can be fed to a loss against the lambda labels.
        """
        p_int = self._arrival_probs(frame)  # (B, F, T, J)
        first_row = frame[:, 0]             # per-play values are replicated on every player row

        if TUNING == TuningParam.sigma:
            return self._probs_at_arrival(p_int, first_row[:, self.COL_BALL_END], first_row[:, self.COL_TOF])

        player_teams = frame[:, :, self.COL_TEAM]       # (B, J)
        ball_start = first_row[:, self.COL_BALL_START]  # (B, 2)
        # The trajectory integration is written per play, so loop over the batch.
        per_play = [
            self._pass_outcome_probs(p_int[b], ball_start[b], player_teams[b])
            for b in range(frame.size(0))
        ]
        off_p_int_pass, def_p_int_pass, ind_p_int_pass = (
            torch.stack(parts, dim=0) for parts in zip(*per_play)
        )
        return off_p_int_pass, def_p_int_pass, ind_p_int_pass

    def _arrival_probs(self, frame):
        """Probability that each player reaches each field cell by each time in ``self.T``.

        Returns a tensor of shape ``(B, F, T, J)``.
        """
        vel_x, vel_y = frame[:, :, self.COL_VX], frame[:, :, self.COL_VY]
        acc_x, acc_y = frame[:, :, self.COL_AX], frame[:, :, self.COL_AY]

        # velocity and position after moving for reax_t with the current velocity/acceleration
        v_x_r = acc_x * self.reax_t + vel_x
        v_y_r = acc_y * self.reax_t + vel_y
        x_r = frame[:, :, self.COL_X] + vel_x * self.reax_t + 0.5 * acc_x * self.reax_t**2
        y_r = frame[:, :, self.COL_Y] + vel_y * self.reax_t + 0.5 * acc_y * self.reax_t**2

        # positions are truncated to whole yards, as in the original implementation
        reaction_player_locs = torch.stack([x_r, y_r], dim=-1).int()  # (B, J, 2)
        reaction_player_vels = torch.stack([v_x_r, v_y_r], dim=-1)    # (B, J, 2)

        # calculate each player's distance from each field location
        int_d_vec = self.field_locs.unsqueeze(1).unsqueeze(0) - reaction_player_locs.unsqueeze(1)  # (B, F, J, 2)
        int_d_mag = torch.norm(int_d_vec, dim=-1)  # (B, F, J)

        # speed component towards the cell, clipped to [-s_max, s_max]
        int_s0 = torch.clip(torch.sum(int_d_vec * reaction_player_vels.unsqueeze(1), dim=-1) / int_d_mag,
                            -1 * self.s_max.item(), self.s_max.item())  # (B, F, J)

        # time to reach each cell: accelerate at a_max up to s_max, then cover the rest at s_max
        t_lt_smax = (self.s_max - int_s0) / self.a_max          # (B, F, J)
        d_lt_smax = t_lt_smax * ((int_s0 + self.s_max) / 2)     # (B, F, J)
        d_at_smax = int_d_mag - d_lt_smax                       # (B, F, J)
        t_at_smax = d_at_smax / self.s_max                      # (B, F, J)
        t_tot = self.reax_t + t_lt_smax + t_at_smax             # (B, F, J)

        # time margin between each candidate time of flight and the player's arrival
        int_dT = self.T.view(1, 1, -1, 1) - t_tot.unsqueeze(2)  # (B, F, T, J)

        # logistic in the time margin; 3.14 / 1.732 approximates pi / sqrt(3)
        return torch.sigmoid((3.14 / (1.732 * self.tti_sigma)) * int_dT)

    def _probs_at_arrival(self, p_int, ball_end, tof):
        """Pick ``p_int`` at the ball landing cell and actual time of flight.

        Args:
            p_int: ``(B, F, T, J)`` arrival probabilities.
            ball_end: ``(B, 2)`` ball (x, y) at 'pass_arrived'.
            tof: ``(B,)`` time of flight in seconds.

        Returns:
            ``(B, J)`` tensor.
        """
        n_fields, n_times, n_players = p_int.size(1), p_int.size(2), p_int.size(3)

        # self.T[k] == 0.1 * (k + 1), so the index of a time t is 10 * t - 1.
        # Clamp so very short / long flights stay inside the time grid.
        tof_idx = (torch.round(tof * 10).long() - 1).clamp(0, n_times - 1)
        tof_idx = tof_idx.view(-1, 1, 1, 1).expand(-1, n_fields, 1, n_players)
        # squeeze only the gathered axis so a batch of one keeps its batch dimension
        p_at_tof = torch.gather(p_int, 2, tof_idx).squeeze(2)  # (B, F, J)

        # index into ball position (truncated to whole yards, flat y-major index)
        n_x = self.x.shape[0]
        ball_end_x = ball_end[:, 0].long().clamp(0, n_x - 1)
        ball_end_y = ball_end[:, 1].long().clamp(0, self.y.shape[0] - 1)
        ball_field_ind = (ball_end_y * n_x + ball_end_x).view(-1, 1, 1).expand(-1, 1, n_players)
        return torch.gather(p_at_tof, 1, ball_field_ind).squeeze(1)  # (B, J)

    def _pass_outcome_probs(self, p_int, ball_start, player_teams):
        """Integrate arrival probabilities along every candidate trajectory of one play.

        Args:
            p_int: ``(F, T, J)`` arrival probabilities of this play.
            ball_start: ``(2,)`` ball (x, y) at the throw.
            player_teams: ``(J,)`` team flag, 1 for offense and 0 for defense.

        Returns:
            ``(off, def, ind)`` of shapes ``(F, T)``, ``(F, T)`` and ``(F, T, J)``.
        """
        n_x, n_y = self.x.shape[0], self.y.shape[0]
        n_players = p_int.size(-1)

        reach_vecs = self.field_locs - ball_start  # (F, 2)
        dx = reach_vecs[:, 0]                      # F
        dy = reach_vecs[:, 1]                      # F
        vx = dx[:, None] / self.T[None, :]         # F, T
        vy = dy[:, None] / self.T[None, :]         # F, T
        vz_0 = (self.T * self.g) / 2               # T

        # axes are (target cell, total flight time, time along the flight);
        # entries with time-along-flight > total flight time are meaningless and
        # are discarded by the diagonal extraction at the end
        traj_ts = torch.tile(self.T, (len(self.field_locs), len(self.T), 1))  # (F, T, T)
        traj_locs_x_idx = torch.round(torch.clip(ball_start[0] + vx.unsqueeze(-1) * self.T, 0, n_x - 1)).long()  # F, T, T
        traj_locs_y_idx = torch.round(torch.clip(ball_start[1] + vy.unsqueeze(-1) * self.T, 0, n_y - 1)).long()  # F, T, T
        traj_locs_z = 2.0 + vz_0.view(1, -1, 1) * traj_ts - 0.5 * self.g * traj_ts * traj_ts  # F, T, T
        # 1 where the ball height is strictly between z_min and z_max, else 0
        lambda_z = ((traj_locs_z < self.z_max) & (traj_locs_z > self.z_min)).to(p_int.dtype)  # F, T, T

        path_idxs = (traj_locs_y_idx * n_x + traj_locs_x_idx).flatten()  # (F*T*T,)
        # 10*traj_ts - 1 converts the times into indices - hacky
        traj_t_idxs = torch.round(10 * traj_ts - 1).flatten().long()  # (F*T*T,)
        p_int_traj = p_int[path_idxs, traj_t_idxs].reshape((*traj_locs_x_idx.shape, n_players)) \
            * lambda_z.unsqueeze(-1)  # F, T, T, J

        # cap the summed player probability at each trajectory point at 1
        norm_factor = torch.clamp(p_int_traj.sum(dim=-1), min=1.0)  # F, T, T
        p_int_traj_norm = p_int_traj / norm_factor.unsqueeze(-1)    # F, T, T, J

        # independent int probs at each point on trajectory
        all_p_int_traj = torch.sum(p_int_traj_norm, dim=-1)  # F, T, T
        off_p_int_traj = torch.sum((player_teams == 1)[None, None, None] * p_int_traj_norm, dim=-1)
        def_p_int_traj = torch.sum((player_teams == 0)[None, None, None] * p_int_traj_norm, dim=-1)
        ind_p_int_traj = p_int_traj_norm  # use for analyzing specific players

        # residual probability that nothing happened at earlier times on the trajectory
        compl_all_p_int_traj = 1 - all_p_int_traj  # F, T, T
        remaining_compl_p_int_traj = torch.cumprod(compl_all_p_int_traj, dim=-1)  # F, T, T
        shift_compl_cumsum = torch.roll(remaining_compl_p_int_traj, 1, dims=-1)  # F, T, T
        shift_compl_cumsum[:, :, 0] = 1

        # multiply residual prob by p_int at that location and lambda
        lambda_all = self.tti_lambda_off * player_teams + self.tti_lambda_def * (1 - player_teams)
        off_completion_prob_dt = shift_compl_cumsum * off_p_int_traj * self.tti_lambda_off  # F, T, T
        def_completion_prob_dt = shift_compl_cumsum * def_p_int_traj * self.tti_lambda_def  # F, T, T
        all_completion_prob_dt = off_completion_prob_dt + def_completion_prob_dt
        ind_completion_prob_dt = shift_compl_cumsum[:, :, :, None] * ind_p_int_traj * lambda_all[None, None, None]  # F, T, T, J

        # now accumulate values over total traj for each team and take at T=t
        all_completion_prob = torch.cumsum(all_completion_prob_dt, dim=-1)  # F, T, T
        off_completion_prob = torch.cumsum(off_completion_prob_dt, dim=-1)  # F, T, T
        def_completion_prob = torch.cumsum(def_completion_prob_dt, dim=-1)  # F, T, T
        ind_completion_prob = torch.cumsum(ind_completion_prob_dt, dim=-2)  # F, T, T, J

        # this einsum takes the diagonal values over the two time axes where T = t
        # this takes care of the t > T issue.
        all_p_int_pass = torch.einsum('ijj->ij', all_completion_prob)    # F, T
        off_p_int_pass = torch.einsum('ijj->ij', off_completion_prob)    # F, T
        def_p_int_pass = torch.einsum('ijj->ij', def_completion_prob)    # F, T
        ind_p_int_pass = torch.einsum('ijjk->ijk', ind_completion_prob)  # F, T, J

        assert torch.allclose(all_p_int_pass, off_p_int_pass + def_p_int_pass, atol=0.01)
        assert torch.allclose(all_p_int_pass, ind_p_int_pass.sum(-1), atol=0.01)
        return off_p_int_pass, def_p_int_pass, ind_p_int_pass

## Initialize Dataset, Model and Run Training Loop

In [None]:
# Pick the event whose frame is fed to the model for the active tuning mode:
# the arrival frame for lambda tuning, the throw frame for sigma tuning.
if TUNING == TuningParam.lamb:
    event_filter = 'pass_arrived'
elif TUNING == TuningParam.sigma:
    event_filter = 'pass_forward'

# Week-1 dataset plus a shuffling loader (batches of 8 plays, 4 worker processes).
ds = PlaysDataset(
    data_dir='~/Downloads/nfl-big-data-bowl-2021/',
    all_weeks=False,
    event_filter=event_filter,
)
loader = torch.utils.data.DataLoader(ds, shuffle=True, num_workers=4, batch_size=8)

In [None]:
# Pull one (features, label) pair straight from the dataset, bypassing the DataLoader.
data, label = ds[8]

In [None]:
%%time
from tqdm.notebook import tqdm
model = CompProbModel(tti_sigma=0.8)
loss_fn = torch.nn.BCELoss()

# check if we want cuda
if torch.cuda.is_available():
    model = model.cuda()
    loss_fn = loss_fn.cuda()

optimizer = torch.optim.Adam(model.parameters())
total_loss = 0

for epoch in range(1, 20):
    prog_bar = loader#tqdm(loader)
    total_loss = 0
    for ind, (data, target) in enumerate(prog_bar):
        if torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()
        
        output = model(data)
        loss = loss_fn(output, target.float())
        total_loss = total_loss + loss.detach().cpu().item()

        # step gradient
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        prog_bar.set_description("Batch %d Loss %.3f" % (epoch, total_loss / (ind + 1)))
        ind += 1

In [None]:
# Show both control rates. Only the last expression of a cell is rendered, so
# two separate bare expressions would silently drop the first one.
model.tti_lambda_off, model.tti_lambda_def

In [None]:
# Show the model's tti_sigma parameter.
model.tti_sigma

### Sandbox

In [None]:
# load files (both paths are built from data_dir so the location is set in one place)
data_dir = '../data/'
tracking_df = pd.read_csv(os.path.join(data_dir, 'week1_norm.csv'))
plays_df = pd.read_csv(os.path.join(data_dir, 'plays.csv'))

# keep only the frames carrying a pass-related event
pass_events = ['pass_forward', 'pass_arrived',
               'pass_outcome_caught', 'pass_outcome_incomplete',
               'pass_outcome_touchdown', 'pass_outcome_intercepted']
# .copy() so that adding a column below does not write into a slice of the raw
# frame (SettingWithCopyWarning)
tracking_df = tracking_df[tracking_df['event'].isin(pass_events)].copy()

# keep only plays that contain a pass_forward frame
tracking_df['valid_frame'] = tracking_df['event'].str.contains('pass_forward')
tracking_df = tracking_df.groupby(['playId', 'gameId']).filter(lambda l: l['valid_frame'].any()).reset_index()

# merge tracking df and plays df (left join: plays without tracking rows are kept)
all_plays = plays_df.merge(tracking_df, how='left', on=['playId', 'gameId'])

In [None]:
# Time of flight in seconds for one example play:
# timestamp of its pass_arrived frame minus timestamp of its pass_forward frame.
play = tracking_df[(tracking_df.playId == 81) & (tracking_df.gameId == 2018090902)]
arrived_at = pd.to_datetime(play[play.event == 'pass_arrived'].time.iloc[0])
thrown_at = pd.to_datetime(play[play.event == 'pass_forward'].time.iloc[0])
print(pd.to_timedelta(arrived_at - thrown_at).total_seconds())

In [None]:
# Largest number of distinct non-QB nflIds tracked in any single play.
(
    tracking_df.loc[tracking_df.position != 'QB', ['gameId', 'playId', 'nflId']]
    .drop_duplicates()
    .groupby(['gameId', 'playId'])
    .nflId.count()
    .max()
)

In [None]:
# NOTE(review): min_dist_off / min_dist_def are produced by the helper cell further
# down the notebook, so this cell fails on a fresh top-to-bottom run.
# NOTE(review): the first statistic is a median while the other three are means -
# confirm that the mix is intentional.
play_keys = ['gameId', 'playId']
caught_events = ['pass_outcome_caught', 'pass_outcome_touchdown']
missed_events = ['pass_outcome_incomplete']

# offense: distance from the ball to the nearest offensive player, by pass outcome
min_dist_off = min_dist_off.rename(columns={0: 'ball_dist'})
complete_passes = all_plays.loc[all_plays.event.isin(caught_events)][play_keys]
incomplete_passes = all_plays.loc[all_plays.event.isin(missed_events)][play_keys]

off_when_complete = complete_passes.merge(min_dist_off).drop_duplicates()
off_when_incomplete = incomplete_passes.merge(min_dist_off).drop_duplicates()
print(off_when_complete.ball_dist.median())
print(off_when_incomplete.ball_dist.mean())

# defense: same comparison (the pass selections above are unchanged, so they are reused)
min_dist_def = min_dist_def.rename(columns={0: 'ball_dist'})

def_when_complete = complete_passes.merge(min_dist_def).drop_duplicates()
def_when_incomplete = incomplete_passes.merge(min_dist_def).drop_duplicates()
print(def_when_complete.ball_dist.mean())
print(def_when_incomplete.ball_dist.mean())

In [None]:
all_plays.loc[(all_plays.gameId == 2018090600) & (all_plays.playId == 146)][['displayName', 'x', 'y']]
import matplotlib.pyplot as plt
%matplotlib inline

from visualize2 import AnimatePlay
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)
play_df = tracking_df[(tracking_df.gameId == 2018091000) & (tracking_df.playId == 3016)]

animated_play = AnimatePlay(play_df, 20)#play_df[play_df.frameId <= 46], 20)
HTML(animated_play.ani.to_jshtml())

In [None]:
# NOTE(review): get_nearest_player is defined in the next cell, so on a fresh
# kernel this cell only works after that cell has been run.
print(get_nearest_player(play_df))

In [None]:
# helper functions to get nearest offensive and defensive players in grouped df
def _nearest_player_dist(df, team_pos):
    """Distance from the ball to the closest ``team_pos`` player at 'pass_arrived'.

    Args:
        df: tracking rows of one play (needs event, nflId, team_pos, x, y).
        team_pos: 'OFF' or 'DEF'.

    Returns:
        The minimum Euclidean distance, or ``np.nan`` when the play has no
        'pass_arrived' rows or no player of the requested side.

    Raises:
        AssertionError: there is no ball row (nflId == 0) at 'pass_arrived'.
    """
    arrived = df[df.event.isin(['pass_arrived'])]
    if len(arrived) == 0:
        return np.nan

    ball_end = arrived[arrived.nflId == 0][['x', 'y']].head(1)
    assert len(ball_end) == 1, 'expected one ball row at pass_arrived, found %d' % len(ball_end)

    players_end = arrived[(arrived.nflId != 0) & (arrived.team_pos == team_pos)][['x', 'y']]
    if len(players_end) == 0:
        # .min() of an empty array raises; report "no such player" as NaN instead
        return np.nan

    return np.linalg.norm(players_end.values - ball_end.values, axis=1).min()


def get_nearest_player(df):
    """Nearest offensive player's distance to the ball at 'pass_arrived'.

    NOTE(review): despite the generic name this only considers the offense
    (same result as get_nearest_player_off); behaviour kept as is.
    """
    return _nearest_player_dist(df, 'OFF')


def get_nearest_player_off(df):
    """Nearest offensive player's distance to the ball at 'pass_arrived'."""
    return _nearest_player_dist(df, 'OFF')


def get_nearest_player_def(df):
    """Nearest defensive player's distance to the ball at 'pass_arrived'."""
    return _nearest_player_dist(df, 'DEF')

# get, for every play, the distance from the ball to the nearest offensive /
# defensive player at pass_arrived (the unnamed result column is called 0)
min_dist_off = all_plays.groupby(['playId', 'gameId']).apply(get_nearest_player_off).reset_index()
min_dist_def = all_plays.groupby(['playId', 'gameId']).apply(get_nearest_player_def).reset_index()