This notebook uses the player with the fastest pre-snap speed (`s`) on each play to predict which way the play is going (`playDirection`).

In [31]:
import pandas as pd
import numpy as np

In [32]:
plays = pd.read_csv('../data/tracking_week_1.csv')

# Restrict to frames before or at the snap and discard the football's own
# tracking rows, applying both filters as one combined mask.
plays = plays.loc[
    plays['frameType'].isin(['BEFORE_SNAP', 'BALL_SNAP'])
    & plays['displayName'].ne('football')
]

In [33]:
# Columns that identify one tracking row: one player in one frame of one play
frame_keys = ['gameId', 'playId', 'nflId', 'frameId']

# For each (gameId, playId), the row holding the maximum speed 's'
fastest_rows = plays.loc[plays.groupby(['gameId', 'playId'])['s'].idxmax()]

# The same player's immediately preceding frame. Rows are matched on their
# identifying keys instead of `index - 1`, which only lands on the right row
# when the index is a gap-free range ordered by player and then by frame.
previous_keys = fastest_rows[frame_keys].assign(frameId=fastest_rows['frameId'] - 1)
previous_rows = plays.loc[
    pd.MultiIndex.from_frame(plays[frame_keys]).isin(
        pd.MultiIndex.from_frame(previous_keys)
    )
]

# Combine each fastest row with its preceding frame, restoring the original row order
max_s_players = pd.concat([previous_rows, fastest_rows]).sort_index()

max_s_players.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
2393,2022091200,64,47803.0,Noah Fant,112,BEFORE_SNAP,2022-09-13 00:16:14.6,87.0,SEA,right,37.09,26.55,4.35,4.05,0.41,169.24,191.29,
2394,2022091200,64,47803.0,Noah Fant,113,BEFORE_SNAP,2022-09-13 00:16:14.7,87.0,SEA,right,37.01,26.1,4.81,3.67,0.46,171.1,189.58,
6019,2022091200,85,46096.0,Rashaad Penny,115,BEFORE_SNAP,2022-09-13 00:16:51.9,20.0,SEA,right,33.66,30.36,6.27,0.61,0.63,348.86,0.05,
6020,2022091200,85,46096.0,Rashaad Penny,116,BEFORE_SNAP,2022-09-13 00:16:52,20.0,SEA,right,33.66,30.99,6.28,0.7,0.63,352.45,359.69,
8368,2022091200,109,42393.0,Ronald Darby,43,BEFORE_SNAP,2022-09-13 00:17:19.8,23.0,DEN,right,53.75,30.08,4.91,0.22,0.49,2.82,356.34,


In [34]:
# Encode the orientation angle 'o' (converted from degrees to radians) as
# sine and cosine columns.
max_s_players = max_s_players.assign(
    orientation_sin=lambda frame: np.sin(np.deg2rad(frame['o'])),
    orientation_cos=lambda frame: np.cos(np.deg2rad(frame['o'])),
)

In [35]:
# Split speed 's' into two components using the orientation cosine and sine.
# NOTE(review): these use the orientation 'o', not the motion angle 'dir' —
# confirm that is the intended basis for a velocity decomposition.
max_s_players = max_s_players.assign(
    forward_velocity=lambda frame: frame['s'].mul(frame['orientation_cos']),
    sideways_velocity=lambda frame: frame['s'].mul(frame['orientation_sin']),
)

In [36]:
# Preview the first rows, now including the orientation and velocity columns
max_s_players.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,playDirection,...,s,a,dis,o,dir,event,orientation_sin,orientation_cos,forward_velocity,sideways_velocity
2393,2022091200,64,47803.0,Noah Fant,112,BEFORE_SNAP,2022-09-13 00:16:14.6,87.0,SEA,right,...,4.35,4.05,0.41,169.24,191.29,,0.186696,-0.982418,-4.273518,0.812125
2394,2022091200,64,47803.0,Noah Fant,113,BEFORE_SNAP,2022-09-13 00:16:14.7,87.0,SEA,right,...,4.81,3.67,0.46,171.1,189.58,,0.15471,-0.98796,-4.752087,0.744157
6019,2022091200,85,46096.0,Rashaad Penny,115,BEFORE_SNAP,2022-09-13 00:16:51.9,20.0,SEA,right,...,6.27,0.61,0.63,348.86,0.05,,-0.193207,0.981158,6.151861,-1.211408
6020,2022091200,85,46096.0,Rashaad Penny,116,BEFORE_SNAP,2022-09-13 00:16:52,20.0,SEA,right,...,6.28,0.7,0.63,352.45,359.69,,-0.131391,0.991331,6.225556,-0.825138
8368,2022091200,109,42393.0,Ronald Darby,43,BEFORE_SNAP,2022-09-13 00:17:19.8,23.0,DEN,right,...,4.91,0.22,0.49,2.82,356.34,,0.049198,0.998789,4.904054,0.241564


In [37]:
def determine_direction(df):
    """Add ``y_diff`` and a 'left'/'right' ``direction`` label to tracking rows.

    Rows are sorted by gameId, playId and frameId, and ``y_diff`` is the change
    in ``y`` from the previous row of the same (gameId, playId) group.
    ``direction`` is then derived from ``o`` and the sign of ``y_diff``:

    * ``o > 180`` and ``y_diff > 0``  -> 'right'
    * ``o > 180`` and ``y_diff < 0``  -> 'left'
    * ``o < 180`` and ``y_diff > 0``  -> 'left'
    * ``o < 180`` and ``y_diff < 0``  -> 'right'

    Every other row (first row of a group, ``y_diff == 0``, ``o == 180`` or
    missing values) gets a real missing value (NaN) in ``direction``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns gameId, playId, frameId, y and o.

    Returns
    -------
    pandas.DataFrame
        A sorted copy of ``df`` with ``y_diff`` and ``direction`` added.
        The frame passed in is not modified.
    """
    # sort_values returns a new frame, so the caller's frame is left untouched
    df = df.sort_values(by=['gameId', 'playId', 'frameId'])

    # Change in y between consecutive frames of the same play
    df['y_diff'] = df.groupby(['gameId', 'playId'])['y'].diff()

    conditions = [
        (df['o'] > 180) & (df['y_diff'] > 0),
        (df['o'] > 180) & (df['y_diff'] < 0),
        (df['o'] < 180) & (df['y_diff'] > 0),
        (df['o'] < 180) & (df['y_diff'] < 0)
    ]
    choices = ['right', 'left', 'left', 'right']

    # Use a string sentinel as the default: mixing string choices with a float
    # np.nan default either produces the literal text 'nan' or raises a dtype
    # error, depending on the NumPy version. Unmatched rows are converted to a
    # genuine NaN afterwards so isna()/dropna() treat them as missing.
    labels = np.select(conditions, choices, default='')
    df['direction'] = pd.Series(labels, index=df.index, dtype=object).where(
        labels != '', np.nan
    )

    return df

# Apply determine_direction to max_s_players: the result is re-sorted by
# gameId/playId/frameId and gains the y_diff and direction columns.
max_s_players = determine_direction(max_s_players)
max_s_players.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,playDirection,...,dis,o,dir,event,orientation_sin,orientation_cos,forward_velocity,sideways_velocity,y_diff,direction
6687782,2022090800,56,42489.0,Stefon Diggs,87,BEFORE_SNAP,2022-09-09 00:23:56.8,14.0,BUF,left,...,0.46,347.06,0.33,,-0.223931,0.974605,4.434453,-1.018884,,
6687783,2022090800,56,42489.0,Stefon Diggs,88,BEFORE_SNAP,2022-09-09 00:23:56.9,14.0,BUF,left,...,0.46,343.02,359.99,,-0.292038,0.956407,4.380343,-1.337533,0.46,right
6693251,2022090800,80,47857.0,Devin Singletary,62,BEFORE_SNAP,2022-09-09 00:24:30.6,26.0,BUF,left,...,0.5,175.99,170.15,,0.069931,-0.997552,-5.047612,0.353849,,
6693252,2022090800,80,47857.0,Devin Singletary,63,BEFORE_SNAP,2022-09-09 00:24:30.7,26.0,BUF,left,...,0.51,177.6,170.53,,0.041876,-0.999123,-5.085535,0.213147,-0.5,right
6697693,2022090800,101,53079.0,Reggie Gilliam,104,BEFORE_SNAP,2022-09-09 00:25:08.5,41.0,BUF,left,...,0.31,331.36,5.1,,-0.479305,0.877649,2.89624,-1.581705,,


In [38]:
# Keep only rows with a computed y_diff; rows without a preceding frame in
# their play have NaN there and are removed.
max_s_players = max_s_players[max_s_players['y_diff'].notna()]
max_s_players.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,playDirection,...,dis,o,dir,event,orientation_sin,orientation_cos,forward_velocity,sideways_velocity,y_diff,direction
6687783,2022090800,56,42489.0,Stefon Diggs,88,BEFORE_SNAP,2022-09-09 00:23:56.9,14.0,BUF,left,...,0.46,343.02,359.99,,-0.292038,0.956407,4.380343,-1.337533,0.46,right
6693252,2022090800,80,47857.0,Devin Singletary,63,BEFORE_SNAP,2022-09-09 00:24:30.7,26.0,BUF,left,...,0.51,177.6,170.53,,0.041876,-0.999123,-5.085535,0.213147,-0.5,right
6697694,2022090800,101,53079.0,Reggie Gilliam,105,BEFORE_SNAP,2022-09-09 00:25:08.6,41.0,BUF,left,...,0.36,332.57,4.35,,-0.460665,0.887574,3.363907,-1.745919,0.36,right
6699835,2022090800,122,44985.0,Isaiah McKenzie,19,BEFORE_SNAP,2022-09-09 00:25:38.3,6.0,BUF,left,...,0.36,192.45,190.7,,-0.215588,-0.976485,-3.456755,-0.76318,-0.35,left
6704176,2022090800,167,47879.0,Dawson Knox,16,BEFORE_SNAP,2022-09-09 00:26:59.6,88.0,BUF,left,...,0.4,345.78,348.73,,-0.245646,0.96936,3.828971,-0.970301,0.39,right


In [39]:
# Binary target: True when playDirection is 'right'. A direct comparison
# replaces pd.get_dummies(...)['right'] so the column is always boolean
# (get_dummies' dtype differs between pandas versions) and no KeyError is raised
# when no row has playDirection == 'right'. The source column is then
# dropped without mutating the frame in place.
max_s_players = (
    max_s_players
    .assign(play_goes_right=max_s_players['playDirection'].eq('right'))
    .drop(columns=['playDirection'])
)
max_s_players.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,x,...,o,dir,event,orientation_sin,orientation_cos,forward_velocity,sideways_velocity,y_diff,direction,play_goes_right
6687783,2022090800,56,42489.0,Stefon Diggs,88,BEFORE_SNAP,2022-09-09 00:23:56.9,14.0,BUF,87.17,...,343.02,359.99,,-0.292038,0.956407,4.380343,-1.337533,0.46,right,False
6693252,2022090800,80,47857.0,Devin Singletary,63,BEFORE_SNAP,2022-09-09 00:24:30.7,26.0,BUF,83.08,...,177.6,170.53,,0.041876,-0.999123,-5.085535,0.213147,-0.5,right,False
6697694,2022090800,101,53079.0,Reggie Gilliam,105,BEFORE_SNAP,2022-09-09 00:25:08.6,41.0,BUF,74.58,...,332.57,4.35,,-0.460665,0.887574,3.363907,-1.745919,0.36,right,False
6699835,2022090800,122,44985.0,Isaiah McKenzie,19,BEFORE_SNAP,2022-09-09 00:25:38.3,6.0,BUF,71.38,...,192.45,190.7,,-0.215588,-0.976485,-3.456755,-0.76318,-0.35,left,False
6704176,2022090800,167,47879.0,Dawson Knox,16,BEFORE_SNAP,2022-09-09 00:26:59.6,88.0,BUF,59.38,...,345.78,348.73,,-0.245646,0.96936,3.828971,-0.970301,0.39,right,False


In [40]:
# List the available columns before choosing the model inputs
max_s_players.columns

Index(['gameId', 'playId', 'nflId', 'displayName', 'frameId', 'frameType',
       'time', 'jerseyNumber', 'club', 'x', 'y', 's', 'a', 'dis', 'o', 'dir',
       'event', 'orientation_sin', 'orientation_cos', 'forward_velocity',
       'sideways_velocity', 'y_diff', 'direction', 'play_goes_right'],
      dtype='object')

In [41]:
# Modelling table: the numeric tracking and derived columns plus the
# play_goes_right target (identifier and text columns are left out).
df = max_s_players.loc[:, [
    'frameId', 'x', 'y', 's', 'a', 'dis', 'o', 'dir',
    'orientation_sin', 'orientation_cos',
    'forward_velocity', 'sideways_velocity',
    'y_diff', 'play_goes_right',
]]

df.head()

Unnamed: 0,frameId,x,y,s,a,dis,o,dir,orientation_sin,orientation_cos,forward_velocity,sideways_velocity,y_diff,play_goes_right
6687783,88,87.17,33.38,4.58,0.41,0.46,343.02,359.99,-0.292038,0.956407,4.380343,-1.337533,0.46,False
6693252,63,83.08,34.18,5.09,0.5,0.51,177.6,170.53,0.041876,-0.999123,-5.085535,0.213147,-0.5,False
6697694,105,74.58,25.93,3.79,3.42,0.36,332.57,4.35,-0.460665,0.887574,3.363907,-1.745919,0.36,False
6699835,19,71.38,24.46,3.54,0.64,0.36,192.45,190.7,-0.215588,-0.976485,-3.456755,-0.76318,-0.35,False
6704176,16,59.38,33.21,3.95,0.39,0.4,345.78,348.73,-0.245646,0.96936,3.828971,-0.970301,0.39,False


In [42]:
from sklearn.model_selection import train_test_split

# Target is play_goes_right; every remaining column of df is a feature
y = df['play_goes_right']
X = df.drop('play_goes_right', axis=1)

# Hold out 20% of the rows for testing, with a fixed seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the size of each split
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_train shape: (1391, 13)
X_test shape: (348, 13)
y_train shape: (1391,)
y_test shape: (348,)


In [43]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Build a 100-tree random forest with a fixed seed and fit it on the training
# split (fit returns the estimator itself, so construction and training chain).
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predict the held-out rows
y_pred = rf_classifier.predict(X_test)

# Score the predictions against the true labels
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.7959770114942529
Classification Report:
               precision    recall  f1-score   support

       False       0.79      0.80      0.80       172
        True       0.80      0.79      0.80       176

    accuracy                           0.80       348
   macro avg       0.80      0.80      0.80       348
weighted avg       0.80      0.80      0.80       348



In [44]:
import os

import joblib

# Create the output directory if it is missing so the export does not fail
# on a fresh checkout where ../models has not been created yet.
os.makedirs('../models', exist_ok=True)

# Export the trained model to the specified directory
joblib.dump(rf_classifier, '../models/rf_classifier.pkl')

['../models/rf_classifier.pkl']