

---


# **Goal of this Model**


---



With this model I will solely focus on using pre-snap movement on the offensive side of the ball to determine one of four outcomes:


*   Run middle
*   Run left
*   Run right
*   Pass


</br>

I will be using a Long Short-Term Neural Network in order to achieve this.

</br>





---


# **Code**

---

**Creating a 2-D Model**

---



In [15]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [16]:
plays = pd.read_csv('../data/plays.csv')
plays.columns

Index(['gameId', 'playId', 'playDescription', 'quarter', 'down', 'yardsToGo',
       'possessionTeam', 'defensiveTeam', 'yardlineSide', 'yardlineNumber',
       'gameClock', 'preSnapHomeScore', 'preSnapVisitorScore',
       'playNullifiedByPenalty', 'absoluteYardlineNumber',
       'preSnapHomeTeamWinProbability', 'preSnapVisitorTeamWinProbability',
       'expectedPoints', 'offenseFormation', 'receiverAlignment',
       'playClockAtSnap', 'passResult', 'passLength', 'targetX', 'targetY',
       'playAction', 'dropbackType', 'dropbackDistance', 'passLocationType',
       'timeToThrow', 'timeInTackleBox', 'timeToSack', 'passTippedAtLine',
       'unblockedPressure', 'qbSpike', 'qbKneel', 'qbSneak',
       'rushLocationType', 'penaltyYards', 'prePenaltyYardsGained',
       'yardsGained', 'homeTeamWinProbabilityAdded',
       'visitorTeamWinProbilityAdded', 'expectedPointsAdded', 'isDropback',
       'pff_runConceptPrimary', 'pff_runConceptSecondary', 'pff_runPassOption',
       'pff_pass

In [17]:
plays = plays[['gameId', 'playId', 'passResult', 'rushLocationType']]

plays['passResult'] = np.where(plays['passResult'].isna(), 0, 1)

plays['rushLocationType'] = plays['rushLocationType'].map({
    'INSIDE_RIGHT': 3,
    'INSIDE_LEFT': 3,
    'OUTSIDE_RIGHT': 2,
    'OUTSIDE_LEFT': 4,
}).fillna(0)

plays['playResult'] = plays['passResult'].astype(int) + plays['rushLocationType']
plays = plays.dropna(subset=['playResult'])
plays = plays[['gameId','playId', 'playResult']]
plays.head()

Unnamed: 0,gameId,playId,playResult
0,2022102302,2655,1.0
1,2022091809,3698,1.0
2,2022103004,3146,1.0
3,2022110610,348,1.0
4,2022102700,2799,3.0


In [18]:
tracking = pd.read_csv('../data/tracking_week_1.csv')

tracking.columns

Index(['gameId', 'playId', 'nflId', 'displayName', 'frameId', 'frameType',
       'time', 'jerseyNumber', 'club', 'playDirection', 'x', 'y', 's', 'a',
       'dis', 'o', 'dir', 'event'],
      dtype='object')

In [19]:
tracking = tracking[tracking['frameType'].isin(['BEFORE_SNAP','SNAP'])]

features = ['gameId', 'playId','frameId', 'x', 'y', 's', 'a', 'dis', 'o', 'dir']
tracking = tracking[features]
tracking.head()

Unnamed: 0,gameId,playId,frameId,x,y,s,a,dis,o,dir
0,2022091200,64,1,51.06,28.55,0.72,0.37,0.07,246.17,68.34
1,2022091200,64,2,51.13,28.57,0.71,0.36,0.07,245.41,71.21
2,2022091200,64,3,51.2,28.59,0.69,0.23,0.07,244.45,69.9
3,2022091200,64,4,51.26,28.62,0.67,0.22,0.07,244.45,67.98
4,2022091200,64,5,51.32,28.65,0.65,0.34,0.07,245.74,62.83


In [20]:
merged = tracking.merge(plays, on=['gameId','playId'], how='inner')
merged.head()

Unnamed: 0,gameId,playId,frameId,x,y,s,a,dis,o,dir,playResult
0,2022091200,64,1,51.06,28.55,0.72,0.37,0.07,246.17,68.34,2.0
1,2022091200,64,2,51.13,28.57,0.71,0.36,0.07,245.41,71.21,2.0
2,2022091200,64,3,51.2,28.59,0.69,0.23,0.07,244.45,69.9,2.0
3,2022091200,64,4,51.26,28.62,0.67,0.22,0.07,244.45,67.98,2.0
4,2022091200,64,5,51.32,28.65,0.65,0.34,0.07,245.74,62.83,2.0


In [21]:
merged.playResult.value_counts()
# 1 = pass
# 2 = right
# 3 = middle
# 4 = left
# 5 = other

playResult
1.0    2775249
3.0    1002616
2.0     427409
4.0     423614
5.0      42182
0.0      21390
Name: count, dtype: int64

In [22]:
merged = merged.sort_values(by=['gameId', 'playId', 'frameId'])
merged.head()

Unnamed: 0,gameId,playId,frameId,x,y,s,a,dis,o,dir,playResult
4420324,2022090800,56,1,89.48,29.52,0.68,1.17,0.07,308.3,266.54,1.0
4420470,2022090800,56,1,81.93,28.52,1.24,0.44,0.12,48.93,305.7,1.0
4420616,2022090800,56,1,82.9,29.84,0.69,0.43,0.07,85.38,312.18,1.0
4420762,2022090800,56,1,88.8,30.19,2.01,0.12,0.2,256.97,263.5,1.0
4420908,2022090800,56,1,91.08,28.34,2.5,0.51,0.25,275.29,185.78,1.0


In [23]:
from sklearn.model_selection import train_test_split

unique_plays = merged[['gameId', 'playId']].drop_duplicates()

train_plays, test_plays = train_test_split(unique_plays, test_size=0.2, random_state=42)

X_train = merged[['x', 'y', 's', 'a', 'dis', 'o', 'dir']]
y_train = merged['playResult']

X_test = merged[['x', 'y', 's', 'a', 'dis', 'o', 'dir']]
y_test = merged['playResult']

train_df = merged.merge(train_plays, on=['gameId', 'playId'])
test_df = merged.merge(test_plays, on=['gameId', 'playId'])

# Check the result
print(f"Training set size: {len(train_df)} rows")
print(f"Testing set size: {len(test_df)} rows")


Training set size: 3738880 rows
Testing set size: 953580 rows


In [24]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Assuming X_train, X_test, y_train, and y_test are already prepared
# Here, X_train and X_test are of shape (num_samples, 7) where 7 is the number of features

# Define the neural network model
model = Sequential([
    Dense(64, activation='relu', input_shape=(7,)),  # 7 input features
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(4, activation='softmax')  # 4 classes, one for each play type
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Convert target variables to one-hot encoding, handling unknown values
# Replace values outside the range 1-4 with a default value (e.g., 1)
y_train_filtered = y_train.copy()
y_train_filtered[~y_train_filtered.isin([1, 2, 3, 4])] = 1 # Replace values not in [1, 2, 3, 4] with 1
y_train_encoded = to_categorical(y_train_filtered - 1, num_classes=4) # Subtract 1 and one-hot encode


y_test_filtered = y_test.copy()
y_test_filtered[~y_test_filtered.isin([1, 2, 3, 4])] = 1 # Replace values not in [1, 2, 3, 4] with 1
y_test_encoded = to_categorical(y_test_filtered - 1, num_classes=4) # Subtract 1 and one-hot encode

In [25]:
X_test.head()

Unnamed: 0,x,y,s,a,dis,o,dir
4420324,89.48,29.52,0.68,1.17,0.07,308.3,266.54
4420470,81.93,28.52,1.24,0.44,0.12,48.93,305.7
4420616,82.9,29.84,0.69,0.43,0.07,85.38,312.18
4420762,88.8,30.19,2.01,0.12,0.2,256.97,263.5
4420908,91.08,28.34,2.5,0.51,0.25,275.29,185.78


In [27]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [28]:
print(X_train_scaled)

[[ 1.16932341  0.40496724  0.19011913 ...  0.1858929   1.35680396
   0.82541567]
 [ 0.86148823  0.25622215  0.86623227 ...  0.79118549 -1.36580006
   1.19949479]
 [ 0.90103791  0.45256567  0.20219258 ...  0.1858929  -0.98318482
   1.26139553]
 ...
 [ 0.09414279 -0.08737901 -0.55843471 ... -0.5404582  -0.84221026
  -0.63889968]
 [ 0.09047323 -0.75970681 -0.5825816  ... -0.66151672 -0.8089348
   0.46604752]
 [ 0.11412143 -0.41610565 -0.6308754  ... -0.66151672         nan
          nan]]


In [29]:
# Train the model using the encoded target variables
history = model.fit(X_train_scaled, y_train_encoded, epochs=5, batch_size=16, validation_data=(X_test_scaled, y_test_encoded))
saved_model = model.save('model.h5')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
