

---


# **Goal of this Model**


---



With this model I will solely focus on using pre-snap movement on the offensive side of the ball to determine one of four outcomes:


*   Run middle
*   Run left
*   Run right
*   Pass


</br>

I will be using a Long Short-Term Neural Network in order to achieve this.

</br>





---


# **Code**

---

**Creating a 2-D Model**

---



In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
plays = pd.read_csv('plays.csv')
plays.columns

Index(['gameId', 'playId', 'playDescription', 'quarter', 'down', 'yardsToGo',
       'possessionTeam', 'defensiveTeam', 'yardlineSide', 'yardlineNumber',
       'gameClock', 'preSnapHomeScore', 'preSnapVisitorScore',
       'playNullifiedByPenalty', 'absoluteYardlineNumber',
       'preSnapHomeTeamWinProbability', 'preSnapVisitorTeamWinProbability',
       'expectedPoints', 'offenseFormation', 'receiverAlignment',
       'playClockAtSnap', 'passResult', 'passLength', 'targetX', 'targetY',
       'playAction', 'dropbackType', 'dropbackDistance', 'passLocationType',
       'timeToThrow', 'timeInTackleBox', 'timeToSack', 'passTippedAtLine',
       'unblockedPressure', 'qbSpike', 'qbKneel', 'qbSneak',
       'rushLocationType', 'penaltyYards', 'prePenaltyYardsGained',
       'yardsGained', 'homeTeamWinProbabilityAdded',
       'visitorTeamWinProbilityAdded', 'expectedPointsAdded', 'isDropback',
       'pff_runConceptPrimary', 'pff_runConceptSecondary', 'pff_runPassOption',
       'pff_pass

In [3]:
plays = plays[['gameId', 'playId', 'passResult', 'rushLocationType']]

plays['passResult'] = np.where(plays['passResult'].isna(), 0, 1)

plays['rushLocationType'] = plays['rushLocationType'].map({
    'INSIDE_RIGHT': 3,
    'INSIDE_LEFT': 3,
    'OUTSIDE_RIGHT': 2,
    'OUTSIDE_LEFT': 4,
}).fillna(0)

plays['playResult'] = plays['passResult'].astype(int) + plays['rushLocationType']
plays = plays.dropna(subset=['playResult'])
plays = plays[['gameId','playId', 'playResult']]
plays.head()

Unnamed: 0,gameId,playId,playResult
0,2022102302,2655,1.0
1,2022091809,3698,1.0
2,2022103004,3146,1.0
3,2022110610,348,1.0
4,2022102700,2799,3.0


In [4]:
tracking = pd.read_csv('tracking_week_1.csv')

tracking.columns

Index(['gameId', 'playId', 'nflId', 'displayName', 'frameId', 'frameType',
       'time', 'jerseyNumber', 'club', 'playDirection', 'x', 'y', 's', 'a',
       'dis', 'o', 'dir', 'event'],
      dtype='object')

In [6]:
tracking = tracking[tracking['frameType'].isin(['BEFORE_SNAP','SNAP'])]

features = ['gameId', 'playId','frameId', 'x', 'y', 's', 'a', 'dis', 'o', 'dir']
tracking = tracking[features]
tracking.head()

Unnamed: 0,gameId,playId,frameId,x,y,s,a,dis,o,dir
0,2022091200,64,1,51.06,28.55,0.72,0.37,0.07,246.17,68.34
1,2022091200,64,2,51.13,28.57,0.71,0.36,0.07,245.41,71.21
2,2022091200,64,3,51.2,28.59,0.69,0.23,0.07,244.45,69.9
3,2022091200,64,4,51.26,28.62,0.67,0.22,0.07,244.45,67.98
4,2022091200,64,5,51.32,28.65,0.65,0.34,0.07,245.74,62.83


In [7]:
merged = tracking.merge(plays, on=['gameId','playId'], how='inner')
merged.head()

Unnamed: 0,gameId,playId,frameId,x,y,s,a,dis,o,dir,playResult
0,2022091200,64,1,51.06,28.55,0.72,0.37,0.07,246.17,68.34,2.0
1,2022091200,64,2,51.13,28.57,0.71,0.36,0.07,245.41,71.21,2.0
2,2022091200,64,3,51.2,28.59,0.69,0.23,0.07,244.45,69.9,2.0
3,2022091200,64,4,51.26,28.62,0.67,0.22,0.07,244.45,67.98,2.0
4,2022091200,64,5,51.32,28.65,0.65,0.34,0.07,245.74,62.83,2.0


In [8]:
merged.playResult.value_counts()
# 1 = pass
# 2 = right
# 3 = middle
# 4 = left
# 5 = other

playResult
1.0    2775249
3.0    1002616
2.0     427409
4.0     423614
5.0      42182
0.0      21390
Name: count, dtype: int64

In [9]:
from sklearn.model_selection import train_test_split

# Get unique playIds to split at the play level
unique_play_ids = merged['playId'].unique()

# Split playIds into training and testing sets
train_play_ids, test_play_ids = train_test_split(unique_play_ids, test_size=0.2, random_state=42)

# Create training and testing DataFrames by selecting rows based on playId
train_df = merged[merged['playId'].isin(train_play_ids)]
test_df = merged[merged['playId'].isin(test_play_ids)]

In [10]:

X_train = train_df[['x', 'y', 's', 'a', 'dis', 'o', 'dir']]
y_train = train_df['playResult']

X_test = test_df[['x', 'y', 's', 'a', 'dis', 'o', 'dir']]
y_test = test_df['playResult']


In [3]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Assuming X_train, X_test, y_train, and y_test are already prepared
# Here, X_train and X_test are of shape (num_samples, 7) where 7 is the number of features

# Define the neural network model
model = Sequential([
    Dense(64, activation='relu', input_shape=(7,)),  # 7 input features
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(4, activation='softmax')  # 4 classes, one for each play type
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Convert target variables to one-hot encoding, handling unknown values
# Replace values outside the range 1-4 with a default value (e.g., 1)
y_train_filtered = y_train.copy()
y_train_filtered[~y_train_filtered.isin([1, 2, 3, 4])] = 1 # Replace values not in [1, 2, 3, 4] with 1
y_train_encoded = to_categorical(y_train_filtered - 1, num_classes=4) # Subtract 1 and one-hot encode


y_test_filtered = y_test.copy()
y_test_filtered[~y_test_filtered.isin([1, 2, 3, 4])] = 1 # Replace values not in [1, 2, 3, 4] with 1
y_test_encoded = to_categorical(y_test_filtered - 1, num_classes=4) # Subtract 1 and one-hot encode


# Train the model using the encoded target variables
history = model.fit(X_train, y_train_encoded, epochs=5, batch_size=16, validation_data=(X_test, y_test_encoded))
saved_model = model.save('model.h5')

ModuleNotFoundError: No module named 'numpy'



---

**Creating a 3-d model**



---





In [None]:
# Define columns to use as player data
player_data_columns = ['x', 'y', 's', 'a', 'dis', 'o', 'dir']

# Group data by 'gameId' and 'playId'
grouped_data = merged.groupby(['gameId', 'playId'])

# Create a 3D array structure to hold the data
plays_data = []
outcomes = []

for (game_id, play_id), group in grouped_data:
    # Convert player data to a NumPy array and append it to the plays_data list
    play_array = group[player_data_columns].values
    plays_data.append(play_array)

    # Get the play outcome (assuming a single outcome per play) and append to outcomes list
    outcome = group['playResult'].iloc[0]
    outcomes.append(outcome)

# Convert lists to arrays for further processing
plays_data = np.array(plays_data, dtype=object)  # 3D array (plays, frames, features)
outcomes = np.array(outcomes)  # 1D array (play outcomes)