In [1]:
import numpy as np 
import pandas as pd
import torch
from torch import nn

In [2]:
import warnings
warnings.filterwarnings('ignore')

# Data Loading

In [3]:
from Scripts.Data.NFL_data_preprocessing import preprocess_nfl, open_all_tsv

# Point open_all_tsv at Data/NFL/ and pass its result straight through
# preprocess_nfl. Later cells index `dataframes` by play key
# (e.g. '2019_WAS_2019090806_1117').
dataframes = preprocess_nfl(open_all_tsv('Data/NFL/'))

Conversion completed for all TSV files in the folder.


In [4]:
# Spot-check the column layout of a single preprocessed play.
sample_play = '2019_WAS_2019090806_1117'
print(dataframes[sample_play].columns)

Index(['gameId', 'playerId', 'frame', 'x', 'y', 's', 'o', 'dir'], dtype='object')


In [5]:
from Scripts.Data.NFL_data_preprocessing import check_dataframe_shapes

# Run the project's shape check over the preprocessed plays and keep whatever it
# returns; the function reports per-play results on stdout.
# NOTE(review): `dataframes` is rebound to the function's own output, so re-running
# this cell feeds already-checked data back in — confirm check_dataframe_shapes is
# idempotent, and whether it drops or repairs plays with an unexpected shape.
dataframes = check_dataframe_shapes(dataframes)

Dataframe '2017_MIN_2018011401_4497' has the expected shape: (6877, 8)
Dataframe '2018_ARI_2018092311_141' has the expected shape: (10442, 8)
Dataframe '2018_ARI_2018100710_355' has the expected shape: (11868, 8)
Dataframe '2018_ARI_2018111807_3385' has the expected shape: (7176, 8)
Dataframe '2018_ARI_2018111807_973' has the expected shape: (10649, 8)
Dataframe '2018_ARI_2018121600_322' has the expected shape: (7567, 8)
Dataframe '2018_ARI_2018123015_322' has the expected shape: (7774, 8)
Dataframe '2018_ATL_2018100707_1113' has the expected shape: (8441, 8)
Dataframe '2018_ATL_2018102200_3269' has the expected shape: (5428, 8)
Dataframe '2018_ATL_2018120903_166' has the expected shape: (5405, 8)
Dataframe '2018_ATL_2018121600_665' has the expected shape: (6440, 8)
Dataframe '2018_ATL_2018121600_882' has the expected shape: (7245, 8)
Dataframe '2018_ATL_2018122304_2305' has the expected shape: (6118, 8)
Dataframe '2018_BAL_2018091300_4157' has the expected shape: (9131, 8)
Dataframe '

Dataframe '2018_CLE_2018121501_1907' has the expected shape: (11822, 8)
Dataframe '2018_CLE_2018122305_1246' has the expected shape: (5934, 8)
Dataframe '2018_CLE_2018122305_3048' has the expected shape: (6624, 8)
Dataframe '2018_CLE_2018122305_3615' has an unexpected shape: (9591, 8) (expected: (9568, 8))
Dataframe '2018_DAL_2018111111_1374' has the expected shape: (9982, 8)
Dataframe '2018_DAL_2018111111_3656' has the expected shape: (4232, 8)
Dataframe '2018_DAL_2018112201_2897' has the expected shape: (12328, 8)
Dataframe '2018_DAL_2019010501_1774' has the expected shape: (7498, 8)
Dataframe '2018_DAL_2019010501_3071' has the expected shape: (10810, 8)
Dataframe '2018_DAL_2019011201_2750' has the expected shape: (10465, 8)
Dataframe '2018_DAL_2019011201_531' has the expected shape: (5865, 8)
Dataframe '2018_DEN_2018101800_1168' has the expected shape: (13685, 8)
Dataframe '2018_DEN_2018101800_483' has the expected shape: (8234, 8)
Dataframe '2018_DEN_2018101800_967' has the expecte

# Data Preparation

In [6]:
from Scripts.Data.NFL_data_preprocessing import prepare_data

# Play keys handed to prepare_data as the held-out test set
# (presumably all remaining plays become training data — confirm in prepare_data).
test_games = [
    '2019_WAS_2019090806_193',
    '2019_WAS_2019090806_1117',
    '2019_WAS_2019092907_2823',
    '2019_WAS_2019100609_431',
    '2019_WAS_2019111707_3624',
    '2019_WAS_2019120803_3729',
    '2019_WAS_2019121508_700',
]

# `dataframes` must already hold the preprocessed plays produced by the cells above.
# prepare_data returns twelve objects, unpacked here in its return order.
(
    X_train, Y_train, Y_train_noised, Y_noise_train, X_noise_train,
    X_test, Y_test, Y_test_noised, Y_noise_test, X_noise_test,
    Data_train, Data_test,
) = prepare_data(dataframes, test_games)

Number of successfully converted dataframes: 509
Number of failed conversions: 0


In [7]:
# Print the tensor shape of every training play, flag plays containing NaNs, and
# report the mean (frames, players, features) over the NaN-free plays only.
clean_shapes = []
for game, x in X_train.items():
    print(f"{game} shape: {x.shape}")
    if torch.isnan(x).any():
        # Plays with NaNs are reported but excluded from the averages.
        print(f"X_train for {game} contains nan values.")
        continue
    clean_shapes.append((x.shape[0], x.shape[1], x.shape[2]))

count = len(clean_shapes)
total_frames = sum(shape[0] for shape in clean_shapes)
total_players = sum(shape[1] for shape in clean_shapes)
total_features = sum(shape[2] for shape in clean_shapes)

# Nothing is printed when no play is NaN-free (avoids dividing by zero).
if count > 0:
    avg_frames, avg_players, avg_features = (
        total / count for total in (total_frames, total_players, total_features)
    )
    print(f"Average shape: ({avg_frames}, {avg_players}, {avg_features})")

2017_MIN_2018011401_4497 shape: torch.Size([149, 23, 8])
2018_ARI_2018092311_141 shape: torch.Size([227, 23, 8])
2018_ARI_2018100710_355 shape: torch.Size([258, 23, 8])
2018_ARI_2018111807_3385 shape: torch.Size([156, 23, 8])
2018_ARI_2018111807_973 shape: torch.Size([231, 23, 8])
2018_ARI_2018121600_322 shape: torch.Size([164, 23, 8])
2018_ARI_2018123015_322 shape: torch.Size([169, 23, 8])
2018_ATL_2018100707_1113 shape: torch.Size([183, 23, 8])
2018_ATL_2018102200_3269 shape: torch.Size([118, 23, 8])
2018_ATL_2018120903_166 shape: torch.Size([117, 23, 8])
2018_ATL_2018121600_665 shape: torch.Size([140, 23, 8])
2018_ATL_2018121600_882 shape: torch.Size([157, 23, 8])
2018_ATL_2018122304_2305 shape: torch.Size([133, 23, 8])
2018_BAL_2018091300_4157 shape: torch.Size([198, 23, 8])
2018_BAL_2018092301_3690 shape: torch.Size([2, 23, 8])
2018_BAL_2018112500_1273 shape: torch.Size([106, 23, 8])
2018_BAL_2018112500_1502 shape: torch.Size([124, 23, 8])
2018_BAL_2018122201_2372 shape: torch.Siz

In [8]:
import pickle

# Assemble the payload BEFORE opening the file: open(..., 'wb') truncates the
# target immediately, so a NameError on a missing split (e.g. running this cell
# right after a kernel restart) would otherwise wipe the existing cache that the
# load cell reads back.
train_test_splits = {
    'X_train': X_train,
    'Y_train': Y_train,
    'Y_train_noised': Y_train_noised,
    'Y_noise_train': Y_noise_train,
    'X_noise_train': X_noise_train,
    'X_test': X_test,
    'Y_test': Y_test,
    'Y_test_noised': Y_test_noised,
    'Y_noise_test': Y_noise_test,
    'X_noise_test': X_noise_test,
    'Data_train': Data_train,
    'Data_test': Data_test,
}

# Save all splits in a single pickle so later sessions can skip preprocessing.
with open('Data/nfl_train_test_data.pkl', 'wb') as f:
    pickle.dump(train_test_splits, f)

In [9]:
import pickle

# Load the cached splits. pickle.load can execute arbitrary code, so only read a
# file that this notebook wrote itself.
with open('Data/nfl_train_test_data.pkl', 'rb') as f:
    data = pickle.load(f)

# The file is closed at this point; rebind each split to its notebook-level name.
# Training splits
X_train = data['X_train']
Y_train = data['Y_train']
Y_train_noised = data['Y_train_noised']
Y_noise_train = data['Y_noise_train']
X_noise_train = data['X_noise_train']

# Test splits
X_test = data['X_test']
Y_test = data['Y_test']
Y_test_noised = data['Y_test_noised']
Y_noise_test = data['Y_noise_test']
X_noise_test = data['X_noise_test']

# Remaining train / test entries stored under the 'Data_*' keys
Data_train = data['Data_train']
Data_test = data['Data_test']