In [53]:
import sys
import pandas as pd
import numpy as np
sys.path.append('c:\\Users\\tyler\\OneDrive\\Documents\\Python\\NFL')
from backend.preprocess.preprocess import main as load_data
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import os
from sklearn.model_selection import GridSearchCV, KFold, cross_val_score
from sklearn.exceptions import DataConversionWarning, ConvergenceWarning
import warnings
warnings.filterwarnings("ignore", category=DataConversionWarning)
warnings.filterwarnings("ignore", category=ConvergenceWarning)

In [54]:
os.chdir('c:\\Users\\tyler\\OneDrive\\Documents\\Python\\NFL')
os.getcwd()

'c:\\Users\\tyler\\OneDrive\\Documents\\Python\\NFL'

In [55]:
# Load data
df = load_data()

last_season = df[[index[0].year >= 2021 for index in df.index]]
df = df[[index[0].year < 2021 for index in df.index]]
X = df.drop(['y'], axis=1)
y = df[['y']]

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=1)

# Pipeline
pipe = Pipeline(
    [
        ('scaler', StandardScaler()),
        ('nn', MLPClassifier(random_state=1, activation='identity'))
    ]
)


Executing load_data ...
	Done: 3.6481s
Executing preprocess ...
	Done: 1.9792s
Executing load_target_data ...
	Done: 0.9180s
Executing merge_x_y ...
	Done: 0.0368s


In [56]:
parameters = {
    'nn__hidden_layer_sizes': [
        (100, 100, 100), (250, 250, 250), (300, 300, 300),
        (100, 100, 100, 100), (250, 250, 250, 250), (300, 300, 300, 300)
    ],
    'nn__activation': ['identity']
}

In [57]:
search = GridSearchCV(
    pipe,
    parameters,
    scoring='accuracy',
    n_jobs=-1,
    cv=KFold(n_splits=5, shuffle=True, random_state=1)
).fit(X_train, y_train)

In [58]:
results_df = pd.DataFrame(search.cv_results_)
results_df.sort_values(by=['rank_test_score'], inplace=True)
results_df.set_index(['param_nn__activation', 'param_nn__hidden_layer_sizes'], inplace=True)
results_df[['mean_test_score', 'mean_test_score', 'rank_test_score']]

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_test_score,mean_test_score,rank_test_score
param_nn__activation,param_nn__hidden_layer_sizes,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
identity,"(250, 250, 250)",0.616489,0.616489,1
identity,"(300, 300, 300)",0.61383,0.61383,2
identity,"(100, 100, 100)",0.61117,0.61117,3
identity,"(150, 150, 150)",0.610106,0.610106,4
identity,"(200, 200, 200)",0.606383,0.606383,5
tanh,"(100, 100, 100)",0.603723,0.603723,6
tanh,"(250, 250, 250)",0.596277,0.596277,7
tanh,"(300, 300, 300)",0.594681,0.594681,8
tanh,"(200, 200, 200)",0.58617,0.58617,9
tanh,"(150, 150, 150)",0.585638,0.585638,10


In [59]:
search.best_estimator_

Pipeline(steps=[('scaler', StandardScaler()),
                ('nn',
                 MLPClassifier(activation='identity',
                               hidden_layer_sizes=(250, 250, 250),
                               random_state=1))])

In [60]:
scores = cross_val_score(search.best_estimator_, X_train, y_train)
print(f'Mean: {scores.mean()} Std: {scores.std()}')
print(search.best_estimator_.score(X_test, y_test))

Mean: 0.601063829787234 Std: 0.026754843276509135
0.6148936170212767
