In [1]:
# import the usual libraries
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from scikeras.wrappers import KerasRegressor
from keras.utils import to_categorical
# from keras.utils import np_utils
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RandomizedSearchCV

# other machine learning modules
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split

# commonly used modules
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt

# Show the installed TensorFlow version in the cell output.
print(tf.__version__)


2.13.0


In [2]:
# Load the two raw tables.
df_attendance = pd.read_csv('attendance.csv')
df_games = pd.read_csv('games.csv')

# Seasons that are filtered out of both tables.
excludeYears = [2000, 2001]

# Attendance table: filter the excluded seasons, drop every row that contains
# a null, remove the columns listed in `droppit`, rename `team_name` so it
# matches the merge key used below, and cast `week` to int.
droppit = ['team', 'total', 'home', 'away']
df_attendance = (
    df_attendance.loc[~df_attendance['year'].isin(excludeYears)]
    .dropna()
    .drop(columns=droppit)
    .rename(columns={'team_name': 'home_team_name'})
    .astype({'week': int})
)

# Games table: same season filter, remove the columns listed in `droppa`,
# discard rows whose `week` is one of the labels in `exclude_weeks`, drop rows
# with nulls, and cast `week` to int so it lines up with the attendance table.
droppa = ['home_team', 'away_team', 'tie', 'home_team_city', 'away_team_city']
exclude_weeks = ['WildCard', 'Division', 'ConfChamp', 'SuperBowl']
df_games = (
    df_games.loc[~df_games['year'].isin(excludeYears)]
    .drop(columns=droppa)
)
df_games = (
    df_games.loc[~df_games['week'].isin(exclude_weeks)]
    .dropna()
    .astype({'week': int})
)

# Inner-join the two tables on season, week and home team, then discard `date`.
right = ['year', 'week', 'home_team_name']
to_convert_categorical = ['home_team_name', 'time', 'winner', 'day', 'away_team_name']
df_result = pd.merge(df_attendance, df_games, on=right).drop(columns='date')

# Store the listed columns with the pandas `category` dtype.
df_result[to_convert_categorical] = df_result[to_convert_categorical].astype('category')

In [None]:
# TODO: Data team will handle putting in the training, validation, and test data
#
# Features come from the merged attendance + games table (`df_result`).
# Taking them from `df_attendance` left the text column `home_team_name` in X,
# which StandardScaler cannot convert to float.
# NOTE(review): assumes every non-numeric column in df_result is categorical
# and safe to one-hot encode -- confirm against the final dataset.
features = df_result.drop('weekly_attendance', axis=1)

# One-hot encode category/object columns; numeric columns pass through as-is.
X = pd.get_dummies(features, dtype=float)
Y = df_result['weekly_attendance']

# Hold out 20% of the rows for the final evaluation; fixed seed for repeatability.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)



In [None]:
# Learn the scaling statistics from the training split only, then apply that
# same fitted transform to both splits so the test data does not influence it.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
def create_model(optimizer='adam', activation='relu', neurons=64, meta=None):
    """Build and compile a one-hidden-layer regression network.

    Args:
        optimizer: Optimizer (name or instance) handed to ``model.compile``.
        activation: Activation function of the hidden layer.
        neurons: Number of units in the hidden layer.
        meta: Optional metadata dict. scikeras supplies it automatically when
            the builder accepts a ``meta`` argument; its ``n_features_in_``
            entry is the number of input features of the data being fitted.
            When omitted, the width of the notebook-level ``X_train`` is used.

    Returns:
        A compiled ``Sequential`` model with one hidden ``Dense`` layer, a
        single-unit output layer and mean-squared-error loss.
    """
    # Prefer the feature count reported by the wrapper so the network always
    # matches the data passed to fit(); fall back to the global for direct calls.
    if meta is not None:
        n_features = meta['n_features_in_']
    else:
        n_features = X_train.shape[1]

    model = Sequential()
    model.add(Dense(neurons, input_dim=n_features, activation=activation))
    model.add(Dense(1))  # single output unit with no activation
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    return model

In [None]:
# Wrap the Keras builder as a scikit-learn compatible regressor.
# - `model=` is the current keyword; `build_fn=` is deprecated in scikeras.
# - `activation` and `neurons` are declared here (with create_model's own
#   defaults) so scikeras registers them as estimator parameters: it forwards
#   them to create_model and GridSearchCV can change them through set_params.
#   Without this, set_params rejects them as invalid parameters.
# NOTE(review): the wrapper's built-in `optimizer` parameter does not appear to
# be forwarded to create_model's `optimizer` argument; address it as
# `model__optimizer` when tuning -- confirm against the scikeras docs.
model = KerasRegressor(
    model=create_model,
    activation='relu',
    neurons=64,
    verbose=0,
)

In [None]:
# Hyperparameter search space for the arguments of create_model.
# The `model__` prefix is scikeras' routing syntax: each value is passed to the
# model-building function under the name after the prefix. Un-prefixed keys
# are treated as parameters of the wrapper itself, so `activation`/`neurons`
# would be rejected and `optimizer` would not reach create_model.
param_grid = {
    'model__optimizer': ['adam', 'sgd', 'rmsprop'],
    'model__activation': ['relu', 'tanh', 'sigmoid'],
    'model__neurons': [32, 64, 128],
}

In [None]:
# MSE is an error (lower is better), so the scorer is flagged accordingly;
# scikit-learn then negates it so that "higher score" still means "better".
scorer = make_scorer(
    mean_squared_error,
    greater_is_better=False,
)

In [None]:
# Exhaustive search over param_grid with 5-fold cross-validation on the
# training split, ranked by the (negated) MSE scorer.
grid = GridSearchCV(
    model,
    param_grid,
    scoring=scorer,
    cv=5,
)
grid_result = grid.fit(X_train, Y_train)

In [None]:
# Parameter combination with the best mean cross-validated score.
best_params = grid_result.best_params_
print(
    "Best Hyperparameters:",
    best_params,
)

In [None]:
# Take the best estimator found by the search and score it once on the
# held-out test split.
best_model = grid_result.best_estimator_
y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_true=Y_test, y_pred=y_pred)
print(
    "Final Mean Squared Error on Test Set:",
    mse,
)