In [182]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [183]:
import pandas as pd

#Importing NFL stats dataset
file_path = '/content/drive/My Drive/Sports Modeling/nfl/full_nfl.csv'
data = pd.read_csv(file_path)

  data = pd.read_csv(file_path)


In [184]:
data[['home_name', 'away_name','home_record_wins', 'away_record_wins', 'home_record_losses', 'away_record_losses']].count()

Unnamed: 0,0
home_name,6053
away_name,6053
home_record_wins,824
away_record_wins,824
home_record_losses,824
away_record_losses,824


#### Can not use NFL stats data for modeling as target variable has too many null values!!

### Importing NFL Betting odds data

In [185]:
file_path = '/content/drive/My Drive/Sports Modeling/nfl/sorted_odds.csv'
data = pd.read_csv(file_path)

In [186]:
# Get the count of null values in each column
null_counts = data.isnull().sum()

# Filter the columns with null values
null_counts = null_counts[null_counts > 0]

# Display the columns with their respective null value counts
print(null_counts)

# Optionally, save the null counts to a CSV file for further analysis
null_counts.to_csv('null_value_counts.csv', header=['null_counts'])

# If you want to download the file, use the following code
from google.colab import files
files.download('null_value_counts.csv')

OpenerHomePointSpread            18
OpenerAwayPointSpread            18
OpenerHomePointSpreadPayout      18
OpenerAwayPointSpreadPayout      18
OpenerOverUnder                 886
                               ... 
BarstoolUnderPayout            1040
BarstoolHomeTeamTotal          1327
BarstoolAwayTeamTotal          1327
BarstoolHomeTeamTotalPayout    1327
BarstoolAwayTeamTotalPayout    1327
Length: 130, dtype: int64


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [187]:
# Drop columns with more than 100 null values
data = data.dropna(axis=1, thresh=len(data) - 100)

In [188]:
# Step 1: Group-Based Mean Imputation (based on HomeTeam and AwayTeam)
for column in data.columns[data.isnull().any()]:  # Iterate over columns with null values
    data[column] = data.groupby(['HomeTeam', 'AwayTeam'])[column].transform(lambda x: x.fillna(x.mean()))

# Step 2: Overall Median Imputation
for column in data.columns[data.isnull().any()]:  # Iterate over columns with null values
    data[column].fillna(data[column].median(), inplace=True)


In [189]:
# Dropoing "Unnamed: 0" column
data = data.drop(columns=['Unnamed: 0'])

# List of columns to drop
columns_to_drop = ['Season', 'Type', 'Scope','Week', 'Timeframe', 'DateTimeString', 'LastUpdated', 'Status', 'AwayTeamLogo', 'HomeTeamLogo']

# Drop the columns
data = data.drop(columns=columns_to_drop)

In [190]:
data.dtypes #Understanding datatype

Unnamed: 0,0
DayString,object
TimeString,object
DayOfWeek,object
AwayTeam,object
AwayScore,int64
...,...
PointsBetHomePointSpreadPayout,float64
PointsBetAwayPointSpreadPayout,float64
PointsBetOverUnder,float64
PointsBetOverPayout,float64


In [191]:
# Normalize the DayString column to a consistent date format
data['DayString'] = pd.to_datetime(data['DayString'], errors='coerce')

  data['DayString'] = pd.to_datetime(data['DayString'], errors='coerce')


In [192]:
# Map DayOfWeek to numerical format
day_mapping = {'Mon': 0, 'Tue': 1, 'Wed': 2, 'Thu': 3, 'Fri': 4, 'Sat': 5, 'Sun': 6}
data['DayOfWeek'] = data['DayOfWeek'].map(day_mapping)

# Strip leading and trailing spaces from the TimeString column
data['TimeString'] = data['TimeString'].str.strip()

# Remove the 'ET' suffix and convert to datetime
data['Time'] = pd.to_datetime(data['TimeString'].str.replace(' ET', ''), format='%I:%M %p')

# Extract time-related features
data['Hour'] = data['Time'].dt.hour
data['Minute'] = data['Time'].dt.minute

# Drop the original TimeString and Time columns if not needed
data = data.drop(columns=['TimeString', 'Time'])

In [193]:
# Splitting the quarter scores into separate columns
data[['Home_Q1', 'Home_Q2', 'Home_Q3', 'Home_Q4', 'Home_OT']] = data['HomeTeamQuarters'].str.split(',', expand=True).fillna(0).astype(int)
data[['Away_Q1', 'Away_Q2', 'Away_Q3', 'Away_Q4', 'Away_OT']] = data['AwayTeamQuarters'].str.split(',', expand=True).fillna(0).astype(int)

# Drop the original QuarterNames, HomeTeamQuarters, and AwayTeamQuarters columns if not needed
data = data.drop(columns=['QuarterNames', 'HomeTeamQuarters', 'AwayTeamQuarters'])

In [194]:
print(data.columns.tolist())

['DayString', 'DayOfWeek', 'AwayTeam', 'AwayScore', 'AwayTeamHasWon', 'HomeTeam', 'HomeScore', 'HomeTeamHasWon', 'OpenerHomeMoneyLine', 'OpenerAwayMoneyLine', 'OpenerHomePointSpread', 'OpenerAwayPointSpread', 'OpenerHomePointSpreadPayout', 'OpenerAwayPointSpreadPayout', 'ConsensusHomeMoneyLine', 'ConsensusAwayMoneyLine', 'ConsensusHomePointSpread', 'ConsensusAwayPointSpread', 'ConsensusHomePointSpreadPayout', 'ConsensusAwayPointSpreadPayout', 'ConsensusOverUnder', 'ConsensusOverPayout', 'ConsensusUnderPayout', 'DraftKingsHomeMoneyLine', 'DraftKingsAwayMoneyLine', 'DraftKingsHomePointSpread', 'DraftKingsAwayPointSpread', 'DraftKingsHomePointSpreadPayout', 'DraftKingsAwayPointSpreadPayout', 'DraftKingsOverUnder', 'DraftKingsOverPayout', 'DraftKingsUnderPayout', 'FanDuelHomeMoneyLine', 'FanDuelAwayMoneyLine', 'FanDuelHomePointSpread', 'FanDuelAwayPointSpread', 'FanDuelHomePointSpreadPayout', 'FanDuelAwayPointSpreadPayout', 'FanDuelOverUnder', 'FanDuelOverPayout', 'FanDuelUnderPayout', 'Su

### Feature Engineering

In [195]:
# 1. Score-Related Features
data['ScoreDifference'] = data['HomeScore'] - data['AwayScore']
data['TotalPoints'] = data['HomeScore'] + data['AwayScore']

# Quarter Score Differences
data['Q1_Diff'] = data['Home_Q1'] - data['Away_Q1']
data['Q2_Diff'] = data['Home_Q2'] - data['Away_Q2']
data['Q3_Diff'] = data['Home_Q3'] - data['Away_Q3']
data['Q4_Diff'] = data['Home_Q4'] - data['Away_Q4']
data['OT_Diff'] = data['Home_OT'] - data['Away_OT']

# 2. Betting Line Features

# Money Line Differences
data['OpenerMoneyLineDiff'] = data['OpenerHomeMoneyLine'] - data['OpenerAwayMoneyLine']
data['ConsensusMoneyLineDiff'] = data['ConsensusHomeMoneyLine'] - data['ConsensusAwayMoneyLine']
data['DraftKingsMoneyLineDiff'] = data['DraftKingsHomeMoneyLine'] - data['DraftKingsAwayMoneyLine']
data['FanDuelMoneyLineDiff'] = data['FanDuelHomeMoneyLine'] - data['FanDuelAwayMoneyLine']
data['SugarHousePAMoneyLineDiff'] = data['SugarHousePAHomeMoneyLine'] - data['SugarHousePAAwayMoneyLine']
data['ParxMoneyLineDiff'] = data['ParxHomeMoneyLine'] - data['ParxAwayMoneyLine']
data['RiversCasinoPAMoneyLineDiff'] = data['RiversCasinoPAHomeMoneyLine'] - data['RiversCasinoPAAwayMoneyLine']
data['PointsBetMoneyLineDiff'] = data['PointsBetHomeMoneyLine'] - data['PointsBetAwayMoneyLine']

# Point Spread Differences
data['OpenerSpreadDiff'] = data['OpenerHomePointSpread'] - data['OpenerAwayPointSpread']
data['ConsensusSpreadDiff'] = data['ConsensusHomePointSpread'] - data['ConsensusAwayPointSpread']
data['DraftKingsSpreadDiff'] = data['DraftKingsHomePointSpread'] - data['DraftKingsAwayPointSpread']
data['FanDuelSpreadDiff'] = data['FanDuelHomePointSpread'] - data['FanDuelAwayPointSpread']
data['SugarHousePASpreadDiff'] = data['SugarHousePAHomePointSpread'] - data['SugarHousePAAwayPointSpread']
data['ParxSpreadDiff'] = data['ParxHomePointSpread'] - data['ParxAwayPointSpread']
data['RiversCasinoPASpreadDiff'] = data['RiversCasinoPAHomePointSpread'] - data['RiversCasinoPAAwayPointSpread']
data['PointsBetSpreadDiff'] = data['PointsBetHomePointSpread'] - data['PointsBetAwayPointSpread']

# Point Spread Payout Differences
data['OpenerSpreadPayoutDiff'] = data['OpenerHomePointSpreadPayout'] - data['OpenerAwayPointSpreadPayout']
data['ConsensusSpreadPayoutDiff'] = data['ConsensusHomePointSpreadPayout'] - data['ConsensusAwayPointSpreadPayout']
data['DraftKingsSpreadPayoutDiff'] = data['DraftKingsHomePointSpreadPayout'] - data['DraftKingsAwayPointSpreadPayout']
data['FanDuelSpreadPayoutDiff'] = data['FanDuelHomePointSpreadPayout'] - data['FanDuelAwayPointSpreadPayout']
data['SugarHousePASpreadPayoutDiff'] = data['SugarHousePAHomePointSpreadPayout'] - data['SugarHousePAAwayPointSpreadPayout']
data['ParxSpreadPayoutDiff'] = data['ParxHomePointSpreadPayout'] - data['ParxAwayPointSpreadPayout']
data['RiversCasinoPASpreadPayoutDiff'] = data['RiversCasinoPAHomePointSpreadPayout'] - data['RiversCasinoPAAwayPointSpreadPayout']
data['PointsBetSpreadPayoutDiff'] = data['PointsBetHomePointSpreadPayout'] - data['PointsBetAwayPointSpreadPayout']

# 3. Over/Under Features

# Over/Under Differences
data['DraftKingsOverUnderDiff'] = data['DraftKingsOverUnder'] - data['ConsensusOverUnder']
data['FanDuelOverUnderDiff'] = data['FanDuelOverUnder'] - data['ConsensusOverUnder']
data['SugarHousePAOverUnderDiff'] = data['SugarHousePAOverUnder'] - data['ConsensusOverUnder']
data['ParxOverUnderDiff'] = data['ParxOverUnder'] - data['ConsensusOverUnder']
data['RiversCasinoPAOverUnderDiff'] = data['RiversCasinoPAOverUnder'] - data['ConsensusOverUnder']
data['PointsBetOverUnderDiff'] = data['PointsBetOverUnder'] - data['ConsensusOverUnder']

# Over/Under Payout Differences
data['DraftKingsOverUnderPayoutDiff'] = data['DraftKingsOverPayout'] - data['DraftKingsUnderPayout']
data['FanDuelOverUnderPayoutDiff'] = data['FanDuelOverPayout'] - data['FanDuelUnderPayout']
data['SugarHousePAOverUnderPayoutDiff'] = data['SugarHousePAOverPayout'] - data['SugarHousePAUnderPayout']
data['ParxOverUnderPayoutDiff'] = data['ParxOverPayout'] - data['ParxUnderPayout']
data['RiversCasinoPAOverUnderPayoutDiff'] = data['RiversCasinoPAOverPayout'] - data['RiversCasinoPAUnderPayout']
data['PointsBetOverUnderPayoutDiff'] = data['PointsBetOverPayout'] - data['PointsBetUnderPayout']

# 5. Team Strength Features
N = 5
data = data.sort_values(by='DayString')

# 1. Calculate Home Team Rolling Average Score
data['HomeTeamAvgScore'] = data.groupby('HomeTeam')['HomeScore'].transform(lambda x: x.rolling(window=N, min_periods=1).mean())

# 2. Calculate Away Team Rolling Average Score
data['AwayTeamAvgScore'] = data.groupby('AwayTeam')['AwayScore'].transform(lambda x: x.rolling(window=N, min_periods=1).mean())

# 3. Calculate Home Team Rolling Average Allowed Score (optional)
data['HomeTeamAvgAllowedScore'] = data.groupby('HomeTeam')['AwayScore'].transform(lambda x: x.rolling(window=N, min_periods=1).mean())

# 4. Calculate Away Team Rolling Average Allowed Score (optional)
data['AwayTeamAvgAllowedScore'] = data.groupby('AwayTeam')['HomeScore'].transform(lambda x: x.rolling(window=N, min_periods=1).mean())

# 6. Interaction Features
# Creating interaction terms
data['ScoreDiff_vs_MoneyLineDiff'] = data['ScoreDifference'] * data['OpenerMoneyLineDiff']
data['TotalPoints_vs_SpreadDiff'] = data['TotalPoints'] * data['OpenerSpreadDiff']

# 7. Relative Performance Features
# Relative Money Line Strength
data['OpenerMoneyLineRatio'] = data['OpenerHomeMoneyLine'] / data['OpenerAwayMoneyLine']
data['ConsensusMoneyLineRatio'] = data['ConsensusHomeMoneyLine'] / data['ConsensusAwayMoneyLine']

# Relative Point Spread Strength
data['OpenerSpreadRatio'] = data['OpenerHomePointSpread'] / data['OpenerAwayPointSpread']
data['ConsensusSpreadRatio'] = data['ConsensusHomePointSpread'] / data['ConsensusAwayPointSpread']

  data['TotalPoints_vs_SpreadDiff'] = data['TotalPoints'] * data['OpenerSpreadDiff']
  data['OpenerMoneyLineRatio'] = data['OpenerHomeMoneyLine'] / data['OpenerAwayMoneyLine']
  data['ConsensusMoneyLineRatio'] = data['ConsensusHomeMoneyLine'] / data['ConsensusAwayMoneyLine']
  data['OpenerSpreadRatio'] = data['OpenerHomePointSpread'] / data['OpenerAwayPointSpread']
  data['ConsensusSpreadRatio'] = data['ConsensusHomePointSpread'] / data['ConsensusAwayPointSpread']


In [196]:
# Handing Null values
# Step 1: Group-Based Mean Imputation (based on HomeTeam and AwayTeam)
for column in data.columns[data.isnull().any()]:  # Iterate over columns with null values
    data[column] = data.groupby(['HomeTeam', 'AwayTeam'])[column].transform(lambda x: x.fillna(x.mean()))

# Step 2: Overall Median Imputation
for column in data.columns[data.isnull().any()]:  # Iterate over columns with null values
    data[column].fillna(data[column].median(), inplace=True)


In [197]:
# Extract relevant date features
data['Year'] = data['DayString'].dt.year
data['Month'] = data['DayString'].dt.month
data['Day'] = data['DayString'].dt.day
data['Weekday'] = data['DayString'].dt.weekday  # Monday=0, Sunday=6
data['WeekOfYear'] = data['DayString'].dt.isocalendar().week

# Drop the original DayString column if not needed
data = data.drop(columns=['DayString'])

# Display the updated DataFrame
print(data.head())

# Create the 'Winner' column
# If HomeTeamHasWon is True, Winner = 1 (Home team wins)
# If HomeTeamHasWon is False, Winner = 0 (Away team wins)
data['Winner'] = data['HomeTeamHasWon'].apply(lambda x: 1 if x else 0)

# Drop the original boolean target columns
data = data.drop(columns=['HomeTeamHasWon', 'AwayTeamHasWon'])

# Step 3: Convert HomeTeam and AwayTeam to dummy variables
data = pd.get_dummies(data, columns=['HomeTeam', 'AwayTeam'], drop_first=True)

    DayOfWeek AwayTeam  AwayScore  AwayTeamHasWon HomeTeam  HomeScore  \
0           3       GB         10            True      CHI          3   
13          6      PIT          3           False       NE         33   
12          6       SF         31            True       TB         17   
10          6      NYG         17           False      DAL         35   
9           6      CIN         20           False      SEA         21   

    HomeTeamHasWon  OpenerHomeMoneyLine  OpenerAwayMoneyLine  \
0            False                 -164                  144   
13            True                 -250                  200   
12           False                 -118                 -104   
10            True                 -315                  245   
9             True                 -385                  290   

    OpenerHomePointSpread  ...  TotalPoints_vs_SpreadDiff  \
0                    -3.5  ...                      -91.0   
13                   -6.0  ...                     -43

  data['Year'] = data['DayString'].dt.year
  data['Month'] = data['DayString'].dt.month
  data['Day'] = data['DayString'].dt.day
  data['Weekday'] = data['DayString'].dt.weekday  # Monday=0, Sunday=6
  data['WeekOfYear'] = data['DayString'].dt.isocalendar().week
  data['Winner'] = data['HomeTeamHasWon'].apply(lambda x: 1 if x else 0)


In [198]:
# Get columns with null values and their counts
null_columns = data.isnull().sum()

# Filter out columns that have no null values
null_columns = null_columns[null_columns > 0]

# Display the columns with their null counts
print(null_columns)

Series([], dtype: int64)


In [199]:
features_before_game = [
    # Betting Line Features
    'OpenerMoneyLineDiff',
    'ConsensusMoneyLineDiff',
    'DraftKingsMoneyLineDiff',
    'FanDuelMoneyLineDiff',
    'SugarHousePAMoneyLineDiff',
    'ParxMoneyLineDiff',
    'RiversCasinoPAMoneyLineDiff',
    'PointsBetMoneyLineDiff',
    'OpenerSpreadDiff',
    'ConsensusSpreadDiff',
    'DraftKingsSpreadDiff',
    'FanDuelSpreadDiff',
    'SugarHousePASpreadDiff',
    'ParxSpreadDiff',
    'RiversCasinoPASpreadDiff',
    'PointsBetSpreadDiff',
    'OpenerSpreadPayoutDiff',
    'ConsensusSpreadPayoutDiff',
    'DraftKingsSpreadPayoutDiff',
    'FanDuelSpreadPayoutDiff',
    'SugarHousePASpreadPayoutDiff',
    'ParxSpreadPayoutDiff',
    'RiversCasinoPASpreadPayoutDiff',
    'PointsBetSpreadPayoutDiff',

    # Over/Under Features
    'DraftKingsOverUnderDiff',
    'FanDuelOverUnderDiff',
    'SugarHousePAOverUnderDiff',
    'ParxOverUnderDiff',
    'RiversCasinoPAOverUnderDiff',
    'PointsBetOverUnderDiff',
    'DraftKingsOverUnderPayoutDiff',
    'FanDuelOverUnderPayoutDiff',
    'SugarHousePAOverUnderPayoutDiff',
    'ParxOverUnderPayoutDiff',
    'RiversCasinoPAOverUnderPayoutDiff',
    'PointsBetOverUnderPayoutDiff',

    # Team Strength Features (based on historical data)
    'HomeTeamAvgScore',
    'AwayTeamAvgScore',
    'HomeTeamAvgAllowedScore',
    'AwayTeamAvgAllowedScore',

    # Interaction Features (based on betting data)
    'ScoreDiff_vs_MoneyLineDiff',  # You would replace 'ScoreDifference' with a predictive estimate
    'TotalPoints_vs_SpreadDiff',  # You would replace 'TotalPoints' with a predictive estimate

    # Relative Performance Features
    'OpenerMoneyLineRatio',
    'ConsensusMoneyLineRatio',
    'OpenerSpreadRatio',
    'ConsensusSpreadRatio'
]

all_features = [
    # Score-Related Features (post-game)
    'ScoreDifference',  # Only available after the game
    'TotalPoints',  # Only available after the game
    'Q1_Diff',  # Only available after the game
    'Q2_Diff',  # Only available after the game
    'Q3_Diff',  # Only available after the game
    'Q4_Diff',  # Only available after the game
    'OT_Diff',  # Only available after the game

    # Betting Line Features
    'OpenerMoneyLineDiff',
    'ConsensusMoneyLineDiff',
    'DraftKingsMoneyLineDiff',
    'FanDuelMoneyLineDiff',
    'SugarHousePAMoneyLineDiff',
    'ParxMoneyLineDiff',
    'RiversCasinoPAMoneyLineDiff',
    'PointsBetMoneyLineDiff',
    'OpenerSpreadDiff',
    'ConsensusSpreadDiff',
    'DraftKingsSpreadDiff',
    'FanDuelSpreadDiff',
    'SugarHousePASpreadDiff',
    'ParxSpreadDiff',
    'RiversCasinoPASpreadDiff',
    'PointsBetSpreadDiff',
    'OpenerSpreadPayoutDiff',
    'ConsensusSpreadPayoutDiff',
    'DraftKingsSpreadPayoutDiff',
    'FanDuelSpreadPayoutDiff',
    'SugarHousePASpreadPayoutDiff',
    'ParxSpreadPayoutDiff',
    'RiversCasinoPASpreadPayoutDiff',
    'PointsBetSpreadPayoutDiff',

    # Over/Under Features
    'DraftKingsOverUnderDiff',
    'FanDuelOverUnderDiff',
    'SugarHousePAOverUnderDiff',
    'ParxOverUnderDiff',
    'RiversCasinoPAOverUnderDiff',
    'PointsBetOverUnderDiff',
    'DraftKingsOverUnderPayoutDiff',
    'FanDuelOverUnderPayoutDiff',
    'SugarHousePAOverUnderPayoutDiff',
    'ParxOverUnderPayoutDiff',
    'RiversCasinoPAOverUnderPayoutDiff',
    'PointsBetOverUnderPayoutDiff',

    # Team Strength Features (based on historical data)
    'HomeTeamAvgScore',
    'AwayTeamAvgScore',
    'HomeTeamAvgAllowedScore',
    'AwayTeamAvgAllowedScore',

    # Interaction Features
    'ScoreDiff_vs_MoneyLineDiff',
    'TotalPoints_vs_SpreadDiff',

    # Relative Performance Features
    'OpenerMoneyLineRatio',
    'ConsensusMoneyLineRatio',
    'OpenerSpreadRatio',
    'ConsensusSpreadRatio'
]


### Winner Prediction Model



> The Comprehensive Model



In [200]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.models import Model

# Assuming data is your DataFrame and 'Winner' is the target variable
# Split the data into training and testing sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Separate the target variable
y_train = train_data['Winner']
y_test = test_data['Winner']

# Normalize numerical features for all_features
scaler = StandardScaler()
train_data[all_features] = scaler.fit_transform(train_data[all_features])
test_data[all_features] = scaler.transform(test_data[all_features])

# Normalize numerical features for features_before_game
scaler_before_game = StandardScaler()
train_data_before_game = train_data[features_before_game].copy()
test_data_before_game = test_data[features_before_game].copy()
train_data_before_game[features_before_game] = scaler_before_game.fit_transform(train_data_before_game[features_before_game])
test_data_before_game[features_before_game] = scaler_before_game.transform(test_data_before_game[features_before_game])


In [201]:
from sklearn.metrics import accuracy_score

# Define the comprehensive model
input_all = Input(shape=(len(all_features),))
x = Dense(128, activation='relu')(input_all)
x = Dropout(0.5)(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(32, activation='relu')(x)
output_all = Dense(1, activation='sigmoid')(x)
comprehensive_model = Model(inputs=input_all, outputs=output_all)

# Compile the comprehensive model
comprehensive_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the comprehensive model
comprehensive_model.fit(train_data[all_features], y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the comprehensive model
X_test = test_data[all_features].values
predictions = comprehensive_model.predict(X_test)
predicted_classes = (predictions > 0.5).astype(int)
accuracy = accuracy_score(y_test, predicted_classes)
print(f"Comprehensive Model - Accuracy: {accuracy:.4f}")

Epoch 1/10
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 18ms/step - accuracy: 0.5772 - loss: 0.7658 - val_accuracy: 0.7465 - val_loss: 0.5446
Epoch 2/10
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6780 - loss: 0.5900 - val_accuracy: 0.7981 - val_loss: 0.4613
Epoch 3/10
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7749 - loss: 0.4983 - val_accuracy: 0.8357 - val_loss: 0.3905
Epoch 4/10
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7855 - loss: 0.4391 - val_accuracy: 0.9061 - val_loss: 0.2977
Epoch 5/10
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8434 - loss: 0.3653 - val_accuracy: 0.9343 - val_loss: 0.2324
Epoch 6/10
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8328 - loss: 0.3348 - val_accuracy: 0.9484 - val_loss: 0.1746
Epoch 7/10
[1m27/27[0m [32m━━━━━━━━━

### Transfer Learning to the Pre-Game Model

In [213]:
# Define the pre-game model with the same architecture but adapted input size
input_before_game = Input(shape=(len(features_before_game),))
x = Dense(128, activation='relu')(input_before_game)
x = Dropout(0.5)(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(32, activation='relu')(x)
output_before_game = Dense(1, activation='sigmoid')(x)
before_game_model = Model(inputs=input_before_game, outputs=output_before_game)

# Extract dense and dropout layers from both models
comprehensive_model_layers = [layer for layer in comprehensive_model.layers if 'dense' in layer.name or 'dropout' in layer.name]
before_game_model_layers = [layer for layer in before_game_model.layers if 'dense' in layer.name or 'dropout' in layer.name]

# Transfer weights with compatibility check
for comp_layer, before_layer in zip(comprehensive_model_layers[1:], before_game_model_layers[1:]):
    if comp_layer.get_weights() and before_layer.get_weights() and \
       comp_layer.get_weights()[0].shape == before_layer.get_weights()[0].shape:
        before_layer.set_weights(comp_layer.get_weights())
    else:
        print(f"Skipped layer: {before_layer.name} due to incompatible shape")


# Compile the pre-game model
before_game_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the pre-game model (you can fine-tune this further)
before_game_model.fit(train_data_before_game[features_before_game], y_train, epochs=20, batch_size=32, validation_split=0.2)

# Prediction using betting odds features
X_test_pred = test_data_before_game[features_before_game]
predictions_pred = before_game_model.predict(X_test_pred)
predicted_classes_pred = (predictions_pred > 0.5).astype(int)

# Evaluate the prediction model
accuracy_pred = accuracy_score(y_test, predicted_classes_pred)

# Print the metrics for the prediction model
print(f"Prediction Model - Accuracy: {accuracy_pred:.4f}")

Skipped layer: dropout_44 due to incompatible shape
Skipped layer: dropout_45 due to incompatible shape
Epoch 1/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 33ms/step - accuracy: 0.5546 - loss: 0.9103 - val_accuracy: 0.7042 - val_loss: 0.5929
Epoch 2/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.6538 - loss: 0.7027 - val_accuracy: 0.7089 - val_loss: 0.5403
Epoch 3/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6930 - loss: 0.6156 - val_accuracy: 0.7465 - val_loss: 0.5258
Epoch 4/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6736 - loss: 0.5740 - val_accuracy: 0.7512 - val_loss: 0.5169
Epoch 5/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.7208 - loss: 0.5512 - val_accuracy: 0.7512 - val_loss: 0.5101
Epoch 6/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accura

### Over/Under prediction


> Comprehensive Model for Over/Under Prediction



In [203]:
# Define the over/under threshold
threshold = 45

# Create the 'OverUnder' column
# 1 if total points exceed the threshold, 0 otherwise
data['OverUnder'] = data['TotalPoints'].apply(lambda x: 1 if x > threshold else 0)

In [204]:
# Split the data into training and testing sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Separate the target variable for Over/Under prediction
y_train_over_under = train_data['OverUnder']
y_test_over_under = test_data['OverUnder']

# Normalize numerical features for all_features
train_data[all_features] = scaler.fit_transform(train_data[all_features])
test_data[all_features] = scaler.transform(test_data[all_features])

# Normalize numerical features for features_before_game
train_data_before_game[features_before_game] = scaler_before_game.fit_transform(train_data_before_game[features_before_game])
test_data_before_game[features_before_game] = scaler_before_game.transform(test_data_before_game[features_before_game])

In [205]:
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Model

# Define the comprehensive model for over/under prediction
input_all_over_under = Input(shape=(len(all_features),))
x = Dense(256, activation='relu')(input_all_over_under)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(128, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(64, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(32, activation='relu')(x)
output_all_over_under = Dense(1, activation='sigmoid')(x)
comprehensive_model_over_under = Model(inputs=input_all_over_under, outputs=output_all_over_under)

# Compile the comprehensive model
comprehensive_model_over_under.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [206]:
# Train the comprehensive over/under model
comprehensive_model_over_under.fit(train_data[all_features], y_train_over_under, epochs=20, batch_size=64, validation_split=0.2)

# Evaluate the comprehensive model
X_test = test_data[all_features].values
predictions = comprehensive_model_over_under.predict(X_test)
predicted_classes = (predictions > 0.5).astype(int)
accuracy = accuracy_score(y_test_over_under, predicted_classes)
print(f"Comprehensive Model - Accuracy: {accuracy:.4f}")

Epoch 1/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 29ms/step - accuracy: 0.5104 - loss: 0.8583 - val_accuracy: 0.6479 - val_loss: 0.6702
Epoch 2/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6032 - loss: 0.7265 - val_accuracy: 0.7371 - val_loss: 0.6411
Epoch 3/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6923 - loss: 0.6319 - val_accuracy: 0.7934 - val_loss: 0.6065
Epoch 4/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7383 - loss: 0.5319 - val_accuracy: 0.8451 - val_loss: 0.5611
Epoch 5/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7744 - loss: 0.4768 - val_accuracy: 0.8638 - val_loss: 0.5074
Epoch 6/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7729 - loss: 0.4599 - val_accuracy: 0.9014 - val_loss: 0.4539
Epoch 7/20
[1m14/14[0m [32m━━━━━━━━━

### Define the Pre-Game Model for Over/Under Prediction

In [207]:
# Define the pre-game model for over/under prediction
input_before_game_over_under = Input(shape=(len(features_before_game),))
x = Dense(256, activation='relu')(input_before_game_over_under)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(128, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(64, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(32, activation='relu')(x)
output_before_game_over_under = Dense(1, activation='sigmoid')(x)
before_game_model_over_under = Model(inputs=input_before_game_over_under, outputs=output_before_game_over_under)

# Compile the pre-game model
before_game_model_over_under.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

### Transfer Weights and Train the Pre-Game Over/Under Model

In [214]:
# Transfer weights with compatibility check
for comp_layer, before_layer in zip(comprehensive_model_layers[1:], before_game_model_over_under.layers[1:]):
    if comp_layer.get_weights() and before_layer.get_weights() and \
       comp_layer.get_weights()[0].shape == before_layer.get_weights()[0].shape:
        before_layer.set_weights(comp_layer.get_weights())
    else:
        print(f"Skipped layer: {before_layer.name} due to incompatible shape")

# Train the pre-game over/under model
before_game_model_over_under.fit(train_data_before_game[features_before_game], y_train_over_under, epochs=10, batch_size=64, validation_split=0.2)

# Evaluate the pre-game over/under model
X_test_pred_over_under = test_data_before_game[features_before_game]
predictions_pred_over_under = before_game_model_over_under.predict(X_test_pred_over_under)
predicted_classes_pred_over_under = (predictions_pred_over_under > 0.5).astype(int)
accuracy_pred_over_under = accuracy_score(y_test_over_under, predicted_classes_pred_over_under)
print(f"Pre-Game Over/Under Model - Accuracy: {accuracy_pred_over_under:.4f}")

Skipped layer: dense_72 due to incompatible shape
Skipped layer: batch_normalization_16 due to incompatible shape
Skipped layer: dropout_39 due to incompatible shape
Skipped layer: dense_73 due to incompatible shape
Skipped layer: batch_normalization_17 due to incompatible shape
Epoch 1/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.7662 - loss: 0.5084 - val_accuracy: 0.6995 - val_loss: 0.5428
Epoch 2/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.7582 - loss: 0.5188 - val_accuracy: 0.7230 - val_loss: 0.5392
Epoch 3/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.7240 - loss: 0.5524 - val_accuracy: 0.7183 - val_loss: 0.5390
Epoch 4/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - accuracy: 0.7657 - loss: 0.4848 - val_accuracy: 0.7230 - val_loss: 0.5391
Epoch 5/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1

## Confidence Scores

In [215]:
def make_predictions_with_confidence(model, X_test, threshold=0.5):
    predictions = model.predict(X_test)
    predicted_classes = (predictions > threshold).astype(int)
    confidence_scores = predictions if threshold == 0.5 else (1 - predictions)
    return predicted_classes, confidence_scores

# Making predictions for the pre-game model
predicted_winner_pred, confidence_winner_pred = make_predictions_with_confidence(before_game_model, test_data_before_game[features_before_game])
predicted_ou_pred, confidence_ou_pred = make_predictions_with_confidence(before_game_model_over_under, test_data_before_game[features_before_game])

# Data for Winner Prediction (Pre-Game Model)
data_winner_pred = {
    'Game': [f"Game {i+1}" for i in range(len(predicted_winner_pred))],
    'Winner Prediction (Pre-Game)': ['Home' if pred == 1 else 'Away' for pred in predicted_winner_pred],
    'Winner Confidence (Pre-Game)': [conf[0]*100 for conf in confidence_winner_pred]
}

# Data for Over/Under Prediction (Pre-Game Model)
data_ou_pred = {
    'Game': [f"Game {i+1}" for i in range(len(predicted_ou_pred))],
    'Over/Under Prediction (Pre-Game)': ['Over' if pred == 1 else 'Under' for pred in predicted_ou_pred],
    'Over/Under Confidence (Pre-Game)': [conf[0]*100 for conf in confidence_ou_pred]
}

# Convert the data dictionaries into DataFrames
confidence_df_winner_pred = pd.DataFrame(data_winner_pred)
confidence_df_ou_pred = pd.DataFrame(data_ou_pred)


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


In [216]:
confidence_df_winner_pred

Unnamed: 0,Game,Winner Prediction (Pre-Game),Winner Confidence (Pre-Game)
0,Game 1,Home,53.247130
1,Game 2,Home,99.811840
2,Game 3,Home,99.976087
3,Game 4,Away,6.881587
4,Game 5,Away,48.998022
...,...,...,...
261,Game 262,Home,80.337799
262,Game 263,Home,92.736888
263,Game 264,Away,47.611374
264,Game 265,Home,67.564440


In [217]:
confidence_df_ou_pred

Unnamed: 0,Game,Over/Under Prediction (Pre-Game),Over/Under Confidence (Pre-Game)
0,Game 1,Under,7.973353
1,Game 2,Under,28.667927
2,Game 3,Over,71.342808
3,Game 4,Over,67.931461
4,Game 5,Under,46.027184
...,...,...,...
261,Game 262,Over,72.114921
262,Game 263,Over,61.451262
263,Game 264,Under,8.556294
264,Game 265,Under,48.974302
