In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd

#Importing NFL stats dataset
file_path = '/content/drive/My Drive/Sports Modeling/nfl/imputed_data.csv'
data = pd.read_csv(file_path)

In [3]:
# Get the count of null values in each column
null_counts = data.isnull().sum()

# Filter the columns with null values
null_counts = null_counts[null_counts > 0]

# Display the columns with their respective null value counts
print(null_counts)

Series([], dtype: int64)


In [4]:
# Create a new column 'result' with default value 0 (for draws)
data['result'] = 0

# Set the result to 1 where home team wins
data.loc[data['home_points'] > data['away_points'], 'result'] = 1

# Set the result to -1 where away team wins
data.loc[data['home_points'] < data['away_points'], 'result'] = -1

In [5]:
print(data['result'].value_counts())

result
 1    3404
-1    2635
 0      14
Name: count, dtype: int64


In [6]:
# Display columns that are not numerical
non_numerical_data = data.select_dtypes(exclude=['int64', 'float64'])

# Display the first few rows of these non-numerical columns
print(non_numerical_data.head())

             venue_name       venue_city venue_surface venue_roof_type  \
0  Three Rivers Stadium       Pittsburgh    artificial         outdoor   
1  RingCentral Coliseum          Oakland          turf         outdoor   
2          Georgia Dome          Atlanta    artificial            dome   
3        Giants Stadium  East Rutherford    artificial         outdoor   
4     Caesars Superdome      New Orleans    artificial            dome   

  home_name  away_name home_possession_time away_possession_time  
0  Steelers     Ravens                24:53                35:07  
1   Raiders   Chargers                31:30                28:30  
2   Falcons      49ers                31:39                28:21  
3    Giants  Cardinals                31:30                28:30  
4    Saints      Lions                29:11                30:49  


In [7]:
data = data.drop(['venue_name', 'venue_city'], axis=1)  # Dropping Unnecessary Columns

In [8]:
# Convert possession time from format 'MM:SS' to total seconds
data['home_possession_time_seconds'] = data['home_possession_time'].apply(lambda x: int(x.split(':')[0]) * 60 + int(x.split(':')[1]))
data['away_possession_time_seconds'] = data['away_possession_time'].apply(lambda x: int(x.split(':')[0]) * 60 + int(x.split(':')[1]))

In [9]:
# Drop the original time columns
data = data.drop(['home_possession_time', 'away_possession_time'], axis=1)

# Now check for non-numerical columns again
non_numerical_data = data.select_dtypes(exclude=['int64', 'float64'])
print(non_numerical_data.head())


  venue_surface venue_roof_type home_name  away_name
0    artificial         outdoor  Steelers     Ravens
1          turf         outdoor   Raiders   Chargers
2    artificial            dome   Falcons      49ers
3    artificial         outdoor    Giants  Cardinals
4    artificial            dome    Saints      Lions


In [10]:
from sklearn.preprocessing import LabelEncoder

# Initialize the LabelEncoder
label_encoder = LabelEncoder()

# Apply Label Encoding to each categorical column
data['venue_surface_encoded'] = label_encoder.fit_transform(data['venue_surface'])
data['venue_roof_type_encoded'] = label_encoder.fit_transform(data['venue_roof_type'])
data['home_name_encoded'] = label_encoder.fit_transform(data['home_name'])
data['away_name_encoded'] = label_encoder.fit_transform(data['away_name'])

# Drop the original columns if you no longer need them
data = data.drop(['venue_surface', 'venue_roof_type', 'home_name', 'away_name'], axis=1)

In [11]:
print(data.columns.tolist())

['attendance', 'quarter', 'season_year', 'week_sequence', 'week_title', 'venue_capacity', 'home_used_timeouts', 'home_remaining_timeouts', 'home_points', 'away_used_timeouts', 'away_remaining_timeouts', 'away_points', 'home_avg_gain', 'home_safeties', 'home_turnovers', 'home_play_count', 'home_rush_plays', 'home_total_yards', 'home_fumbles', 'home_lost_fumbles', 'home_penalties', 'home_penalty_yards', 'home_return_yards', 'home_rushing_totals_avg_yards', 'home_rushing_totals_attempts', 'home_rushing_totals_touchdowns', 'home_rushing_totals_tlost', 'home_rushing_totals_tlost_yards', 'home_rushing_totals_yards', 'home_rushing_totals_longest', 'home_rushing_totals_longest_touchdown', 'home_rushing_totals_redzone_attempts', 'home_receiving_totals_targets', 'home_receiving_totals_receptions', 'home_receiving_totals_avg_yards', 'home_receiving_totals_yards', 'home_receiving_totals_touchdowns', 'home_receiving_totals_yards_after_catch', 'home_receiving_totals_longest', 'home_receiving_totals_

### Finding most important features


> Recursive Feature Elimination (RFE) with a RandomForestClassifier



In [11]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE

# Define the model
model = RandomForestClassifier(random_state=42)

# Define X by dropping the 'result' column
X = data.drop(columns=['result'])

# Define y as the 'result' column
y = data['result']

# Define RFE and select the top 10 features
rfe = RFE(estimator=model, n_features_to_select=50, step=1)
rfe = rfe.fit(X, y)

# Get the selected features
selected_features = X.columns[rfe.support_]
print("Selected Features:", selected_features)

# Optionally, check the ranking of all features
print("Feature Ranking:", rfe.ranking_)

Selected Features: Index(['home_used_timeouts', 'home_remaining_timeouts', 'home_points',
       'away_used_timeouts', 'away_remaining_timeouts', 'away_points',
       'home_avg_gain', 'home_rush_plays', 'home_total_yards',
       'home_rushing_totals_attempts', 'home_rushing_totals_yards',
       'home_receiving_totals_avg_yards', 'home_passing_totals_attempts',
       'home_passing_totals_cmp_pct', 'home_passing_totals_rating',
       'home_passing_totals_avg_yards', 'home_kickoffs_totals_yards',
       'home_kickoffs_totals_number', 'home_kick_returns_totals_yards',
       'home_field_goals_totals_yards', 'home_field_goals_totals_avg_yards',
       'home_extra_points_kicks_totals_attempts',
       'home_extra_points_kicks_totals_made', 'home_touchdowns_total',
       'away_turnovers', 'away_rush_plays', 'away_total_yards',
       'away_return_yards', 'away_rushing_totals_attempts',
       'away_rushing_totals_yards', 'away_receiving_totals_avg_yards',
       'away_passing_totals_att

### Seleccting best features for modeling

In [122]:
# Create a list of feature ranking
feature_ranking = [18, 24, 100, 165, 177, 124, 1, 1, 1, 1, 1, 1, 1, 326, 35, 46, 1, 1, 187, 234, 173, 12, 19, 112, 1, 189,
                   208, 158, 1, 42, 79, 157, 27, 142, 1, 5, 83, 162, 76, 85, 207, 11, 204, 14, 68, 342, 284, 252, 98, 34,
                   55, 136, 117, 115, 105, 343, 292, 251, 223, 174, 9, 1, 119, 1, 160, 41, 1, 58, 1, 166, 99, 32, 72,
                   181, 77, 81, 301, 346, 363, 357, 362, 368, 322, 260, 274, 22, 168, 1, 310, 1, 2, 47, 1, 106, 340, 306,
                   359, 13, 182, 130, 293, 190, 198, 242, 268, 224, 226, 240, 307, 257, 360, 314, 361, 113, 31, 286, 1,
                   1, 40, 171, 183, 56, 96, 21, 145, 7, 120, 38, 236, 255, 80, 191, 156, 327, 169, 221, 299, 285, 305,
                   209, 344, 333, 279, 1, 350, 1, 200, 289, 311, 320, 348, 356, 365, 367, 161, 222, 123, 53, 111, 163,
                   126, 109, 176, 272, 1, 338, 302, 336, 329, 328, 214, 203, 225, 30, 15, 118, 159, 184, 45, 67, 270,
                   283, 10, 313, 1, 84, 1, 1, 212, 216, 179, 94, 1, 50, 1, 147, 205, 134, 1, 26, 74, 36, 33, 102, 1, 28,
                   172, 164, 89, 51, 202, 63, 210, 44, 39, 351, 280, 245, 167, 43, 57, 114, 65, 82, 133, 332, 317, 249,
                   213, 188, 17, 1, 110, 1, 107, 1, 1, 135, 1, 125, 92, 60, 75, 178, 1, 54, 303, 353, 364, 372, 369, 366,
                   312, 241, 254, 23, 193, 1, 300, 1, 66, 91, 1, 97, 354, 345, 323, 52, 170, 73, 315, 185, 186, 264, 256,
                   232, 247, 229, 308, 253, 319, 309, 347, 103, 4, 334, 1, 29, 49, 61, 215, 59, 86, 20, 155, 8, 108, 1,
                   263, 250, 140, 197, 122, 349, 150, 238, 324, 294, 296, 175, 325, 341, 275, 1, 330, 1, 269, 282, 297,
                   304, 318, 358, 370, 371, 129, 230, 104, 62, 78, 154, 152, 148, 153, 262, 1, 295, 291, 355, 335, 337,
                   199, 196, 237, 70, 25, 132, 180, 137, 1, 69, 273, 287, 121, 139, 90, 276, 206, 321, 339, 95, 271, 88,
                   144, 217, 141, 201, 131, 211, 352, 331, 3, 93, 235, 151, 195, 128, 259, 1, 261, 37, 281, 231, 228,
                   194, 266, 218, 220, 288, 138, 239, 233, 278, 1, 244, 64, 277, 243, 265, 192, 267, 227, 219, 298, 127,
                   248, 246, 101, 16, 48, 6, 87, 71, 116, 258, 143, 1, 1, 316, 290, 149, 146]

# Define X by dropping the 'result' column
X = data.drop(columns=['result'])

# Define y as the 'result' column
y = data['result']

# Create a DataFrame with features and their rankings
feature_rankings = pd.DataFrame({
    'Feature': X.columns,  # The feature names
    'Ranking': feature_ranking  # The rankings from RFE
})

# Sort the features by their ranking
feature_rankings = feature_rankings.sort_values(by='Ranking')

# Select features with a ranking less than or equal to 5
important_features = feature_rankings[feature_rankings['Ranking'] <= 5]['Feature']
important_features_list = important_features.tolist()

print("Selected Important Features with Ranking <= 5:\n", important_features_list)

Selected Important Features with Ranking <= 5:
 ['home_touchdowns_total', 'away_passing_totals_net_yards', 'away_efficiency_thirddown_pct', 'away_field_goals_totals_yards', 'home_receiving_totals_avg_yards', 'home_field_goals_totals_yards', 'home_field_goals_totals_avg_yards', 'away_receiving_totals_avg_yards', 'away_kick_returns_totals_yards', 'away_rushing_totals_kneel_downs', 'home_passing_totals_avg_yards', 'away_kickoffs_totals_number', 'home_kickoffs_totals_yards', 'home_passing_totals_rating', 'home_rushing_totals_kneel_downs', 'away_return_yards', 'home_passing_totals_cmp_pct', 'home_passing_totals_attempts', 'away_passing_totals_attempts', 'away_passing_totals_cmp_pct', 'away_passing_totals_sack_yards', 'away_passing_totals_rating', 'home_extra_points_kicks_totals_made', 'away_passing_totals_avg_yards', 'home_extra_points_kicks_totals_attempts', 'away_kickoffs_totals_yards', 'home_rushing_totals_yards', 'away_touchdowns_total', 'home_kickoffs_totals_number', 'away_extra_points

### Feature Engineering

In [123]:
# Listing out all the important features for feature engineering
important_features_list = ['home_touchdowns_total', 'away_passing_totals_net_yards', 'away_efficiency_thirddown_pct', 'away_field_goals_totals_yards', 'home_receiving_totals_avg_yards',
                       'home_field_goals_totals_yards', 'home_field_goals_totals_avg_yards', 'away_receiving_totals_avg_yards', 'away_kick_returns_totals_yards',
                       'away_rushing_totals_kneel_downs', 'home_passing_totals_avg_yards', 'away_kickoffs_totals_number', 'home_kickoffs_totals_yards', 'home_passing_totals_rating',
                       'home_rushing_totals_kneel_downs', 'away_return_yards', 'home_passing_totals_cmp_pct', 'home_passing_totals_attempts', 'away_passing_totals_attempts',
                       'away_passing_totals_cmp_pct', 'away_passing_totals_sack_yards', 'away_passing_totals_rating', 'home_extra_points_kicks_totals_made',
                       'away_passing_totals_avg_yards', 'home_extra_points_kicks_totals_attempts', 'away_kickoffs_totals_yards', 'home_rushing_totals_yards', 'away_touchdowns_total',
                       'home_kickoffs_totals_number', 'away_extra_points_kicks_totals_made', 'away_rushing_totals_attempts', 'away_possession_time_seconds',
                       'home_possession_time_seconds', 'away_rushing_totals_yards', 'home_used_timeouts', 'home_remaining_timeouts', 'home_points', 'away_used_timeouts',
                       'away_remaining_timeouts', 'away_points', 'home_avg_gain', 'away_total_yards', 'away_defense_totals_passes_defended', 'away_extra_points_kicks_totals_attempts',
                       'home_total_yards', 'away_rush_plays', 'away_turnovers', 'home_kick_returns_totals_yards', 'home_rush_plays', 'home_rushing_totals_attempts',
                       'home_kickoffs_totals_total_endzone', 'home_defense_totals_def_targets', 'away_field_goals_totals_made', 'home_receiving_totals_yards',
                       'attendance', 'quarter', 'season_year', 'week_sequence', 'week_title', 'venue_capacity', 'venue_location_lat', 'venue_location_lng',
                       'home_name_encoded', 'away_name_encoded', 'venue_surface_encoded','venue_roof_type_encoded', 'year', 'month', 'day']

# Dataframe 'df' contains 54 features
df = data[important_features_list]

# Historical averages over the last N games (e.g., 5 games)
df['home_avg_points_last_5'] = df.groupby('home_name_encoded')['home_points'].rolling(5).mean().reset_index(level=0, drop=True)
df['away_avg_points_last_5'] = df.groupby('away_name_encoded')['away_points'].rolling(5).mean().reset_index(level=0, drop=True)

# Recent form (e.g., percentage of maximum points in the last 5 games)
df['home_recent_form'] = df.groupby('home_name_encoded')['home_points'].rolling(5).sum().reset_index(level=0, drop=True) / 15  # Assuming 3 points for a win
df['away_recent_form'] = df.groupby('away_name_encoded')['away_points'].rolling(5).sum().reset_index(level=0, drop=True) / 15

# Scoring and Points
df['point_difference'] = df['home_points'] - df['away_points']
df['total_points'] = df['home_points'] + df['away_points']
df['home_scoring_efficiency'] = df['home_points'] / df['home_total_yards']
df['away_scoring_efficiency'] = df['away_points'] / df['away_total_yards']

# Offensive Efficiency
df['home_avg_yards_per_play'] = df['home_total_yards'] / df['home_rush_plays']
df['away_avg_yards_per_play'] = df['away_total_yards'] / df['away_rush_plays']
df['home_rush_efficiency'] = df['home_rushing_totals_yards'] / df['home_rushing_totals_attempts']
df['away_rush_efficiency'] = df['away_rushing_totals_yards'] / df['away_rushing_totals_attempts']

# Defensive Metrics
df['home_defense_effectiveness'] = df['away_total_yards'] / df['home_defense_totals_def_targets']
df['away_defense_effectiveness'] = df['home_total_yards'] / df['away_defense_totals_passes_defended']

# Special Teams Performance
df['home_kick_return_yards_per_attempt'] = df['home_kick_returns_totals_yards'] / df['home_kickoffs_totals_number']
df['away_kick_return_yards_per_attempt'] = df['away_kick_returns_totals_yards'] / df['away_kickoffs_totals_number']

# Possession and Time Management
df['possession_difference'] = df['home_possession_time_seconds'] - df['away_possession_time_seconds']
df['home_time_per_play'] = df['home_possession_time_seconds'] / (df['home_rush_plays'] + df['home_passing_totals_attempts'])
df['away_time_per_play'] = df['away_possession_time_seconds'] / (df['away_rush_plays'] + df['away_passing_totals_attempts'])

# Turnover Analysis
df['turnover_difference'] = df['away_turnovers'] - df['home_rushing_totals_attempts']

# Third Down Efficiency
df['third_down_success_difference'] = df['home_rush_plays'] - df['away_efficiency_thirddown_pct']

# Red Zone Performance
df['home_red_zone_efficiency'] = df['home_touchdowns_total'] / (df['home_rush_plays'] + df['home_passing_totals_attempts'])
df['away_red_zone_efficiency'] = df['away_touchdowns_total'] / (df['away_rush_plays'] + df['away_passing_totals_attempts'])

# Yardage Metrics
df['total_yards_difference'] = df['home_total_yards'] - df['away_total_yards']
df['rushing_yards_difference'] = df['home_rushing_totals_yards'] - df['away_rushing_totals_yards']
df['passing_yards_difference'] = df['home_passing_totals_avg_yards'] - df['away_passing_totals_avg_yards']

# Timeouts and Time Management
df['home_timeouts_left_ratio'] = df['home_remaining_timeouts'] / df['home_used_timeouts']
df['away_timeouts_left_ratio'] = df['away_remaining_timeouts'] / df['away_used_timeouts']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['home_avg_points_last_5'] = df.groupby('home_name_encoded')['home_points'].rolling(5).mean().reset_index(level=0, drop=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['away_avg_points_last_5'] = df.groupby('away_name_encoded')['away_points'].rolling(5).mean().reset_index(level=0, drop=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_gu

In [124]:
print(df.columns.tolist())

['home_touchdowns_total', 'away_passing_totals_net_yards', 'away_efficiency_thirddown_pct', 'away_field_goals_totals_yards', 'home_receiving_totals_avg_yards', 'home_field_goals_totals_yards', 'home_field_goals_totals_avg_yards', 'away_receiving_totals_avg_yards', 'away_kick_returns_totals_yards', 'away_rushing_totals_kneel_downs', 'home_passing_totals_avg_yards', 'away_kickoffs_totals_number', 'home_kickoffs_totals_yards', 'home_passing_totals_rating', 'home_rushing_totals_kneel_downs', 'away_return_yards', 'home_passing_totals_cmp_pct', 'home_passing_totals_attempts', 'away_passing_totals_attempts', 'away_passing_totals_cmp_pct', 'away_passing_totals_sack_yards', 'away_passing_totals_rating', 'home_extra_points_kicks_totals_made', 'away_passing_totals_avg_yards', 'home_extra_points_kicks_totals_attempts', 'away_kickoffs_totals_yards', 'home_rushing_totals_yards', 'away_touchdowns_total', 'home_kickoffs_totals_number', 'away_extra_points_kicks_totals_made', 'away_rushing_totals_attemp

In [125]:
# List of features available before game starts
Before_game_features = [
    'attendance', 'quarter', 'season_year', 'week_sequence', 'week_title', 'venue_capacity', 'venue_location_lat', 'venue_location_lng',
    'home_name_encoded', 'away_name_encoded', 'venue_surface_encoded','venue_roof_type_encoded', 'year', 'month', 'day']

# List of all features, it included engineered features and features which will be avaiable before game starts
# Features which are commented have infinite values and so are not considered for modeling
all_features = [
    'home_avg_points_last_5', 'away_avg_points_last_5', 'home_recent_form', 'away_recent_form',
    #'point_difference', 'total_points',
    'home_scoring_efficiency', 'away_scoring_efficiency',
    'home_avg_yards_per_play', 'away_avg_yards_per_play', 'home_rush_efficiency', 'away_rush_efficiency',
    #'home_defense_effectiveness', 'away_defense_effectiveness', 'home_kick_return_yards_per_attempt','away_kick_return_yards_per_attempt',
    #'possession_difference',
    'home_time_per_play',
    'away_time_per_play', 'turnover_difference', 'third_down_success_difference', 'home_red_zone_efficiency',
    'away_red_zone_efficiency', 'total_yards_difference', 'rushing_yards_difference',
    'passing_yards_difference',
    #'home_timeouts_left_ratio', 'away_timeouts_left_ratio',
    'attendance', 'quarter', 'season_year', 'week_sequence', 'week_title', 'venue_capacity', 'venue_location_lat', 'venue_location_lng',
    'home_name_encoded', 'away_name_encoded', 'venue_surface_encoded','venue_roof_type_encoded', 'year', 'month', 'day']

In [126]:
# X is the DataFrame containing all features
X = df[all_features]

# Check for infinite values in X
infinite_mask = np.isinf(X)

# Get the count of infinite values for each column
inf_counts = infinite_mask.sum(axis=0)

# Filter out columns with infinite values and get the counts
inf_columns_with_counts = inf_counts[inf_counts > 0]

# Print the results
print("Columns with infinite values and their counts in X:")
print(inf_columns_with_counts)

Columns with infinite values and their counts in X:
Series([], dtype: int64)


In [127]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
from keras.regularizers import l2
from keras.callbacks import EarlyStopping
from keras.layers import Dropout
from sklearn.utils import class_weight

# Features and target
X = df[all_features]
y = data['result']

# One-hot encode the target variable (result)
y = to_categorical(y + 1, num_classes=3)  # Shift to make classes 0, 1, 2

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Check for infinite values and replace them with NaN
X_train.replace([np.inf, -np.inf], np.nan, inplace=True)
X_test.replace([np.inf, -np.inf], np.nan, inplace=True)


# Option 2: Impute NaN values (mean imputation is one approach)
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

# Normalize the feature data again
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#To avoid overfitting, adding dropout layers and regularization
model = Sequential()
model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01), input_shape=(X_train.shape[1],)))
model.add(Dropout(0.5))  # Add dropout to reduce overfitting
model.add(Dense(32, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.5))
model.add(Dense(16, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

#For imbalanced dataset using class_weight
class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(np.argmax(y_train, axis=1)), y=np.argmax(y_train, axis=1))
class_weights = dict(enumerate(class_weights))

# Train the model
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping], class_weight=class_weights)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy:.2f}')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.4700 - loss: 2.4297 - val_accuracy: 0.8324 - val_loss: 1.3936
Epoch 2/10
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6576 - loss: 1.6138 - val_accuracy: 0.8728 - val_loss: 1.1011
Epoch 3/10
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7344 - loss: 1.9680 - val_accuracy: 0.8745 - val_loss: 1.0050
Epoch 4/10
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7473 - loss: 1.8666 - val_accuracy: 0.8671 - val_loss: 0.9498
Epoch 5/10
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7629 - loss: 1.3172 - val_accuracy: 0.8687 - val_loss: 0.9072
Epoch 6/10
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7727 - loss: 1.1277 - val_accuracy: 0.8745 - val_loss: 0.8727
Epoch 7/10
[1m152/152[0m 

In [128]:
from sklearn.metrics import confusion_matrix, classification_report

# Make predictions on the test set
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)  # Convert one-hot encoded predictions back to class labels
y_test_classes = np.argmax(y_test, axis=1)  # Convert one-hot encoded test labels back to class labels

# Confusion matrix
cm = confusion_matrix(y_test_classes, y_pred_classes)
print("Confusion Matrix:")
print(cm)

# Classification report
print("Classification Report:")
print(classification_report(y_test_classes, y_pred_classes))

[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Confusion Matrix:
[[442  26  31]
 [  0   3   0]
 [ 57  39 613]]
Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.89      0.89       499
           1       0.04      1.00      0.08         3
           2       0.95      0.86      0.91       709

    accuracy                           0.87      1211
   macro avg       0.63      0.92      0.63      1211
weighted avg       0.92      0.87      0.90      1211



### Recreating model using Focal Loss


> To address class imbalance by down-weighting easy examples and focusing training on harder



In [143]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.regularizers import l2
from keras.callbacks import EarlyStopping
from sklearn.utils import class_weight
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import tensorflow as tf

# Features and target
X = df[all_features]
y = data['result']

# One-hot encode the target variable (result)
y = to_categorical(y + 1, num_classes=3)  # Shift to make classes 0, 1, 2

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Check for infinite values and replace them with NaN
X_train.replace([np.inf, -np.inf], np.nan, inplace=True)
X_test.replace([np.inf, -np.inf], np.nan, inplace=True)

# Impute NaN values (mean imputation)
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

# Normalize the feature data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define focal loss function
def focal_loss(gamma=2., alpha=.25):
    def focal_loss_fixed(y_true, y_pred):
        eps = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, eps, 1. - eps)
        y_true = tf.cast(y_true, tf.float32)
        alpha_t = y_true * alpha + (tf.keras.backend.ones_like(y_true) - y_true) * (1 - alpha)
        p_t = y_true * y_pred + (tf.keras.backend.ones_like(y_true) - y_true) * (tf.keras.backend.ones_like(y_true) - y_pred)
        fl = - alpha_t * tf.keras.backend.pow((tf.keras.backend.ones_like(y_true) - p_t), gamma) * tf.keras.backend.log(p_t)
        return tf.keras.backend.mean(fl)
    return focal_loss_fixed

#To avoid overfitting, adding dropout layers and regularization
model = Sequential()
model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01), input_shape=(X_train.shape[1],)))
model.add(Dropout(0.3))  # Adjusted dropout rate
model.add(Dense(32, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.3))
model.add(Dense(16, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.3))
model.add(Dense(3, activation='softmax'))

# Compile the model with focal loss
model.compile(optimizer='adam', loss=focal_loss(gamma=2., alpha=.25), metrics=['accuracy'])

# Calculate class weights for imbalanced dataset
class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(np.argmax(y_train, axis=1)), y=np.argmax(y_train, axis=1))
class_weights = dict(enumerate(class_weights))

# Train the model
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)  # Increased patience
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping], class_weight=class_weights)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy:.2f}')

# Generate confusion matrix and classification report
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

print("Confusion Matrix:")
print(confusion_matrix(y_true_classes, y_pred_classes))

print("\nClassification Report:")
print(classification_report(y_true_classes, y_pred_classes))

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.5228 - loss: 0.8707 - val_accuracy: 0.8464 - val_loss: 0.3057
Epoch 2/50
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7661 - loss: 0.2321 - val_accuracy: 0.8893 - val_loss: 0.0910
Epoch 3/50
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8417 - loss: 0.0827 - val_accuracy: 0.8993 - val_loss: 0.0477
Epoch 4/50
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8735 - loss: 0.0496 - val_accuracy: 0.9125 - val_loss: 0.0377
Epoch 5/50
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8650 - loss: 0.0464 - val_accuracy: 0.9141 - val_loss: 0.0356
Epoch 6/50
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8615 - loss: 0.0451 - val_accuracy: 0.9092 - val_loss: 0.0341
Epoch 7/50
[1m152/152[0m [32m━━━━━━━

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Function to Retrieve Historical Data


In [157]:
def retrieve_historical_data(home_team, away_team, game_date, historical_df, lookback_games=5):
    # Unpack the game_date tuple
    game_year, game_month, game_day = game_date

    # Filter past games for the home and away teams before the given game date
    past_games_home = historical_df[
        (historical_df['home_name_encoded'] == home_team) &
        (
            (historical_df['year'] < game_year) |
            ((historical_df['year'] == game_year) & (historical_df['month'] < game_month)) |
            ((historical_df['year'] == game_year) & (historical_df['month'] == game_month) & (historical_df['day'] < game_day))
        )
    ].sort_values(by=['year', 'month', 'day'], ascending=False).head(lookback_games)

    past_games_away = historical_df[
        (historical_df['away_name_encoded'] == away_team) &
        (
            (historical_df['year'] < game_year) |
            ((historical_df['year'] == game_year) & (historical_df['month'] < game_month)) |
            ((historical_df['year'] == game_year) & (historical_df['month'] == game_month) & (historical_df['day'] < game_day))
        )
    ].sort_values(by=['year', 'month', 'day'], ascending=False).head(lookback_games)

    # Calculate historical features (unchanged)
    home_avg_points_last_5 = past_games_home['home_points'].mean()
    away_avg_points_last_5 = past_games_away['away_points'].mean()
    home_recent_form = past_games_home['home_points'].sum() / (lookback_games * 3)  # Assuming 3 points per win
    away_recent_form = past_games_away['away_points'].sum() / (lookback_games * 3)

    home_scoring_efficiency = past_games_home['home_points'].sum() / past_games_home['home_total_yards'].sum()
    away_scoring_efficiency = past_games_away['away_points'].sum() / past_games_away['away_total_yards'].sum()

    home_avg_yards_per_play = past_games_home['home_total_yards'].sum() / past_games_home['home_rush_plays'].sum()
    away_avg_yards_per_play = past_games_away['away_total_yards'].sum() / past_games_away['away_rush_plays'].sum()

    home_rush_efficiency = past_games_home['home_rushing_totals_yards'].sum() / past_games_home['home_rushing_totals_attempts'].sum()
    away_rush_efficiency = past_games_away['away_rushing_totals_yards'].sum() / past_games_away['away_rushing_totals_attempts'].sum()

    #home_defense_effectiveness = past_games_away['away_total_yards'].sum() / past_games_home['home_defense_totals_def_targets'].sum()
    #away_defense_effectiveness = past_games_home['home_total_yards'].sum() / past_games_away['away_defense_totals_passes_defended'].sum()

    #home_kick_return_yards_per_attempt = past_games_home['home_kick_returns_totals_yards'].sum() / past_games_home['home_kickoffs_totals_number'].sum()
    #away_kick_return_yards_per_attempt = past_games_away['away_kick_returns_totals_yards'].sum() / past_games_away['away_kickoffs_totals_number'].sum()

    home_time_per_play = past_games_home['home_possession_time_seconds'].sum() / (past_games_home['home_rush_plays'].sum() + past_games_home['home_passing_totals_attempts'].sum())
    away_time_per_play = past_games_away['away_possession_time_seconds'].sum() / (past_games_away['away_rush_plays'].sum() + past_games_away['away_passing_totals_attempts'].sum())

    turnover_difference = past_games_away['away_turnovers'].sum() - past_games_home['home_rushing_totals_attempts'].sum()

    third_down_success_difference = past_games_home['home_rush_plays'].sum() - past_games_away['away_efficiency_thirddown_pct'].sum()

    home_red_zone_efficiency = past_games_home['home_touchdowns_total'].sum() / (past_games_home['home_rush_plays'].sum() + past_games_home['home_passing_totals_attempts'].sum())
    away_red_zone_efficiency = past_games_away['away_touchdowns_total'].sum() / (past_games_away['away_rush_plays'].sum() + past_games_away['away_passing_totals_attempts'].sum())

    total_yards_difference = past_games_home['home_total_yards'].sum() - past_games_away['away_total_yards'].sum()
    rushing_yards_difference = past_games_home['home_rushing_totals_yards'].sum() - past_games_away['away_rushing_totals_yards'].sum()
    passing_yards_difference = past_games_home['home_passing_totals_avg_yards'].sum() - past_games_away['away_passing_totals_avg_yards'].sum()

   #home_timeouts_left_ratio = past_games_home['home_remaining_timeouts'].sum() / past_games_home['home_used_timeouts'].sum()
   #away_timeouts_left_ratio = past_games_away['away_remaining_timeouts'].sum() / past_games_away['away_used_timeouts'].sum()

    # Combine all historical features into a dictionary
    historical_features = {
        'home_avg_points_last_5': home_avg_points_last_5,
        'away_avg_points_last_5': away_avg_points_last_5,
        'home_recent_form': home_recent_form,
        'away_recent_form': away_recent_form,
        'home_scoring_efficiency': home_scoring_efficiency,
        'away_scoring_efficiency': away_scoring_efficiency,
        'home_avg_yards_per_play': home_avg_yards_per_play,
        'away_avg_yards_per_play': away_avg_yards_per_play,
        'home_rush_efficiency': home_rush_efficiency,
        'away_rush_efficiency': away_rush_efficiency,
        #'home_defense_effectiveness': home_defense_effectiveness,
        #'away_defense_effectiveness': away_defense_effectiveness,
        #'home_kick_return_yards_per_attempt': home_kick_return_yards_per_attempt,
        #'away_kick_return_yards_per_attempt': away_kick_return_yards_per_attempt,
        'home_time_per_play': home_time_per_play,
        'away_time_per_play': away_time_per_play,
        'turnover_difference': turnover_difference,
        'third_down_success_difference': third_down_success_difference,
        'home_red_zone_efficiency': home_red_zone_efficiency,
        'away_red_zone_efficiency': away_red_zone_efficiency,
        'total_yards_difference': total_yards_difference,
        'rushing_yards_difference': rushing_yards_difference,
        'passing_yards_difference': passing_yards_difference,
        #'home_timeouts_left_ratio': home_timeouts_left_ratio,
        #'away_timeouts_left_ratio': away_timeouts_left_ratio
    }

    return historical_features


### Combine Before-game Features with Historical Features


In [158]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

def create_feature_vector(home_team, away_team, before_game_features, game_date, historical_df, scaler):
    # Retrieve historical features
    historical_features = retrieve_historical_data(home_team, away_team, game_date, historical_df)

    # Combine before-game features with historical features
    feature_vector = {**historical_features, **before_game_features}

    # Convert to DataFrame for model input
    feature_vector_df = pd.DataFrame([feature_vector])

    # Check for infinite values and replace them with NaN
    feature_vector_df.replace([np.inf, -np.inf], np.nan, inplace=True)

    # Impute NaN values (mean imputation)
    imputer = SimpleImputer(strategy='mean')
    feature_vector_imputed = pd.DataFrame(imputer.fit_transform(feature_vector_df), columns=feature_vector_df.columns)

    # Scale the features
    feature_vector_scaled = pd.DataFrame(scaler.transform(feature_vector_imputed), columns=feature_vector_imputed.columns)

    return feature_vector_scaled

### Prediction Function


In [159]:
def predict_game_outcome(home_team, away_team, before_game_features, game_date, historical_df, model, scaler):
    # Create the feature vector
    feature_vector_df = create_feature_vector(home_team, away_team, before_game_features, game_date, historical_df, scaler)

    # Make the prediction
    prediction = model.predict(feature_vector_df)

    # Get the index of the highest probability class
    predicted_class = np.argmax(prediction, axis=1)[0]  # Extract the class index (0, 1, or 2)

    # Map the predicted class index to the corresponding label
    prediction_label = "Home Win" if predicted_class == 2 else "Away Win" if predicted_class == 0 else "Draw"
    return prediction_label

### Creating Test Dataset

In [160]:
import pandas as pd

# Define the list of columns you want to keep
Before_game_features = [
    'attendance', 'quarter', 'season_year', 'week_sequence', 'week_title', 'venue_capacity',
    'venue_location_lat', 'venue_location_lng', 'home_name_encoded', 'away_name_encoded',
    'venue_surface_encoded', 'venue_roof_type_encoded', 'year', 'month', 'day', 'result'
]

# Select only the columns from Before_game_features
df_filtered = data[Before_game_features]

# Sort by a date column or sequence column to ensure the most recent rows are at the bottom
df_filtered_sorted = df_filtered.sort_values(by=['season_year', 'month', 'day'], ascending=[True, True, True])

# Take the most recent 5% of the rows
recent_5_percent_index = int(len(df_filtered_sorted) * 0.95)
X_test_recent = df_filtered_sorted.iloc[recent_5_percent_index:]

# Reset the index of X_test
X_test_recent.reset_index(drop=True, inplace=True)

# Display X_test to verify the result
X_test_recent.head()

Unnamed: 0,attendance,quarter,season_year,week_sequence,week_title,venue_capacity,venue_location_lat,venue_location_lng,home_name_encoded,away_name_encoded,venue_surface_encoded,venue_roof_type_encoded,year,month,day,result
0,67431.0,4.0,2022.0,15.0,15.0,67895.0,41.506054,-81.700004,5,26,1,1,2022.0,12.0,17.0,1
1,70794.0,4.0,2022.0,15.0,15.0,71608.0,42.773826,-78.786589,3,13,0,1,2022.0,12.0,18.0,1
2,70541.0,5.0,2022.0,15.0,15.0,72220.0,29.684735,-95.410725,31,9,0,2,2022.0,12.0,18.0,-1
3,73548.0,4.0,2022.0,15.0,15.0,74867.0,35.225937,-80.853133,22,30,0,1,2022.0,12.0,18.0,-1
4,75076.0,4.0,2022.0,15.0,15.0,82500.0,40.813611,-74.074444,19,20,0,1,2022.0,12.0,18.0,-1


### Making Predictions

In [161]:
import pandas as pd
from datetime import datetime

# List to store the predictions
predictions = []

# Iterate over each row in X_test
for index, row in X_test_recent.iterrows():
    # Extract the relevant features for the prediction
    before_game_features = {
        'attendance': row['attendance'],
        'quarter': row['quarter'],
        'season_year': row['season_year'],
        'week_sequence': row['week_sequence'],
        'week_title': row['week_title'],
        'venue_capacity': row['venue_capacity'],
        'venue_location_lat': row['venue_location_lat'],
        'venue_location_lng': row['venue_location_lng'],
        'home_name_encoded': row['home_name_encoded'],
        'away_name_encoded': row['away_name_encoded'],
        'venue_surface_encoded': row['venue_surface_encoded'],
        'venue_roof_type_encoded': row['venue_roof_type_encoded'],
        'year': row['year'],
        'month': row['month'],
        'day': row['day']
    }

    # Construct the game_date from year, month, and day
    game_date = (row['year'], row['month'], row['day'])  # Just a tuple now

    # Predict the game outcome using the extracted features
    prediction_label = predict_game_outcome(
        row['home_name_encoded'],
        row['away_name_encoded'],
        before_game_features,
        game_date,
        df,  #historical data DataFrame
        model,
        scaler=scaler
    )

    # Append the prediction to the list
    predictions.append(prediction_label)

# Add the predictions to X_test DataFrame for comparison
X_test_recent['predicted_outcome'] = predictions

# Optionally, print the DataFrame to see the predictions
print(X_test_recent[['home_name_encoded', 'away_name_encoded', 'predicted_outcome', 'result']])


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
     home_name_encoded  away_name_encoded predicted_outcome  result
0                    5                 26          Home Win       1
1                    3                 13          Home Win       1
2                   31                  9          Away Win      -1
3                   22                 30          Home Win      -1
4                   19                 20          Home Win      -1
..                 ...                ...               ...     ...
298                 31                 32          Home Win       1
299                 26                 13          Home Win       1
300                 29                 30          Home Win      -1
301                  9                  2          Away Win       1
302                  4                  8          Home Win       1

[303 rows x 4 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test_recent['predicted_outcome'] = predictions


In [162]:
from sklearn.metrics import accuracy_score

# Adjust the 'result' column to match the labels
# Convert -1 to 0, 0 stays the same, and 1 to 2
X_test_recent['result'] = X_test_recent['result'].map({-1: 0, 0: 1, 1: 2})

# Print the DataFrame to verify the conversion and predictions
print(X_test_recent[['home_name_encoded', 'away_name_encoded', 'predicted_outcome', 'result']])

# Calculate the accuracy score
# Convert 'predicted_outcome' from labels ("Home Win", "Away Win", "Draw") to numerical values (2, 0, 1)
X_test_recent['predicted_outcome_num'] = X_test_recent['predicted_outcome'].map({"Home Win": 2, "Away Win": 0, "Draw": 1})

# Calculate accuracy
accuracy = accuracy_score(X_test_recent['result'], X_test_recent['predicted_outcome_num'])

print(f"Accuracy: {accuracy:.2f}")

     home_name_encoded  away_name_encoded predicted_outcome  result
0                    5                 26          Home Win       2
1                    3                 13          Home Win       2
2                   31                  9          Away Win       0
3                   22                 30          Home Win       0
4                   19                 20          Home Win       0
..                 ...                ...               ...     ...
298                 31                 32          Home Win       2
299                 26                 13          Home Win       2
300                 29                 30          Home Win       0
301                  9                  2          Away Win       2
302                  4                  8          Home Win       2

[303 rows x 4 columns]
Accuracy: 0.59


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test_recent['result'] = X_test_recent['result'].map({-1: 0, 0: 1, 1: 2})
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test_recent['predicted_outcome_num'] = X_test_recent['predicted_outcome'].map({"Home Win": 2, "Away Win": 0, "Draw": 1})


In [163]:
print(X_test_recent['predicted_outcome_num'].value_counts())


predicted_outcome_num
2    228
0     75
Name: count, dtype: int64


### Transfer Learning


> Building Comprehensive Model



In [172]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Model
from keras.layers import Dense, Dropout, Input
from keras.utils import to_categorical
from keras.regularizers import l2
from sklearn.impute import SimpleImputer
from keras.callbacks import EarlyStopping

# Features and target
X = df[all_features]
y = data['result']

# One-hot encode the target variable (result)
y = to_categorical(y + 1, num_classes=3)  # Shift to make classes 0, 1, 2

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Check for infinite values and replace them with NaN
X_train.replace([np.inf, -np.inf], np.nan, inplace=True)
X_test.replace([np.inf, -np.inf], np.nan, inplace=True)

# Impute NaN values (mean imputation)
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

# Normalize the feature data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the comprehensive model using Functional API
input_all = Input(shape=(X_train.shape[1],))
x = Dense(128, activation='relu')(input_all)
x = Dropout(0.5)(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(32, activation='relu')(x)
output_all = Dense(3, activation='softmax')(x)  # Assuming 3 classes as per y = to_categorical
comprehensive_model = Model(inputs=input_all, outputs=output_all)

# Compile the model
comprehensive_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model with early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
comprehensive_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

# Evaluate the model
loss, accuracy = comprehensive_model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy:.2f}')

Epoch 1/10
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.7426 - loss: 0.5532 - val_accuracy: 0.8976 - val_loss: 0.2297
Epoch 2/10
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8946 - loss: 0.2650 - val_accuracy: 0.9191 - val_loss: 0.1884
Epoch 3/10
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9157 - loss: 0.2084 - val_accuracy: 0.9306 - val_loss: 0.1618
Epoch 4/10
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9235 - loss: 0.1995 - val_accuracy: 0.9381 - val_loss: 0.1476
Epoch 5/10
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9320 - loss: 0.1804 - val_accuracy: 0.9513 - val_loss: 0.1310
Epoch 6/10
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9421 - loss: 0.1486 - val_accuracy: 0.9546 - val_loss: 0.1146
Epoch 7/10
[1m152/152[0m 

### Transfer learning from compreensive model to prediction model

In [173]:
from keras.models import Model
from keras.layers import Input, Dense, Dropout
from keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler
from keras.callbacks import EarlyStopping

# Assuming 'data' and 'Before_game_features' are defined and correct
X_new = data[Before_game_features]
y = data['result']
y_new = to_categorical(y + 1, num_classes=3)  # Ensure the correct number of classes

new_scaler = StandardScaler()
X_new = new_scaler.fit_transform(X_new)

# Define new model architecture
input_pred = Input(shape=(X_new.shape[1],))
x_pred = Dense(128, activation='relu')(input_pred)
x_pred = Dropout(0.5)(x_pred)
x_pred = Dense(64, activation='relu')(x_pred)
x_pred = Dropout(0.5)(x_pred)
x_pred = Dense(32, activation='relu')(x_pred)
output_pred = Dense(3, activation='softmax')(x_pred)
prediction_model = Model(inputs=input_pred, outputs=output_pred)

# Filter only 'Dense' layers, skipping the first one
comprehensive_model_dense_layers = [layer for layer in comprehensive_model.layers if 'dense' in layer.name][1:]
prediction_model_dense_layers = [layer for layer in prediction_model.layers if 'dense' in layer.name][1:]

# Transfer weights for compatible layers
for comp_layer, pred_layer in zip(comprehensive_model_dense_layers, prediction_model_dense_layers):
    if comp_layer.get_config()['units'] == pred_layer.get_config()['units']:
        pred_layer.set_weights(comp_layer.get_weights())
    else:
        print(f"Skipping weight transfer for layer {comp_layer.name} due to configuration mismatch.")

# Compile and train the new model
prediction_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
prediction_model.fit(X_new, y_new, epochs=20, batch_size=32, validation_split=0.2, callbacks=[early_stopping])

# Evaluate the model
loss, accuracy = prediction_model.evaluate(X_new, y_new)
print(f'New Model Accuracy: {accuracy:.2f}')

Epoch 1/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.5518 - loss: 0.7776 - val_accuracy: 0.5301 - val_loss: 0.7269
Epoch 2/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5637 - loss: 0.7044 - val_accuracy: 0.5301 - val_loss: 0.7320
Epoch 3/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5536 - loss: 0.7011 - val_accuracy: 0.5301 - val_loss: 0.7273
Epoch 4/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5596 - loss: 0.7028 - val_accuracy: 0.5301 - val_loss: 0.7276
Epoch 5/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5602 - loss: 0.7022 - val_accuracy: 0.5301 - val_loss: 0.7404
Epoch 6/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5766 - loss: 0.6924 - val_accuracy: 0.5301 - val_loss: 0.7272
[1m190/190[0m [32m━━━━━━━

In [174]:
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.optimizers import Adam

# Redefine new model
new_model = Sequential()
# Reinitialize the first layer to match the new feature size
new_model.add(Dense(64, activation='relu', input_shape=(X_new.shape[1],)))
# Add other layers, possibly transferring weights from the base model if compatible
new_model.add(Dense(32, activation='relu'))
new_model.add(Dropout(0.3))
new_model.add(Dense(16, activation='relu'))
new_model.add(Dropout(0.3))
new_model.add(Dense(3, activation='softmax'))

# Compile and train the new model
new_model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
new_model.fit(X_new, y_new, epochs=20, batch_size=32,  validation_split=0.2)

# Evaluate the new model
loss, accuracy = new_model.evaluate(X_new, y_new)
print(f'New Model Accuracy: {accuracy:.2f}')

Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.4430 - loss: 0.9570 - val_accuracy: 0.5301 - val_loss: 0.7260
Epoch 2/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5445 - loss: 0.7209 - val_accuracy: 0.5301 - val_loss: 0.7280
Epoch 3/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5569 - loss: 0.7083 - val_accuracy: 0.5301 - val_loss: 0.7330
Epoch 4/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5581 - loss: 0.7155 - val_accuracy: 0.5301 - val_loss: 0.7429
Epoch 5/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5442 - loss: 0.7015 - val_accuracy: 0.5301 - val_loss: 0.7421
Epoch 6/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5796 - loss: 0.6990 - val_accuracy: 0.5301 - val_loss: 0.7464
Epoch 7/20
[1m152/152[0m [32m━━━━━━━

In [175]:
from sklearn.ensemble import RandomForestClassifier
import numpy as np

# Use RandomForest to assess feature importance
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_new, np.argmax(y_new, axis=1))  # Assuming y_new is one-hot encoded

# Extract feature importances
importances = rf_model.feature_importances_
feature_names = Before_game_features

# Sort the feature importances in descending order
indices = np.argsort(importances)[::-1]

# Print feature rankings and importance scores
print("Feature Ranking and Importance Scores:")
for i in range(len(feature_names)):
    print(f"{i + 1}. {feature_names[indices[i]]}: {importances[indices[i]]:.4f}")

Feature Ranking and Importance Scores:
1. attendance: 0.3305
2. away_name_encoded: 0.2176
3. day: 0.2089
4. season_year: 0.1036
5. year: 0.1026
6. venue_surface_encoded: 0.0367


### Selecting most relevant pre-game features

In [176]:
from sklearn.decomposition import PCA
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler

# Step 1: Preprocess the Before_game_features
Before_game_features = ['attendance', 'season_year','away_name_encoded', 'venue_surface_encoded', 'year', 'day']

X_new = data[Before_game_features]  # Assuming 'data' contains your dataset with all features

# One-hot encode the target variable (result)
y = data['result']
y_new = to_categorical(y + 1, num_classes=3)  # Shift to make classes 0, 1, 2

# Normalize the data using a new scaler
new_scaler = StandardScaler()
X_new = new_scaler.fit_transform(X_new)

# Step 2: Apply PCA
pca = PCA(n_components=5)  # Reduce to 10 components (adjust based on variance explained or model performance)
X_pca = pca.fit_transform(X_new)

# Step 3: Create a new model architecture
input_shape = X_pca.shape[1]

# Define the new model architecture
new_model = Sequential()
new_model.add(Dense(128, activation='relu', input_shape=(input_shape,)))
new_model.add(Dropout(0.3))  # Add dropout to prevent overfitting
new_model.add(Dense(64, activation='relu'))
new_model.add(Dropout(0.3))
new_model.add(Dense(32, activation='relu'))
new_model.add(Dense(3, activation='softmax'))

# Step 4: Transfer learning
# Load weights from the base model except for the first layer
# Transfer learning: Load weights from the base model, skipping layers where the shapes don't match
for i in range(1, min(len(new_model.layers), len(model.layers))):  # Ensure matching layer indices
    try:
        if new_model.layers[i].get_weights()[0].shape == model.layers[i].get_weights()[0].shape:
            new_model.layers[i].set_weights(model.layers[i].get_weights())
        else:
            print(f"Skipping weight transfer for layer {i} due to shape mismatch.")
    except IndexError:
        print(f"Skipping weight transfer for layer {i} due to index error.")

Skipping weight transfer for layer 1 due to index error.
Skipping weight transfer for layer 2 due to shape mismatch.
Skipping weight transfer for layer 3 due to index error.
Skipping weight transfer for layer 4 due to shape mismatch.
Skipping weight transfer for layer 5 due to index error.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [177]:
# Step 5: Compile and Train the New Model
new_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model on your new dataset (use the appropriate labels for this dataset)
new_model.fit(X_pca, y_new, epochs=20, batch_size=32, validation_split=0.2)

# Evaluate the new model
loss, accuracy = new_model.evaluate(X_pca, y_new)
print(f'New Model Accuracy after PCA: {accuracy:.2f}')

Epoch 1/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.5402 - loss: 0.8434 - val_accuracy: 0.5334 - val_loss: 0.7327
Epoch 2/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5427 - loss: 0.7069 - val_accuracy: 0.5301 - val_loss: 0.7580
Epoch 3/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5470 - loss: 0.7006 - val_accuracy: 0.5351 - val_loss: 0.7564
Epoch 4/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5714 - loss: 0.6940 - val_accuracy: 0.5310 - val_loss: 0.7440
Epoch 5/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5578 - loss: 0.6987 - val_accuracy: 0.5376 - val_loss: 0.7651
Epoch 6/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5755 - loss: 0.6852 - val_accuracy: 0.4963 - val_loss: 0.7422
Epoch 7/20
[1m152/152[0m 