In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

# Read the training set workbook into a DataFrame
training_set_path = '/content/Premier_League_Date_Combined_Modified_2.0 - Training Set.xlsx'
training_data = pd.read_excel(training_set_path)

# Feature column code -> meaning of the abbreviation.
# The insertion order of this mapping is the column order fed to the model.
feature_descriptions = {
    'LSPR': 'Last Season Possession Ratio',
    'LSGFD': 'Last Season Goals For Difference',
    'LSGAD': 'Last Season Goals Against Difference',
    'LSYCD': 'Last Season Yellow Cards Difference',
    'LSPD': 'Last Season Penalty Difference',
    'LSSPR': 'Last Season Save Percentage Ratio',
    'LSCSPR': 'Last Season Clean Sheet Percentage Ratio',
    'R5PD': 'Recent 5 Games Points Difference',
    'R5GFD': 'Recent 5 Games Goals For Difference',
    'R5GAD': 'Recent 5 Games Goals Against Difference',
    'TSSD': 'This Season Squad Difference',
    'TSAD': 'This Season Age (Average) Difference',
    'TSFD': 'This Season Foreigners Difference',
    'TSTMR': 'This Season Total Market Ratio',
    'R3VATGD': 'Recent 3 Versus Away Team Goals Difference',
    'R3VATP': 'Recent 3 Versus Away Team Points',
}

# Column names used as model inputs, and the column holding the target
features = list(feature_descriptions)
outcome_label = 'Outcome_Label'

# Season labels walked by the sliding window CV: '2015-2016' .. '2021-2022'
seasons = [f'{start}-{start + 1}' for start in range(2015, 2022)]

# One test accuracy is appended here per CV fold
fold_results = []

# Sliding window cross-validation: each fold trains a fresh network on two
# consecutive seasons and scores it on a later, held-out season.

# The number of outcome classes belongs to the task, not to a single fold, so
# it is derived once from every label in the training set. This keeps the
# one-hot width and the output layer identical across folds even if one
# training window happens to miss a class.
num_classes = training_data[outcome_label].nunique()

for i in range(2, len(seasons) - 1):
    # Define training and test seasons
    train_seasons = seasons[i - 2:i]
    # NOTE(review): the held-out season is seasons[i + 1], so seasons[i] is
    # neither trained on nor tested in this fold. Confirm the one-season gap
    # is intentional; if not, this index (and the range bound) need adjusting.
    test_season = seasons[i + 1]

    # Build each row mask once and reuse it for both features and labels
    train_mask = training_data['Season'].isin(train_seasons)
    test_mask = training_data['Season'] == test_season
    X_train = training_data.loc[train_mask, features]
    y_train = training_data.loc[train_mask, outcome_label]
    X_test = training_data.loc[test_mask, features]
    y_test = training_data.loc[test_mask, outcome_label]

    # Standardize with statistics fitted on the training window only, then
    # apply the same transformation to the held-out season
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Convert labels to categorical (one-hot encoding)
    # assumes labels are integer codes 0..num_classes-1 — TODO confirm
    y_train_encoded = to_categorical(y_train, num_classes=num_classes)
    y_test_encoded = to_categorical(y_test, num_classes=num_classes)

    # Build the neural network model. The input width is declared with an
    # explicit Input layer; passing input_shape= to the first Dense layer is
    # deprecated for Sequential models and emits a UserWarning.
    model = Sequential([
        Input(shape=(X_train_scaled.shape[1],)),
        Dense(128, activation='relu'),  # First hidden layer
        Dropout(0.2),  # Dropout for regularization
        Dense(64, activation='relu'),  # Second hidden layer
        Dropout(0.2),  # Dropout for regularization
        Dense(num_classes, activation='softmax'),  # One probability per class
    ])

    # Compile the model
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Train the model
    history = model.fit(X_train_scaled, y_train_encoded, epochs=50, batch_size=32, verbose=0)

    # Evaluate the model on the held-out season and record its accuracy
    test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test_encoded, verbose=0)
    fold_results.append(test_accuracy)

# Summary of sliding window CV results
print("\nSliding Window Cross-Validation Results:")
print(f"Mean Accuracy: {np.mean(fold_results):.4f}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Sliding Window Cross-Validation Results:
Mean Accuracy: 0.4829


In [None]:
# Train one final network on the whole training set and report its accuracy
# and confusion matrix on both the training set and the separate testing set.
testing_data = pd.read_excel('/content/Premier_League_Date_Combined_Modified_2.0 - Testing Set.xlsx')

# Prepare the training and testing data
X = training_data[features]
y = training_data[outcome_label]
X_test = testing_data[features]
y_test = testing_data[outcome_label]

# Standardize the features.
# The scaler is fitted on the training data only and then applied unchanged to
# the test data. Refitting it on the test set would standardize the two sets
# with different means/variances, so the model would see test inputs on a
# different scale from the one it was trained on.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_scaled_test = scaler.transform(X_test)

# Convert labels to categorical (one-hot encoding)
# assumes labels are integer codes 0..num_classes-1 — TODO confirm
num_classes = len(y.unique())
y_encoded = to_categorical(y, num_classes=num_classes)
y_encoded_test = to_categorical(y_test, num_classes=num_classes)

# Build the neural network model. The input width is declared with an explicit
# Input layer; passing input_shape= to the first Dense layer is deprecated for
# Sequential models and emits a UserWarning.
model = Sequential([
    Input(shape=(X_scaled.shape[1],)),
    Dense(128, activation='relu'),  # First hidden layer
    Dropout(0.2),  # Dropout for regularization
    Dense(64, activation='relu'),  # Second hidden layer
    Dropout(0.2),  # Dropout for regularization
    Dense(num_classes, activation='softmax'),  # One probability per class
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model on the entire training set
history = model.fit(X_scaled, y_encoded, epochs=50, batch_size=32, verbose=1)

# Evaluate the model on the training and testing data
train_loss, train_accuracy = model.evaluate(X_scaled, y_encoded, verbose=0)
print(f"\nTraining Accuracy: {train_accuracy * 100:.2f}%")
test_loss, test_accuracy = model.evaluate(X_scaled_test, y_encoded_test, verbose=0)
print(f"\nTesting Accuracy: {test_accuracy * 100:.2f}%")

# Turn the predicted class probabilities into class indices (most likely class)
y_pred = np.argmax(model.predict(X_scaled), axis=1)
y_pred_test = np.argmax(model.predict(X_scaled_test), axis=1)

# Confusion Matrix
print("\nTraining Confusion Matrix:")
print(confusion_matrix(y, y_pred))
print("\nTesting Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_test))


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.4982 - loss: 1.0154
Epoch 2/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5243 - loss: 0.9873
Epoch 3/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5486 - loss: 0.9653
Epoch 4/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5289 - loss: 0.9848
Epoch 5/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5497 - loss: 0.9684
Epoch 6/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5483 - loss: 0.9602
Epoch 7/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5502 - loss: 0.9516
Epoch 8/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5475 - loss: 0.9495
Epoch 9/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1