**LDA:**

In [None]:
import pandas as pd
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# Load the dataset.
# NOTE: absolute Colab path; adjust when running outside of Colab.
training_data = pd.read_excel('/content/Premier_League_Date_Combined_Modified_2.0 - Training Set.xlsx')

# Define the feature columns and target variable
features = [
    'LSPR',    # Last Season Possession Ratio
    'LSGFD',   # Last Season Goals For Difference
    'LSGAD',   # Last Season Goals Against Difference
    'LSYCD',   # Last Season Yellow Cards Difference
    'LSPD',    # Last Season Penalty Difference
    'LSSPR',   # Last Season Save Percentage Ratio
    'LSCSPR',  # Last Season Clean Sheet Percentage Ratio
    'R5PD',    # Recent 5 Games Points Difference
    'R5GFD',   # Recent 5 Games Goals For Difference
    'R5GAD',   # Recent 5 Games Goals Against Difference
    'TSSD',    # This Season Squad Difference
    'TSAD',    # This Season Age (Average) Difference
    'TSFD',    # This Season Foreigners Difference
    'TSTMR',   # This Season Total Market Ratio
    'R3VATGD', # Recent 3 Versus Away Team Goals Difference
    'R3VATP'   # Recent 3 Versus Away Team Points
]
X = training_data[features]
y = training_data['Outcome_Label']

# Seasons, in chronological order, used to build the time-ordered folds below.
seasons = ['2015-2016', '2016-2017', '2017-2018', '2018-2019', '2019-2020', '2020-2021', '2021-2022']

# One record per fold: which seasons were trained on, which was tested, and the accuracy.
results = []

# Initialize Linear Discriminant Analysis model (refit from scratch on every fold).
model = LinearDiscriminantAnalysis()

for i in range(2, len(seasons) - 1):
    # Train on a sliding window of two consecutive seasons ...
    train_seasons = seasons[i - 2:i]
    # ... and evaluate on a single later season.
    # NOTE(review): seasons[i] is neither trained on nor tested, so there is a
    # one-season gap between the training window and the test season, and
    # '2017-2018' is never used for testing. Confirm this gap is intentional;
    # a conventional forward-chaining split would test on seasons[i].
    test_season = seasons[i + 1]

    # Build each row mask once and reuse it for both features and labels.
    train_mask = training_data['Season'].isin(train_seasons)
    test_mask = training_data['Season'] == test_season

    X_train = training_data.loc[train_mask, features]
    y_train = training_data.loc[train_mask, 'Outcome_Label']
    X_test = training_data.loc[test_mask, features]
    y_test = training_data.loc[test_mask, 'Outcome_Label']

    # Fail with an actionable message if the 'Season' values do not match the
    # labels listed in `seasons` (otherwise the estimator raises a less clear error).
    if X_train.empty or X_test.empty:
        raise ValueError(
            f"No rows found for train seasons {train_seasons} or test season "
            f"{test_season!r}; check the values in the 'Season' column."
        )

    # Train the model
    model.fit(X_train, y_train)

    # Test the model
    y_pred = model.predict(X_test)

    # Accuracy is the fraction of exactly matching labels. Computing it directly
    # avoids building a full classification report (and its undefined-metric
    # warnings for classes that are never predicted) just to read one number.
    accuracy = float((y_pred == y_test.to_numpy()).mean())
    results.append({'Train Seasons': train_seasons, 'Test Season': test_season, 'Accuracy': accuracy})

# Unweighted mean of the per-fold accuracies.
results_df = pd.DataFrame(results)
mean_accuracy = results_df['Accuracy'].mean()
print(f"\nMean Accuracy for LDA: {mean_accuracy}")

# Refit on every row and predict those same rows: this confusion matrix is
# in-sample (training fit), not an estimate of out-of-sample performance.
model.fit(X, y)
y_pred_train = model.predict(X)
training_confusion_matrix = confusion_matrix(y, y_pred_train)

print("Training Confusion Matrix:")
print(training_confusion_matrix)


Mean Accuracy for LDA: 0.5210526315789474
Training Confusion Matrix:
[[513   3 343]
 [236   2 386]
 [255   1 921]]


**QDA:**

In [None]:
import pandas as pd
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# Load the dataset.
# NOTE: absolute Colab path; adjust when running outside of Colab.
training_data = pd.read_excel('/content/Premier_League_Date_Combined_Modified_2.0 - Training Set.xlsx')

# Define the feature columns and target variable
features = [
    'LSPR',    # Last Season Possession Ratio
    'LSGFD',   # Last Season Goals For Difference
    'LSGAD',   # Last Season Goals Against Difference
    'LSYCD',   # Last Season Yellow Cards Difference
    'LSPD',    # Last Season Penalty Difference
    'LSSPR',   # Last Season Save Percentage Ratio
    'LSCSPR',  # Last Season Clean Sheet Percentage Ratio
    'R5PD',    # Recent 5 Games Points Difference
    'R5GFD',   # Recent 5 Games Goals For Difference
    'R5GAD',   # Recent 5 Games Goals Against Difference
    'TSSD',    # This Season Squad Difference
    'TSAD',    # This Season Age (Average) Difference
    'TSFD',    # This Season Foreigners Difference
    'TSTMR',   # This Season Total Market Ratio
    'R3VATGD', # Recent 3 Versus Away Team Goals Difference
    'R3VATP'   # Recent 3 Versus Away Team Points
]
X = training_data[features]
y = training_data['Outcome_Label']

# Seasons, in chronological order, used to build the time-ordered folds below.
seasons = ['2015-2016', '2016-2017', '2017-2018', '2018-2019', '2019-2020', '2020-2021', '2021-2022']

# One record per fold: which seasons were trained on, which was tested, and the accuracy.
results = []

# Initialize Quadratic Discriminant Analysis model (refit from scratch on every fold).
model = QuadraticDiscriminantAnalysis()

for i in range(2, len(seasons) - 1):
    # Train on a sliding window of two consecutive seasons ...
    train_seasons = seasons[i - 2:i]
    # ... and evaluate on a single later season.
    # NOTE(review): seasons[i] is neither trained on nor tested, so there is a
    # one-season gap between the training window and the test season, and
    # '2017-2018' is never used for testing. Confirm this gap is intentional;
    # a conventional forward-chaining split would test on seasons[i].
    test_season = seasons[i + 1]

    # Build each row mask once and reuse it for both features and labels.
    train_mask = training_data['Season'].isin(train_seasons)
    test_mask = training_data['Season'] == test_season

    X_train = training_data.loc[train_mask, features]
    y_train = training_data.loc[train_mask, 'Outcome_Label']
    X_test = training_data.loc[test_mask, features]
    y_test = training_data.loc[test_mask, 'Outcome_Label']

    # Fail with an actionable message if the 'Season' values do not match the
    # labels listed in `seasons` (otherwise the estimator raises a less clear error).
    if X_train.empty or X_test.empty:
        raise ValueError(
            f"No rows found for train seasons {train_seasons} or test season "
            f"{test_season!r}; check the values in the 'Season' column."
        )

    # Train the model
    model.fit(X_train, y_train)

    # Test the model
    y_pred = model.predict(X_test)

    # Accuracy is the fraction of exactly matching labels. Computing it directly
    # avoids building a full classification report (and its undefined-metric
    # warnings for classes that are never predicted) just to read one number.
    accuracy = float((y_pred == y_test.to_numpy()).mean())
    results.append({'Train Seasons': train_seasons, 'Test Season': test_season, 'Accuracy': accuracy})

# Unweighted mean of the per-fold accuracies.
results_df = pd.DataFrame(results)
mean_accuracy = results_df['Accuracy'].mean()
print(f"\nMean Accuracy for QDA: {mean_accuracy}")

# Refit on every row and predict those same rows: this confusion matrix is
# in-sample (training fit), not an estimate of out-of-sample performance.
model.fit(X, y)
y_pred_train = model.predict(X)
training_confusion_matrix = confusion_matrix(y, y_pred_train)

print("Training Confusion Matrix:")
print(training_confusion_matrix)



Mean Accuracy for QDA: 0.4703947368421053
Training Confusion Matrix:
[[632  54 173]
 [305  90 229]
 [411 107 659]]
