In [2]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# Load the dataset
training_data = pd.read_excel('/content/Premier_League_Date_Combined_Modified_2.0 - Training Set.xlsx')

# Define the feature columns and target variable
features = [
    'LSPR',    # Last Season Possession Ratio
    'LSGFD',   # Last Season Goals For Difference
    'LSGAD',   # Last Season Goals Against Difference
    'LSYCD',   # Last Season Yellow Cards Difference
    'LSPD',    # Last Season Penalty Difference
    'LSSPR',   # Last Season Save Percentage Ratio
    'LSCSPR',  # Last Season Clean Sheet Percentage Ratio
    'R5PD',    # Recent 5 Games Points Difference
    'R5GFD',   # Recent 5 Games Goals For Difference
    'R5GAD',   # Recent 5 Games Goals Against Difference
    'TSSD',    # This Season Squad Difference
    'TSAD',    # This Season Age (Average) Difference
    'TSFD',    # This Season Foreigners Difference
    'TSTMR',   # This Season Total Market Ratio
    'R3VATGD', # Recent 3 Versus Away Team Goals Difference
    'R3VATP'   # Recent 3 Vercus Away Team Points
]
X = training_data[features]
y = training_data['Outcome_Label']

# Define the seasons for forward chaining cross-validation
seasons = ['2015-2016', '2016-2017', '2017-2018', '2018-2019', '2019-2020', '2020-2021', '2021-2022']

results = []

# RBF Kernel
for C in [0.1, 1, 10, 100]:
    for gamma in [0.001, 0.01, 0.1, 1]:
        print(f"Testing with rbf kernel, C={C}, gamma={gamma}")
        model = SVC(kernel='rbf', C=C, gamma=gamma)

        fold_results = []

        for i in range(2, len(seasons) - 1):
            # Define training and validation seasons
            train_seasons = seasons[i - 2:i]
            test_season = seasons[i + 1]

            # Filter training and testing data
            X_train = training_data[training_data['Season'].isin(train_seasons)][features]
            y_train = training_data[training_data['Season'].isin(train_seasons)]['Outcome_Label']
            X_test = training_data[training_data['Season'] == test_season][features]
            y_test = training_data[training_data['Season'] == test_season]['Outcome_Label']

            # Train the model
            model.fit(X_train, y_train)

            # Test the model
            y_pred = model.predict(X_test)

            # Evaluate the model
            report = classification_report(y_test, y_pred, output_dict=True)
            accuracy = report['accuracy']
            fold_results.append(accuracy)

        # Average results for this configuration
        mean_accuracy = np.mean(fold_results)
        results.append({'Kernel': 'rbf', 'C': C, 'Gamma': gamma, 'Mean Accuracy': mean_accuracy})

# Summarize all results
results_df = pd.DataFrame(results)
print("\nSummary of Results:")
print(results_df)

# Find the best configuration
best_result = results_df.loc[results_df['Mean Accuracy'].idxmax()]
print(f"\nBest Configuration: {best_result}")


Testing with rbf kernel, C=0.1, gamma=0.001


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Testing with rbf kernel, C=0.1, gamma=0.01


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Testing with rbf kernel, C=0.1, gamma=0.1


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Testing with rbf kernel, C=0.1, gamma=1


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

Testing with rbf kernel, C=1, gamma=0.001


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Testing with rbf kernel, C=1, gamma=0.01
Testing with rbf kernel, C=1, gamma=0.1


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Testing with rbf kernel, C=1, gamma=1


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Testing with rbf kernel, C=10, gamma=0.001
Testing with rbf kernel, C=10, gamma=0.01
Testing with rbf kernel, C=10, gamma=0.1


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Testing with rbf kernel, C=10, gamma=1


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Testing with rbf kernel, C=100, gamma=0.001
Testing with rbf kernel, C=100, gamma=0.01
Testing with rbf kernel, C=100, gamma=0.1


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Testing with rbf kernel, C=100, gamma=1


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Summary of Results:
   Kernel      C  Gamma  Mean Accuracy
0     rbf    0.1  0.001       0.526974
1     rbf    0.1  0.010       0.434211
2     rbf    0.1  0.100       0.434211
3     rbf    0.1  1.000       0.434211
4     rbf    1.0  0.001       0.528947
5     rbf    1.0  0.010       0.464474
6     rbf    1.0  0.100       0.434211
7     rbf    1.0  1.000       0.434211
8     rbf   10.0  0.001       0.464474
9     rbf   10.0  0.010       0.450000
10    rbf   10.0  0.100       0.433553
11    rbf   10.0  1.000       0.434211
12    rbf  100.0  0.001       0.419079
13    rbf  100.0  0.010       0.449342
14    rbf  100.0  0.100       0.433553
15    rbf  100.0  1.000       0.434211

Best Configuration: Kernel                rbf
C                     1.0
Gamma               0.001
Mean Accuracy    0.528947
Name: 4, dtype: object


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Train the final model using the entire training set with the best n_estimators value
model = SVC(kernel='rbf', C=1.0, gamma=0.001)
X_train = training_data[features]
y_train = training_data['Outcome_Label']

# Fit the model on the entire training data
model.fit(X_train, y_train)

# Predict on the training data
y_pred_train = model.predict(X_train)

# Evaluate the model on the training data
training_confusion_matrix = confusion_matrix(y_train, y_pred_train)

print("\nTraining Confusion Matrix:")
print(training_confusion_matrix)


Training Confusion Matrix:
[[448   0 411]
 [193   0 431]
 [198   0 979]]


In [None]:
print(training_data['Outcome_Label'].value_counts())

Outcome_Label
2    1177
0     859
1     624
Name: count, dtype: int64
