In [None]:
from sklearn.metrics import root_mean_squared_error, accuracy_score, precision_recall_fscore_support,mean_squared_error
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, StandardScaler, OneHotEncoder
from sklearn.feature_selection import SelectKBest, chi2, RFE, mutual_info_regression
from tensorflow.keras.layers import Dense, LSTM, Dropout
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras import regularizers
from sklearn.svm import SVC, LinearSVC
from keras.utils import to_categorical
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import tensorflow as tf
import seaborn as sns
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import GridSearchCV
import networkx as nx
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import mean_squared_error, log_loss
from sklearn.linear_model import LogisticRegression
from scipy.stats import norm
import itertools

In [None]:
# Path of the match-level dataset; note that it points at a CSV file, not a directory.
data_dir = "finalData.csv"

# Read every match into the working frame used by the rest of the notebook.
df = pd.read_csv(filepath_or_buffer=data_dir)

In [None]:
def get_season(date):
    """Return the starting year of the season that `date` belongs to.

    Dates in August or later are assigned to the season that starts in the
    same calendar year; dates from January to July are assigned to the
    season that started the previous year.

    `date` is any object exposing integer `.month` and `.year` attributes.
    """
    season_has_rolled_over = date.month >= 8
    return date.year if season_has_rolled_over else date.year - 1

In [None]:
# Rename the raw columns positionally: the list below must follow the exact
# column order of the loaded file.
df.columns = (
    # Fixture identity and results
    [
        'Date', 'Home Team', 'Away Team',
        'Full Time Home Goals', 'Full Time Away Goals', 'Full Time Result',
        'Half Time Home Goals', 'Half Time Away Goals', 'Half Time Result',
        'Referee',
    ]
    # Basic match statistics, stored as adjacent Home/Away pairs
    + [
        'Home Shots', 'Away Shots',
        'Home Shots on Target', 'Away Shots on Target',
        'Home Corners', 'Away Corners',
        'Home Fouls', 'Away Fouls',
        'Home Yellow Cards', 'Away Yellow Cards',
        'Home Red Cards', 'Away Red Cards',
        'Home Possession', 'Away Possession',
    ]
    # Advanced statistics for the home side
    + [
        'Home Passes Completed', 'Home Passes PCT',
        'Home Progressive Passes', 'Home Progressive Passing Distance',
        'Home xG',
        'Home Take Ons Won', 'Home Take Ons',
        'Home Interceptions', 'Home Blocks',
        'Home Touches', 'Home Touches Def 3rd', 'Home Touches Mid 3rd', 'Home Touches Att 3rd',
        'Home Carries', 'Home Carries Progressive Distance',
        'Home Tackles', 'Home Tackles Won',
    ]
    # The same advanced statistics for the away side, in the same order
    + [
        'Away Passes Completed', 'Away Passes PCT',
        'Away Progressive Passes', 'Away Progressive Passing Distance',
        'Away xG',
        'Away Take Ons Won', 'Away Take Ons',
        'Away Interceptions', 'Away Blocks',
        'Away Touches', 'Away Touches Def 3rd', 'Away Touches Mid 3rd', 'Away Touches Att 3rd',
        'Away Carries', 'Away Carries Progressive Distance',
        'Away Tackles', 'Away Tackles Won',
    ]
)

In [None]:
# Parse the match dates and put the fixtures in chronological order.
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
df = df.sort_values('Date')


# A season starts in August and ends in May of the following year; it is
# labelled by its starting year (the 2000-2001 season is season 2000).
def get_season(date):
    """Return the starting year of the season that `date` belongs to."""
    if date.month < 8:
        return date.year - 1
    return date.year


df['Season'] = df['Date'].map(get_season)

# Numeric outcome from the home side's point of view: win 1, draw 0, loss -1.
df['Match Outcome'] = df['Full Time Result'].map({'H': 1, 'D': 0, 'A': -1})

In [None]:
def calculate_season_points(df):
    """Add per-match league points and running per-season totals to `df`.

    Reads the 'Match Outcome' column (1 = home win, 0 = draw, -1 = away win)
    together with 'Home Team', 'Away Team' and 'Season', and writes four
    columns onto `df` in place:

    - 'Home Team Points' / 'Away Team Points': 3 for a win, 1 for a draw,
      0 for a loss. Any other outcome value (including NaN) gives 0 points
      to both sides.
    - 'Home Total Seasonal Points': cumulative sum of 'Home Team Points'
      within each (Home Team, Season) group, in the current row order.
    - 'Away Total Seasonal Points': the same for (Away Team, Season).

    Note that each running total only counts the matches a team played in
    that role (home or away), and that the total on a row already includes
    the points from that row's match.

    Returns the same `df` object that was passed in.
    """
    outcome = df['Match Outcome']

    # Vectorised lookup instead of a row-by-row loop: faster, and it does not
    # depend on the index labels being unique.
    home_points = outcome.map({1: 3, 0: 1, -1: 0})
    away_points = outcome.map({1: 0, 0: 1, -1: 3})

    # Unmapped outcomes become NaN; they score 0 points, as before.
    df['Home Team Points'] = home_points.fillna(0).astype('int64')
    df['Away Team Points'] = away_points.fillna(0).astype('int64')

    df['Home Total Seasonal Points'] = (
        df.groupby(['Home Team', 'Season'])['Home Team Points'].cumsum()
    )
    df['Away Total Seasonal Points'] = (
        df.groupby(['Away Team', 'Season'])['Away Team Points'].cumsum()
    )

    return df
# Attach the per-match points and running seasonal totals to the main frame.
df = calculate_season_points(df)

In [None]:
# Columns carried forward into the modelling frame, grouped by theme.
features = (
    # Home advantage: who played and how the match ended
    [
        "Home Team",
        "Away Team",
        "Match Outcome",
        "Full Time Home Goals",
        "Full Time Away Goals",
    ]
    # Attacking strength
    + [
        "Home Shots on Target", "Away Shots on Target",
        "Home Progressive Passes", "Away Progressive Passes",
        "Home Touches Att 3rd", "Away Touches Att 3rd",
        "Home Take Ons Won", "Away Take Ons Won",
        "Home Corners", "Away Corners",
    ]
    # Midfield strength
    + [
        "Home Touches Mid 3rd", "Away Touches Mid 3rd",
        "Home Passes Completed", "Away Passes Completed",
        "Home Passes PCT", "Away Passes PCT",
        "Home Carries", "Away Carries",
    ]
    # Defensive strength
    + [
        "Home Tackles", "Away Tackles",
        "Home Tackles Won", "Away Tackles Won",
        "Home Blocks", "Away Blocks",
        "Home Interceptions", "Away Interceptions",
    ]
    # Extra bookkeeping columns
    + [
        "Full Time Result",
        'Date',
        'Season',
    ]
)

# Keep only complete rows. The trailing copy gives an independent frame that
# later cells can add columns to.
updated_df = df[features].dropna().copy()

In [None]:
# Numeric match statistics; used as the default column set by the scaling helpers.
numerical_features = (
    # Home advantage: final score
    ["Full Time Home Goals", "Full Time Away Goals"]
    # Attacking strength
    + [
        "Home Shots on Target", "Away Shots on Target",
        "Home Progressive Passes", "Away Progressive Passes",
        "Home Touches Att 3rd", "Away Touches Att 3rd",
        "Home Take Ons Won", "Away Take Ons Won",
        "Home Corners", "Away Corners",
    ]
    # Midfield strength
    + [
        "Home Touches Mid 3rd", "Away Touches Mid 3rd",
        "Home Passes Completed", "Away Passes Completed",
        "Home Passes PCT", "Away Passes PCT",
        "Home Carries", "Away Carries",
    ]
    # Defensive strength
    + [
        "Home Tackles", "Away Tackles",
        "Home Tackles Won", "Away Tackles Won",
        "Home Blocks", "Away Blocks",
        "Home Interceptions", "Away Interceptions",
    ]
)

In [None]:
def standardisation(df, terms=numerical_features):
    """Standardise the `terms` columns of `df` with a freshly fitted StandardScaler.

    The scaler is fitted on the very data it transforms. The columns are
    overwritten on `df` itself, and that same frame is returned.
    """
    df[terms] = StandardScaler().fit_transform(df[terms])
    return df

# Min-max scaling of a chosen set of columns.
def normalisation(df, terms=numerical_features):
    """Rescale the `terms` columns of `df` with a freshly fitted MinMaxScaler.

    The scaler is fitted on the very data it transforms. The columns are
    overwritten on `df` itself, and that same frame is returned.
    """
    df[terms] = MinMaxScaler().fit_transform(df[terms])
    return df

# Label encoding - intended for boosting models.
def label_encoding(df, col=['Home Team', 'Away Team'], dataframe=True):
    """Replace categorical values with integer codes using LabelEncoder.

    With `dataframe=True` (the default) each column named in `col` is
    encoded and overwritten on `df`, which is then returned. The encoder is
    refitted for every column, so the integer codes of one column are
    independent of those of another.

    With `dataframe=False`, `df` is treated as a single array-like of labels
    and the encoded array is returned instead.
    """
    label_encoder = LabelEncoder()
    if not dataframe:
        return label_encoder.fit_transform(df)

    for column_name in col:
        df[column_name] = label_encoder.fit_transform(df[column_name])
    return df

In [None]:
def pca(df):
    """Report how many principal components are needed to keep 95% of the variance.

    Drops the identifier/label columns listed in `featuresToDrop`,
    standardises every remaining column, fits a full PCA, prints the
    explained-variance ratios, plots the cumulative curve and returns the
    smallest component count whose cumulative ratio is at least 0.95.

    Raises AssertionError if any column in `featuresToDrop` is absent.
    """
    # Columns treated as irrelevant for the decomposition.
    featuresToDrop = ['Date', 'Home Team', 'Away Team', 'Full Time Result', 'Half Time Result', 'Referee']
    assert set(featuresToDrop).issubset(df.columns), "Some columns in featuresToDrop are missing in df"

    # `drop` returns a new frame, so the in-place scaling below leaves `df` untouched.
    scaled = df.drop(columns=featuresToDrop)
    scaled = standardisation(scaled, terms=list(scaled.columns))

    decomposition = PCA()
    decomposition.fit_transform(scaled)

    # Share of the total variance explained by each component.
    explained_variance = decomposition.explained_variance_ratio_
    print("Explained Variance Ratio: ", explained_variance)

    # First component count (1-based) at which the running total reaches 95%.
    cumulative_variance = explained_variance.cumsum()
    n_components = next(
        count
        for count, running_total in enumerate(cumulative_variance, start=1)
        if running_total >= 0.95
    )
    print(f"Number of components to retain 95% variance: {n_components}")

    # Cumulative explained-variance curve.
    plt.figure(figsize=(10, 6))
    plt.plot(cumulative_variance, marker='o', linestyle='--')
    plt.title('Cumulative Explained Variance by Number of Components')
    plt.xlabel('Number of Components')
    plt.ylabel('Cumulative Explained Variance')
    plt.grid()
    plt.show()

    return n_components


# Component count reused below as the number of features to select.
number_of_components = pca(df)



def chi_square(df, k=None):
    """Score every feature against the match outcome with a chi-square test and plot the ranking.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Date', 'Home Team', 'Away Team', 'Full Time Result'
        and 'Match Outcome'. Those five columns are excluded; every other
        column is min-max scaled and scored as a feature. `df` itself is
        not modified.
    k : int, optional
        Number of top-ranked features to select and highlight in the plot.
        Defaults to the notebook-level `number_of_components`. It is capped
        at the number of available features.

    Returns
    -------
    pandas.DataFrame
        Columns 'Feature' and 'Chi-Square Score', sorted by descending score.

    Raises
    ------
    AssertionError
        If any of the five required columns is missing from `df`.
    """
    # Split the frame into the feature matrix and the target.
    featuresToDrop = ['Date', 'Home Team', 'Full Time Result', 'Away Team', 'Match Outcome']
    assert set(featuresToDrop).issubset(df.columns), "Some columns in featuresToDrop are missing in df"
    data = df.drop(featuresToDrop, axis=1)

    # Derive the target locally instead of overwriting the caller's 'Match Outcome' column.
    target = df['Full Time Result'].map({'H': 1, 'D': 0, 'A': -1})

    # The chi2 scoring function requires non-negative input, hence min-max scaling.
    data = normalisation(data, terms=list(data.columns))
    feature_names = list(data.columns)

    # Fall back to the PCA-derived count when the caller does not choose k.
    if k is None:
        k = number_of_components
    k = min(k, len(feature_names))

    # Only the scores are needed, so fit without transforming.
    chi_select = SelectKBest(chi2, k=k)
    chi_select.fit(data, target)

    # One row per feature, best score first.
    chi2_df = pd.DataFrame({'Feature': feature_names, 'Chi-Square Score': chi_select.scores_})
    chi2_df = chi2_df.sort_values(by='Chi-Square Score', ascending=False)

    # Horizontal bar chart; the k highest-scoring features are highlighted.
    plt.figure(figsize=(10, 5))
    colours = ['skyblue' if rank < k else 'gray' for rank in range(len(chi2_df))]
    plt.barh(chi2_df['Feature'], chi2_df['Chi-Square Score'], color=colours)
    plt.xlabel('Chi-Square Score')
    plt.ylabel('Features')
    plt.title('Feature Importance (Chi-Square)')
    plt.gca().invert_yaxis()  # Invert y-axis to show the highest scores at the top
    plt.tight_layout()
    plt.tick_params(axis="y", pad=10, labelsize=5)
    plt.show()

    return chi2_df


chi2_df = chi_square(updated_df)

In [None]:
def adjust_feature_weights(features, chi2_df):
    """Turn chi-square scores into per-feature weights for a group of Home/Away pairs.

    Parameters
    ----------
    features : iterable of (home_feature, away_feature) tuples
        Feature names that must appear in `chi2_df['Feature']`.
    chi2_df : pandas.DataFrame
        Table with a 'Feature' column and a 'Chi-Square Score' column.

    Returns
    -------
    dict
        Maps every home and away feature name to its weight. An empty
        `features` yields an empty dict.

    Steps
    -----
    1. The home and away scores of each pair are added together.
    2. Pairs are ranked by that combined score, highest first.
    3. The "/3 rule": a pair whose score is below one third of the score of
       the pair ranked just above it is raised to that one-third value.
    4. Each pair's resulting score is split equally between its two features.

    Raises
    ------
    IndexError
        If a feature name is not present in `chi2_df`.
    """
    def score_of(feature):
        # First matching row wins; a missing feature raises IndexError.
        return chi2_df.loc[chi2_df['Feature'] == feature, 'Chi-Square Score'].values[0]

    # Combined Home + Away importance per pair.
    aggregated_scores = {
        (home_feature, away_feature): score_of(home_feature) + score_of(away_feature)
        for home_feature, away_feature in features
    }

    # Nothing to rank: previously this case crashed on the first-element lookup.
    if not aggregated_scores:
        return {}

    # Highest combined score first; ties keep their input order.
    sorted_features = sorted(aggregated_scores.items(), key=lambda item: item[1], reverse=True)

    adjusted_scores = {}
    previous_score = None
    for feature_pair, raw_score in sorted_features:
        # The top-ranked pair has no predecessor and is never adjusted.
        # NOTE(review): the floor is taken from the predecessor's raw score,
        # not its adjusted score, so a raised pair does not raise the floor of
        # the pair below it - confirm this is the intended reading of the rule.
        if previous_score is not None and raw_score < previous_score / 3:
            adjusted_scores[feature_pair] = previous_score / 3
        else:
            adjusted_scores[feature_pair] = raw_score
        previous_score = raw_score

    # Share each pair's score equally between its Home and Away feature.
    final_weights = {}
    for (home_feature, away_feature), score in adjusted_scores.items():
        final_weights[home_feature] = score / 2
        final_weights[away_feature] = score / 2

    return final_weights

def create_strength_features_separate(df, chi2_df):
    """Add weighted Attack, Midfield and Defense strength columns for both sides.

    For each of the three groups the chi-square based weights are obtained
    from `adjust_feature_weights`; a side's strength is the weighted sum of
    that side's features in the group. Six columns are written onto `df`
    ('Home/Away Attack Strength', 'Home/Away Midfield Strength',
    'Home/Away Defense Strength') and the same frame is returned.
    """
    # (output columns, (home feature, away feature) pairs) for every group.
    strength_groups = [
        (
            ('Home Attack Strength', 'Away Attack Strength'),
            [
                ("Home Shots on Target", "Away Shots on Target"),
                ("Home Progressive Passes", "Away Progressive Passes"),
                ("Home Touches Att 3rd", "Away Touches Att 3rd"),
                ("Home Take Ons Won", "Away Take Ons Won"),
                ("Home Corners", "Away Corners"),
            ],
        ),
        (
            ('Home Midfield Strength', 'Away Midfield Strength'),
            [
                ("Home Touches Mid 3rd", "Away Touches Mid 3rd"),
                ("Home Passes Completed", "Away Passes Completed"),
                ("Home Passes PCT", "Away Passes PCT"),
                ("Home Carries", "Away Carries"),
            ],
        ),
        (
            ('Home Defense Strength', 'Away Defense Strength'),
            [
                ("Home Tackles", "Away Tackles"),
                ("Home Tackles Won", "Away Tackles Won"),
                ("Home Blocks", "Away Blocks"),
                ("Home Interceptions", "Away Interceptions"),
            ],
        ),
    ]

    # Work out all weights first, before any column is written to `df`.
    group_weights = [
        adjust_feature_weights(feature_pairs, chi2_df)
        for _, feature_pairs in strength_groups
    ]

    for ((home_column, away_column), feature_pairs), weights in zip(strength_groups, group_weights):
        # Weighted sum over the side's own features in this group.
        df[home_column] = sum(df[home] * weights[home] for home, _ in feature_pairs)
        df[away_column] = sum(df[away] * weights[away] for _, away in feature_pairs)

    return df


updated_df = create_strength_features_separate(updated_df, chi2_df)


In [None]:
# TODO: remove these 4 features (per-side aliases of the final score).
for goal_alias, score_column in {
    'Home Goals Scored': 'Full Time Home Goals',
    'Home Goals Conceded': 'Full Time Away Goals',
    'Away Goals Scored': 'Full Time Away Goals',
    'Away Goals Conceded': 'Full Time Home Goals',
}.items():
    updated_df[goal_alias] = updated_df[score_column]

# League points earned in the match by each side: 3 win, 1 draw, 0 otherwise.
updated_df["Home Performance"] = updated_df["Match Outcome"].apply(
    lambda outcome: {1: 3, 0: 1}.get(outcome, 0)
)
updated_df["Away Performance"] = updated_df["Match Outcome"].apply(
    lambda outcome: {-1: 3, 0: 1}.get(outcome, 0)
)


def _mean_of_previous_five(values):
    """Mean of up to five preceding rows; the shift excludes the current row."""
    return values.shift().rolling(window=5, min_periods=1).mean()


# (new column, grouping column, source column). Home-side features are grouped
# by 'Home Team' and away-side features by 'Away Team', so each history only
# contains matches the team played in that role.
for form_column, team_column, source_column in [
    ('Home Avg Attacking Strength', 'Home Team', 'Home Attack Strength'),
    ('Home Avg Defense Strength', 'Home Team', 'Home Defense Strength'),
    ('Away Avg Attacking Strength', 'Away Team', 'Away Attack Strength'),
    ('Away Avg Defense Strength', 'Away Team', 'Away Defense Strength'),
    ('Home Avg Midfield Strength', 'Home Team', 'Home Midfield Strength'),
    ('Away Avg Midfield Strength', 'Away Team', 'Away Midfield Strength'),
    ('Home Recent Performance', 'Home Team', 'Home Performance'),
    ('Away Recent Performance', 'Away Team', 'Away Performance'),
]:
    updated_df[form_column] = (
        updated_df.groupby(team_column)[source_column].transform(_mean_of_previous_five)
    )

# Rolling averages of the four goal columns above were computed here in an
# earlier version of this cell and are currently disabled.

In [None]:
# Drop rows with any missing value. The shifted rolling features are NaN on
# the first row of each team group, so those rows are removed here.
updated_df.dropna(inplace=True)

In [None]:
# Every team seen as home or away side starts from the same baseline rating.
elo_ratings = dict.fromkeys(
    pd.concat([updated_df['Home Team'], updated_df['Away Team']]).unique(),
    1500,
)


# Compute ELO ratings
def compute_elo(row):
    """Return both teams' pre-match ELO ratings and update `elo_ratings` with the result.

    Stateful: every call reads and then overwrites the two teams' entries in
    the module-level `elo_ratings` dict, so rows must be processed once each,
    in the order the matches were played.

    The update factor grows with the goal margin: 30 * sqrt(1 + |margin|).
    """
    home_team, away_team = row['Home Team'], row['Away Team']
    home_before, away_before = elo_ratings[home_team], elo_ratings[away_team]

    # Expected scores from the rating gap (logistic curve, base 10, scale 400).
    expected_home = 1 / (1 + 10 ** ((away_before - home_before) / 400))
    expected_away = 1 - expected_home

    home_goals, away_goals = row['Full Time Home Goals'], row['Full Time Away Goals']
    k = 30 * (1 + abs(home_goals - away_goals)) ** 0.5

    # Actual score: 1 for a win, 0.5 for a draw, 0 for a loss.
    if home_goals > away_goals:
        actual_home = 1
    elif home_goals == away_goals:
        actual_home = 0.5
    else:
        actual_home = 0
    actual_away = 1 - actual_home

    elo_ratings[home_team] = home_before + k * (actual_home - expected_home)
    elo_ratings[away_team] = away_before + k * (actual_away - expected_away)

    # Expose only the ratings held before kick-off.
    return pd.Series({'Home ELO': home_before, 'Away ELO': away_before})


updated_df[['Home ELO', 'Away ELO']] = updated_df.apply(compute_elo, axis=1)

In [None]:
# Feature selection: pre-match ratings and rolling form for both sides.
features = [
    'Home ELO', 'Away ELO',
    'Home Avg Attacking Strength', 'Away Avg Attacking Strength',
    'Home Avg Defense Strength', 'Away Avg Defense Strength',
    'Home Recent Performance', 'Away Recent Performance',
    'Home Avg Midfield Strength', 'Away Avg Midfield Strength',
]

# Split by side; each goal model below only sees its own side's features.
home_features = list(filter(lambda name: 'Home' in name, features))
away_features = list(filter(lambda name: 'Away' in name, features))

In [None]:
# Show the current state of the engineered feature table.
updated_df

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Targets: goals scored by each side. Inputs: that side's own features only.
y_home_scored = updated_df['Full Time Home Goals']
y_away_scored = updated_df['Full Time Away Goals']
X_home_scored = updated_df[home_features]
X_away_scored = updated_df[away_features]

# One call splits all four objects on the same shuffled rows, so the home and
# away test sets describe the same matches. The "*_conceded" names hold the
# away-side data (away goals scored are the goals the home side conceded).
(
    X_train_scored, X_test_scored,
    X_train_conceded, X_test_conceded,
    y_train_scored, y_test_scored,
    y_train_conceded, y_test_conceded,
) = train_test_split(
    X_home_scored, X_away_scored, y_home_scored, y_away_scored,
    test_size=0.2, random_state=31,
)

In [None]:
# One random forest per target: home goals from the home-side features and
# away goals from the away-side features (`fit` returns the fitted estimator).
home_scored_model = RandomForestRegressor(random_state=31).fit(X_train_scored, y_train_scored)
away_scored_model = RandomForestRegressor(random_state=31).fit(X_train_conceded, y_train_conceded)

# Predictions on the held-out matches. The away model's output is stored as
# the goals conceded by the home side.
home_scored_preds = home_scored_model.predict(X_test_scored)
home_conceded_preds = away_scored_model.predict(X_test_conceded)

In [30]:
from sklearn.base import clone
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import BayesianRidge, Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score, precision_score, f1_score, classification_report

# Candidate regressors, each evaluated as a pair of goal models (home / away).
models = {
    "Random Forest": RandomForestRegressor(random_state=31),
    "K-Nearest Neighbors": KNeighborsRegressor(n_neighbors=5),
    "Bayesian Regression": BayesianRidge(),
    "Ridge Regression": Ridge(alpha=1.0)
}

# Predicted scores closer together than this many goals are called a draw.
draw_threshold = 0.15

# True results of the held-out matches (same rows as the test features).
actual_outcomes = updated_df.loc[X_test_scored.index, 'Match Outcome'].values

for model_name, base_model in models.items():
    print(f"Evaluating {model_name}...")

    # Fresh, unfitted copies with identical hyper-parameters. The loop uses
    # its own variable names so that it does not overwrite `home_scored_model`
    # and `home_scored_preds`, which other cells read.
    candidate_home_model = clone(base_model)
    candidate_away_model = clone(base_model)

    # Each model is fitted on its own side's feature set.
    candidate_home_model.fit(X_train_scored, y_train_scored)
    candidate_away_model.fit(X_train_conceded, y_train_conceded)

    candidate_home_preds = candidate_home_model.predict(X_test_scored)
    candidate_away_preds = candidate_away_model.predict(X_test_conceded)

    # Regression errors on the predicted goal counts.
    mae_scored = mean_absolute_error(y_test_scored, candidate_home_preds)
    mse_scored = mean_squared_error(y_test_scored, candidate_home_preds)
    mae_conceded = mean_absolute_error(y_test_conceded, candidate_away_preds)
    mse_conceded = mean_squared_error(y_test_conceded, candidate_away_preds)

    # Convert the two goal predictions into an outcome: 0 draw, 1 home win, -1 away win.
    predicted_outcomes = np.select(
        [
            np.abs(candidate_home_preds - candidate_away_preds) < draw_threshold,
            candidate_home_preds > candidate_away_preds,
        ],
        [0, 1],
        default=-1,
    ).tolist()

    # Classification metrics on the derived outcomes.
    accuracy = accuracy_score(actual_outcomes, predicted_outcomes)
    precision = precision_score(actual_outcomes, predicted_outcomes, average='weighted', zero_division=0)
    f1 = f1_score(actual_outcomes, predicted_outcomes, average='weighted', zero_division=0)

    # Print results
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Precision (Weighted): {precision:.2f}")
    print(f"F1 Score (Weighted): {f1:.2f}")
    print("\nClassification Report:")
    # Sorted labels are -1, 0, 1, which is the order of the target names.
    print(classification_report(actual_outcomes, predicted_outcomes, target_names=['Away Win', 'Draw', 'Home Win']))

    print("\nHome Goals Scored Metrics:")
    print(f"Home Scored: MAE: {mae_scored:.2f}, MSE: {mse_scored:.2f}")

    # "Conceded" by the home side, i.e. the away-goals model.
    print("\nHome Goals Conceded Metrics:")
    print(f"Home Conceded MAE: {mae_conceded:.2f}, MSE: {mse_conceded:.2f}")
    print("-" * 50)


Evaluating Random Forest...
Accuracy: 0.47
Precision (Weighted): 0.46
F1 Score (Weighted): 0.46

Classification Report:
              precision    recall  f1-score   support

    Away Win       0.49      0.43      0.46       171
        Draw       0.24      0.21      0.22       125
    Home Win       0.55      0.64      0.59       248

    accuracy                           0.47       544
   macro avg       0.43      0.42      0.42       544
weighted avg       0.46      0.47      0.46       544


Home Goals Scored Metrics:
Home Scored: MAE: 0.99, MSE: 1.65

Home Goals Conceded Metrics:
Home Conceded MAE: 0.99, MSE: 1.72
--------------------------------------------------
Evaluating K-Nearest Neighbors...
Accuracy: 0.49
Precision (Weighted): 0.46
F1 Score (Weighted): 0.47

Classification Report:
              precision    recall  f1-score   support

    Away Win       0.47      0.52      0.49       171
        Draw       0.25      0.12      0.16       125
    Home Win       0.56      0.6

In [22]:
from sklearn.metrics import mean_squared_error, accuracy_score, precision_score, f1_score, classification_report

# Evaluation of the goal predictions held in `home_scored_preds` and
# `home_conceded_preds`, which must already exist in the kernel.

# Regression errors on the predicted goal counts.
mae_scored = mean_absolute_error(y_test_scored, home_scored_preds)
mse_scored = mean_squared_error(y_test_scored, home_scored_preds)
mae_conceded = mean_absolute_error(y_test_conceded, home_conceded_preds)
mse_conceded = mean_squared_error(y_test_conceded, home_conceded_preds)

# Predicted scores closer together than the threshold count as a draw (0);
# otherwise the side with more predicted goals wins (1 home, -1 away).
draw_threshold = 0.2
predicted_outcomes = np.select(
    [
        np.abs(home_scored_preds - home_conceded_preds) < draw_threshold,
        home_scored_preds > home_conceded_preds,
    ],
    [0, 1],
    default=-1,
).tolist()

# True results for the same held-out matches.
actual_outcomes = updated_df.loc[y_test_scored.index, 'Match Outcome'].values

# Calculate metrics
accuracy = accuracy_score(actual_outcomes, predicted_outcomes)
precision = precision_score(actual_outcomes, predicted_outcomes, average='weighted')
f1 = f1_score(actual_outcomes, predicted_outcomes, average='weighted')

# Print results
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision (Weighted): {precision:.2f}")
print(f"F1 Score (Weighted): {f1:.2f}")
print("\nClassification Report:")
# Sorted labels are -1, 0, 1, which is the order of the target names.
print(classification_report(actual_outcomes, predicted_outcomes, target_names=['Away Win', 'Draw', 'Home Win']))

print("Home Goals Scored Metrics:")
print(f"Home Scored: MAE: {mae_scored:.2f}, MSE: {mse_scored:.2f}")

print("\nHome Goals Conceded Metrics:")
print(f"Home Conceded MAE: {mae_conceded:.2f}, MSE: {mse_conceded:.2f}")

Accuracy: 0.46
Precision (Weighted): 0.47
F1 Score (Weighted): 0.46

Classification Report:
              precision    recall  f1-score   support

    Away Win       0.53      0.40      0.45       171
        Draw       0.24      0.28      0.26       125
    Home Win       0.54      0.60      0.57       248

    accuracy                           0.46       544
   macro avg       0.44      0.42      0.43       544
weighted avg       0.47      0.46      0.46       544

Home Goals Scored Metrics:
Home Scored: MAE: 0.99, MSE: 1.65

Home Goals Conceded Metrics:
Home Conceded MAE: 0.99, MSE: 1.72
