# Orbited Probit Model

In [904]:
import pandas as pd
# Remove the limit on the number of columns displayed
pd.set_option('display.max_columns', None)

In [905]:
data = pd.read_csv("modelling_table.csv")
len(data)

179

In [906]:
data.columns

Index(['Unnamed: 0', 'Team 1', 'Team 2', 'Team 1 Score', 'Team 2 Score',
       'Prev Team 1 Form', 'Prev Team 2 Form', 'Prev Team 1 Goal Difference',
       'Prev Team 2 Goal Difference', 'Prev Team 1 Points',
       'Prev Team 2 Points', 'Cumulative_Avg_HST', 'Cumulative_Avg_AST',
       'Cumulative_Avg_HF', 'Cumulative_Avg_AF', 'Cumulative_Avg_HC',
       'Cumulative_Avg_AC', 'Cumulative_Avg_HY', 'team_1_fdr', 'team_2_fdr',
       'Cumulative_Avg_AY', 'Cumulative_Avg_HR', 'Cumulative_Avg_AR',
       'H2H_Home_Total_Wins_Last_4', 'H2H_Away_Total_Wins_Last_4',
       'H2H_Draws_Last_4', 'Cum PPDA Team 2', 'Avg PPDA Team 1',
       'Avg PPDA Team 2', 'Avg Deep Completions Team 1',
       'Avg Deep Completions Team 2', 'Weighted Avg PPDA Team 1',
       'Weighted Avg PPDA Team 2', 'Weighted Avg Deep Completions Team 1',
       'Weighted Avg Deep Completions Team 2', 'Avg xG', 'Weighted Avg xG',
       'Avg xG.1', 'Weighted Avg xG.1', 'Avg xG Team 1', 'Avg xG Team 2',
       'Weighted Av

In [907]:
missing_values = data.isnull().sum()
# Filter to display columns with missing values only
missing_values_present = missing_values[missing_values > 0]

if not missing_values_present.empty:
    print("Columns with missing values:")
    print(missing_values_present)
else:
    print("No missing values found in the dataset.")

Columns with missing values:
Team 1 Score                    1
Team 2 Score                    1
H2H_Home_Total_Wins_Last_4    172
H2H_Away_Total_Wins_Last_4    172
H2H_Draws_Last_4              172
dtype: int64


In [908]:
# Assuming 'data' is your pandas DataFrame
missing_values = data[data['team1_player_average'].isnull()]

# Display the rows with missing values
missing_values.head(19)

Unnamed: 0.1,Unnamed: 0,Team 1,Team 2,Team 1 Score,Team 2 Score,Prev Team 1 Form,Prev Team 2 Form,Prev Team 1 Goal Difference,Prev Team 2 Goal Difference,Prev Team 1 Points,Prev Team 2 Points,Cumulative_Avg_HST,Cumulative_Avg_AST,Cumulative_Avg_HF,Cumulative_Avg_AF,Cumulative_Avg_HC,Cumulative_Avg_AC,Cumulative_Avg_HY,team_1_fdr,team_2_fdr,Cumulative_Avg_AY,Cumulative_Avg_HR,Cumulative_Avg_AR,H2H_Home_Total_Wins_Last_4,H2H_Away_Total_Wins_Last_4,H2H_Draws_Last_4,Cum PPDA Team 2,Avg PPDA Team 1,Avg PPDA Team 2,Avg Deep Completions Team 1,Avg Deep Completions Team 2,Weighted Avg PPDA Team 1,Weighted Avg PPDA Team 2,Weighted Avg Deep Completions Team 1,Weighted Avg Deep Completions Team 2,Avg xG,Weighted Avg xG,Avg xG.1,Weighted Avg xG.1,Avg xG Team 1,Avg xG Team 2,Weighted Avg xG Team 1,Weighted Avg xG Team 2,Cum np_xg,Avg team1_np_xg,Avg team2_np_xg,Weighted Avg team1_np_xg,Weighted Avg team2_np_xg,Avg team1_expected_points,Avg team2_expected_points,Weighted Avg team1_expected_points,Weighted Avg team2_expected_points,Avg Team 1 Won,Avg Team 2 Won,Avg Team 1 Drawn,Avg Team 2 Drawn,Avg Team 1 Lost,Avg Team 2 Lost,Avg Team 1 Goals Scored,Avg Team 2 Goals Scored,Avg Team 1 Goals Conceded,Avg Team 2 Goals Conceded,Avg Team 1 Goal Difference,Avg Team 2 Goal Difference,time_period_encoded,ppg_team1,ppg_team2,team1_player_average,team2_player_average


In [909]:
data = data.drop(columns = ['H2H_Home_Total_Wins_Last_4','H2H_Away_Total_Wins_Last_4','H2H_Draws_Last_4'])


## Team 1

In [911]:
import numpy as np
import pandas as pd
from statsmodels.miscmodels.ordinal_model import OrderedModel
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

data = data.dropna()
# print(data.columns)
# Discretize 'Team 1 Score' into ordinal categories (e.g., 0 = Low, 1 = Medium, 2 = High)
data['Team 1 Score Category'] = pd.cut(
    data['Team 1 Score'],
    bins=[-float('inf'), 0, 2, float('inf')],
    labels=[0, 1, 2]  # 0 = Low, 1 = Medium, 2 = High
).astype(int)
# Handle missing values for 'team1_player_average' if necessary
if 'team1_player_average' in data.columns:
    print("yes")
else:
    print("Column 'team1_player_average' not found in the dataset.")

# Feature selection for training
X_team1 = data[['Prev Team 1 Form', 'Prev Team 1 Goal Difference',
       'Prev Team 1 Points','Cumulative_Avg_HST', 
       'Cumulative_Avg_HF',  'Cumulative_Avg_HC',
        'Cumulative_Avg_HY', 'team_1_fdr',
        'Cumulative_Avg_HR', 
        'Avg PPDA Team 1',
       'Avg Deep Completions Team 1',
       'Weighted Avg PPDA Team 1',
        'Weighted Avg Deep Completions Team 1',
        'Avg xG', 'Weighted Avg xG',
        'Avg xG Team 1', 
       'Weighted Avg xG Team 1', 'Cum np_xg',
       'Avg team1_np_xg', 'Weighted Avg team1_np_xg',
        'Avg team1_expected_points',
       'Weighted Avg team1_expected_points',
        'Avg Team 1 Won',
        'Avg Team 1 Drawn', 
       'Avg Team 1 Lost', 'Avg Team 1 Goals Scored',
      'Avg Team 1 Goals Conceded',
       'Avg Team 1 Goal Difference',
       'time_period_encoded', 'ppg_team1',
      'team1_player_average']]

# Define target (dependent variable)
y_team1 = data['Team 1 Score Category']

# Scale the features
scaler = StandardScaler()
X_team1_scaled = scaler.fit_transform(X_team1)

# Fit the Ordered Probit model
ordered_model_team1 = OrderedModel(y_team1, X_team1_scaled, distr='probit')
result_team1 = ordered_model_team1.fit(method='bfgs')

# Display the summary of the model
print(result_team1.summary())

# Predict probabilities for each category (Low, Medium, High) for training data
predicted_probs = result_team1.predict()
predicted_probs_df = pd.DataFrame(predicted_probs, columns=['Low', 'Medium', 'High'])

# Assign the most likely category as the predicted class
predicted_classes = predicted_probs.argmax(axis=1)

# Evaluation metrics for the training data
accuracy = accuracy_score(y_team1, predicted_classes)
conf_matrix = confusion_matrix(y_team1, predicted_classes)
classification_rep = classification_report(y_team1, predicted_classes, target_names=['Low', 'Medium', 'High'])

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)



yes
Optimization terminated successfully.
         Current function value: 0.827977
         Iterations: 128
         Function evaluations: 130
         Gradient evaluations: 130
                               OrderedModel Results                              
Dep. Variable:     Team 1 Score Category   Log-Likelihood:                -147.38
Model:                      OrderedModel   AIC:                             360.8
Method:               Maximum Likelihood   BIC:                             465.8
Date:                   Fri, 03 Jan 2025                                         
Time:                           18:06:38                                         
No. Observations:                    178                                         
Df Residuals:                        145                                         
Df Model:                             31                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
----

In [919]:
# Load the new data
new_data = pd.read_csv("prediction_data.csv")

# Extract features for prediction
X_new_team1 = new_data[['Prev Team 2 Form', 
       'Prev Team 2 Goal Difference', 
       'Prev Team 2 Points', 'Cumulative_Avg_AST',
        'Cumulative_Avg_AF', 
       'Cumulative_Avg_AC',  'team_2_fdr',
       'Cumulative_Avg_AY',  'Cumulative_Avg_AR',
        'Cum PPDA Team 2', 
       'Avg PPDA Team 2', 
       'Avg Deep Completions Team 2', 
       'Weighted Avg PPDA Team 2', 
       'Weighted Avg Deep Completions Team 2',
       'Avg xG.1', 'Weighted Avg xG.1', 'Avg xG Team 2',
        'Weighted Avg xG Team 2', 
        'Avg team2_np_xg', 
       'Weighted Avg team2_np_xg', 
       'Avg team2_expected_points', 
       'Weighted Avg team2_expected_points', 
       'Avg Team 2 Won','Avg Team 2 Drawn',
       'Avg Team 2 Lost', 
       'Avg Team 2 Goals Scored', 
       'Avg Team 2 Goals Conceded', 
       'Avg Team 2 Goal Difference', 'time_period_encoded', 
       'ppg_team2', 'team2_player_average']]

# Ensure column order matches training data
X_new_team1 = X_new_team1[X_team1.columns]

# Scale the new data using the same scaler fitted on training data
X_new_team1_scaled = scaler.transform(X_new_team1)

# Predict probabilities for the new data
predicted_probs_new = result_team1.predict(X_new_team1_scaled)

# Convert probabilities into a DataFrame
predicted_probs_new_df = pd.DataFrame(predicted_probs_new, columns=['Low', 'Medium', 'High'])

# Assign the most likely category as the predicted class
predicted_probs_new_df['Predicted Class'] = predicted_probs_new_df.idxmax(axis=1)

# Display predictions
print(predicted_probs_new_df)


KeyError: "['Prev Team 1 Form', 'Prev Team 1 Goal Difference', 'Prev Team 1 Points', 'Cumulative_Avg_HST', 'Cumulative_Avg_HF', 'Cumulative_Avg_HC', 'Cumulative_Avg_HY', 'team_1_fdr', 'Cumulative_Avg_HR', 'Avg PPDA Team 1', 'Avg Deep Completions Team 1', 'Weighted Avg PPDA Team 1', 'Weighted Avg Deep Completions Team 1', 'Avg xG', 'Weighted Avg xG', 'Avg xG Team 1', 'Weighted Avg xG Team 1', 'Cum np_xg', 'Avg team1_np_xg', 'Weighted Avg team1_np_xg', 'Avg team1_expected_points', 'Weighted Avg team1_expected_points', 'Avg Team 1 Won', 'Avg Team 1 Drawn', 'Avg Team 1 Lost', 'Avg Team 1 Goals Scored', 'Avg Team 1 Goals Conceded', 'Avg Team 1 Goal Difference', 'ppg_team1', 'team1_player_average'] not in index"

# Bivariate Poisson

In [None]:
len(modelling_table)

In [957]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from scipy.stats import poisson
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

# Load the modelling table
modelling_table = pd.read_csv("modelling_table.csv")

# Define feature columns for Team 1 and Team 2
team1_features = ['Prev Team 1 Form', 'team_1_fdr', 'ppg_team1', 'Cumulative_Avg_HST', 'Avg xG','Avg xG Team 1',
                  'Avg Team 1 Goal Difference', 'team1_player_average','time_period_encoded']
team2_features = ['Prev Team 2 Form', 'team_2_fdr', 'ppg_team2', 'Cumulative_Avg_AST', 
                  'Avg xG.1','Avg xG Team 2', 'Avg Team 2 Goal Difference','team2_player_average','time_period_encoded']

# Extract features and target variables
X_team1 = modelling_table[team1_features]
X_team2 = modelling_table[team2_features]
y_team1 = modelling_table['Team 1 Score']
y_team2 = modelling_table['Team 2 Score']

# Scale the features
scaler_team1 = StandardScaler()
scaler_team2 = StandardScaler()
X_team1_scaled = scaler_team1.fit_transform(X_team1)
X_team2_scaled = scaler_team2.fit_transform(X_team2)

# Define the bivariate Poisson log-likelihood function
def bivariate_poisson_loglik(params, X1, X2, y1, y2):
    """
    Computes the negative log-likelihood for the bivariate Poisson model.
    """
    beta1 = params[:X1.shape[1]]
    beta2 = params[X1.shape[1]:X1.shape[1] + X2.shape[1]]
    rho = params[-1]  # Shared covariance term

    # Compute Poisson rate parameters
    lambda1 = np.exp(np.dot(X1, beta1))
    lambda2 = np.exp(np.dot(X2, beta2))
    lambda12 = np.exp(rho)  # Shared component

    # Calculate log-likelihood
    loglik = (
        poisson.logpmf(y1, lambda1) +
        poisson.logpmf(y2, lambda2) +
        poisson.logpmf(np.minimum(y1, y2), lambda12)
    )
    return -np.sum(loglik)  # Negative log-likelihood for minimization

# Initial parameters
initial_params = np.ones(X_team1_scaled.shape[1] + X_team2_scaled.shape[1] + 1)

# Fit the model
result = minimize(
    bivariate_poisson_loglik,
    x0=initial_params,
    args=(X_team1_scaled, X_team2_scaled, y_team1.values, y_team2.values),
    method='L-BFGS-B'
)

# Extract model parameters
params = result.x
print("\nModel Parameters:\n", params)

# Make predictions for training data
def predict_scores(X1_scaled, X2_scaled, params):
    """
    Predict scores using the fitted bivariate Poisson model.
    """
    lambda1 = np.exp(np.dot(X1_scaled, params[:X1_scaled.shape[1]]))
    lambda2 = np.exp(np.dot(X2_scaled, params[X1_scaled.shape[1]:-1]))
    lambda12 = np.exp(params[-1])
    return lambda1, lambda2, lambda12

lambda1_pred, lambda2_pred, lambda12_pred = predict_scores(X_team1_scaled, X_team2_scaled, params)

# Combine predictions into a DataFrame
predictions = pd.DataFrame({
    'Team 1 Predicted Score': lambda1_pred,
    'Team 2 Predicted Score': lambda2_pred,
    'Shared Covariance Term': lambda12_pred
})

print("\nTraining Data Predictions:\n", predictions.head())

# Calculate classification accuracy
# Define actual outcomes
actual_outcomes = np.where(y_team1 > y_team2, 'Win',
                   np.where(y_team1 < y_team2, 'Lose', 'Draw'))

# Define predicted outcomes
predicted_outcomes = np.where(lambda1_pred > lambda2_pred, 'Win',
                      np.where(lambda1_pred < lambda2_pred, 'Lose', 'Draw'))

# Calculate accuracy and classification report
accuracy = accuracy_score(actual_outcomes, predicted_outcomes)
classification_report_output = classification_report(actual_outcomes, predicted_outcomes)

print(f"\nClassification Accuracy: {accuracy:.2f}")
print("\nClassification Report:\n", classification_report_output)



Model Parameters:
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]

Training Data Predictions:
    Team 1 Predicted Score  Team 2 Predicted Score  Shared Covariance Term
0              134.503783               40.151420                2.718282
1                0.000062                0.105272                2.718282
2                0.000006                0.146248                2.718282
3                0.078487                0.000027                2.718282
4                0.263601                0.010413                2.718282

Classification Accuracy: 0.45

Classification Report:
               precision    recall  f1-score   support

        Draw       0.00      0.00      0.00        51
        Lose       0.42      0.68      0.52        56
         Win       0.49      0.60      0.54        72

    accuracy                           0.45       179
   macro avg       0.30      0.43      0.35       179
weighted avg       0.33      0.45      0.38       179



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [933]:
# Recompute predictions using the previously defined function and parameters
lambda1_pred, lambda2_pred, lambda12_pred = predict_scores(X_team1_scaled, X_team2_scaled, params)

# Define actual outcomes
actual_outcomes = np.where(y_team1 > y_team2, 'Win',
                   np.where(y_team1 < y_team2, 'Lose', 'Draw'))

# Define predicted outcomes
predicted_outcomes = np.where(lambda1_pred > lambda2_pred, 'Win',
                      np.where(lambda1_pred < lambda2_pred, 'Lose', 'Draw'))

# Calculate accuracy and classification report
accuracy = accuracy_score(actual_outcomes, predicted_outcomes)
classification_report_output = classification_report(actual_outcomes, predicted_outcomes)

accuracy, classification_report_output


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


(0.3407821229050279,
 '              precision    recall  f1-score   support\n\n        Draw       0.00      0.00      0.00        51\n        Lose       0.30      0.45      0.36        56\n         Win       0.38      0.50      0.43        72\n\n    accuracy                           0.34       179\n   macro avg       0.23      0.32      0.26       179\nweighted avg       0.25      0.34      0.28       179\n')

In [925]:
new_data.head()

Unnamed: 0.1,Unnamed: 0,Team 1,Team 2,Team 1 Score,Team 2 Score,Prev Team 1 Form,Prev Team 2 Form,Prev Team 1 Goal Difference,Prev Team 2 Goal Difference,Prev Team 1 Points,Prev Team 2 Points,Cumulative_Avg_HST,Cumulative_Avg_AST,Cumulative_Avg_HF,Cumulative_Avg_AF,Cumulative_Avg_HC,Cumulative_Avg_AC,Cumulative_Avg_HY,team_1_fdr,team_2_fdr,Cumulative_Avg_AY,Cumulative_Avg_HR,Cumulative_Avg_AR,H2H_Home_Total_Wins_Last_4,H2H_Away_Total_Wins_Last_4,H2H_Draws_Last_4,Cum PPDA Team 2,Avg PPDA Team 1,Avg PPDA Team 2,Avg Deep Completions Team 1,Avg Deep Completions Team 2,Weighted Avg PPDA Team 1,Weighted Avg PPDA Team 2,Weighted Avg Deep Completions Team 1,Weighted Avg Deep Completions Team 2,Avg xG,Weighted Avg xG,Avg xG.1,Weighted Avg xG.1,Avg xG Team 1,Avg xG Team 2,Weighted Avg xG Team 1,Weighted Avg xG Team 2,Cum np_xg,Avg team1_np_xg,Avg team2_np_xg,Weighted Avg team1_np_xg,Weighted Avg team2_np_xg,Avg team1_expected_points,Avg team2_expected_points,Weighted Avg team1_expected_points,Weighted Avg team2_expected_points,Avg Team 1 Won,Avg Team 2 Won,Avg Team 1 Drawn,Avg Team 2 Drawn,Avg Team 1 Lost,Avg Team 2 Lost,Avg Team 1 Goals Scored,Avg Team 2 Goals Scored,Avg Team 1 Goals Conceded,Avg Team 2 Goals Conceded,Avg Team 1 Goal Difference,Avg Team 2 Goal Difference,time_period_encoded,ppg_team1,ppg_team2,team1_player_average,team2_player_average
0,189,Tottenham,Newcastle Utd,,,0.8,2.4,13,11,24,32,4.789474,4.0,13.052632,10.631579,5.210526,5.736842,2.736842,3.0,4.0,2.210526,0.052632,0.052632,,,,211.713639,9.969133,11.142823,10.368421,9.105263,29.382708,34.601398,30.559557,28.274238,1.836842,5.41385,1.694737,5.262604,2.093656,1.752172,6.170776,5.440954,70.787234,2.053595,1.672049,6.052702,5.192151,1.579384,1.540937,4.655027,4.785014,4.263158,4.263158,1.421053,3.052632,4.315789,2.684211,21.210526,13.210526,12.052632,11.263158,9.157895,1.947368,0,1.263158,1.684211,11.427816,9.600594
1,190,Southampton,Brentford,,,0.2,0.8,-27,0,6,24,7.611111,3.777778,8.5,9.888889,7.444444,4.444444,1.666667,2.0,2.0,2.0,0.055556,0.111111,,,,226.084585,12.367355,11.899189,6.052632,6.0,40.356632,36.950112,19.750693,18.631579,1.015789,3.314681,1.436842,4.461773,1.258535,1.54071,4.106798,4.78431,49.95486,1.168616,1.460587,3.81338,4.535506,0.693532,1.169984,2.263103,3.633109,0.526316,4.263158,1.315789,1.263158,8.157895,4.421053,6.473684,18.421053,20.368421,18.210526,-13.894737,0.210526,0,0.315789,1.333333,8.363351,0.0
2,191,Crystal Palace,Chelsea,,,1.6,1.4,-7,15,20,35,5.578947,5.368421,13.631579,10.894737,4.842105,5.0,3.421053,4.0,3.0,2.473684,0.105263,0.052632,,,,210.683989,13.709126,11.088631,6.157895,9.157895,43.291976,32.682281,19.445983,26.99169,1.357895,4.288089,2.026316,5.972299,1.612646,2.167603,5.092565,6.388724,68.780043,1.572584,2.047418,4.966055,6.034496,1.3417,1.660679,4.236947,4.894633,1.157895,5.473684,4.210526,2.736842,4.631579,1.789474,8.894737,21.315789,14.052632,11.842105,-5.157895,9.473684,0,1.052632,1.842105,10.914909,11.373515
3,192,Bournemouth,Everton,,,1.8,1.2,6,-9,30,17,4.166667,3.833333,10.5,10.166667,5.277778,4.611111,1.555556,2.0,3.0,1.555556,0.055556,0.055556,,,,279.058465,9.788856,15.503248,7.315789,5.444444,30.912178,46.509744,23.102493,16.333333,1.957895,6.182825,0.977778,2.933333,2.094688,1.086958,6.614804,3.260873,54.797291,1.854319,1.086958,5.855743,3.260873,1.838579,0.959494,5.806039,2.878483,3.894737,1.611111,2.894737,3.333333,3.210526,4.555556,14.210526,8.666667,13.052632,16.055556,1.157895,-7.388889,0,1.578947,0.944444,8.929617,9.039789
4,193,Aston Villa,Leicester City,,,1.4,0.2,-3,-20,29,14,4.842105,4.631579,11.631579,11.0,5.421053,4.894737,2.421053,2.0,4.0,2.105263,0.105263,0.0,,,,315.625745,14.234724,16.611881,7.842105,4.473684,42.704172,53.332882,23.526316,14.362881,1.526316,4.578947,0.989474,3.176731,1.848764,1.181223,5.546292,3.792346,53.763907,1.728579,1.1011,5.185738,3.535111,1.626495,0.813658,4.879484,2.61227,4.894737,1.631579,2.473684,3.421053,2.631579,4.947368,15.842105,12.842105,15.263158,19.947368,0.578947,-7.105263,0,1.526316,0.736842,8.962402,6.759556


In [927]:
# Load new data
new_data = pd.read_csv("prediction_data.csv")

# Function to preprocess and predict for new data
def predict_new_data(new_data, scaler_team1, scaler_team2, params):
    """
    Predict scores for new data.
    """
    # Align features
    new_team1 = new_data[team1_features]
    new_team2 = new_data[team2_features]

    # Scale the new data
    new_team1_scaled = scaler_team1.transform(new_team1)
    new_team2_scaled = scaler_team2.transform(new_team2)

    # Predict scores
    lambda1, lambda2, lambda12 = predict_scores(new_team1_scaled, new_team2_scaled, params)
    return pd.DataFrame({
        'Team 1 Predicted Score': lambda1,
        'Team 2 Predicted Score': lambda2,
        'Shared Covariance Term': lambda12
    })



# Predict for new data
try:
    new_predictions = predict_new_data(new_data, scaler_team1, scaler_team2, params)
    # Combine predictions for training data with Team 1 and Team 2 columns
    predictions_with_teams = pd.concat([
        new_data[['Team 1', 'Team 2']],  # Include Team 1 and Team 2 from modelling table
        new_predictions
    ], axis=1)
    print("\nNew Data Predictions:\n", predictions_with_teams.tail(10))
except KeyError as e:
    print("\nError: Missing required features in new data:", e)
except Exception as e:
    print("\nAn error occurred during prediction:", e)



New Data Predictions:
             Team 1             Team 2  Team 1 Predicted Score  \
0        Tottenham      Newcastle Utd            2.594740e+09   
1      Southampton          Brentford            3.119385e-06   
2   Crystal Palace            Chelsea            8.259286e+04   
3      Bournemouth            Everton            1.262257e+06   
4      Aston Villa     Leicester City            4.120845e+05   
5  Manchester City           West Ham            6.262083e+08   
6         Brighton            Arsenal            3.232552e+05   
7           Fulham       Ipswich Town            1.483957e+03   
8        Liverpool     Manchester Utd            8.237104e+16   
9           Wolves  Nottingham Forest            1.075538e-01   

   Team 2 Predicted Score  Shared Covariance Term  
0            6.652501e+06                2.718282  
1            1.037263e-01                2.718282  
2            5.891283e+10                2.718282  
3            4.273056e-04                2.718282  


In [929]:
# Add a "Difference" column to new_predictions DataFrame
new_predictions['Score Difference'] = abs(new_predictions['Team 1 Predicted Score'] - new_predictions['Team 2 Predicted Score'])

# Sort by "Score Difference" in descending order to identify the safest bets
safest_bets = new_predictions.sort_values(by='Score Difference', ascending=False)

# Display the top safest bets
print("\nSafest Bets (Highest Score Differences):\n", safest_bets)

# Optional: Save the safest bets to a CSV file for reference
safest_bets.to_csv("safest_bets.csv", index=False)



Safest Bets (Highest Score Differences):
    Team 1 Predicted Score  Team 2 Predicted Score  Shared Covariance Term  \
8            8.237104e+16            9.621221e+01                2.718282   
2            8.259286e+04            5.891283e+10                2.718282   
0            2.594740e+09            6.652501e+06                2.718282   
5            6.262083e+08            3.074293e+01                2.718282   
9            1.075538e-01            3.606112e+07                2.718282   
6            3.232552e+05            5.548395e+06                2.718282   
3            1.262257e+06            4.273056e-04                2.718282   
4            4.120845e+05            2.912717e-03                2.718282   
7            1.483957e+03            1.984472e-04                2.718282   
1            3.119385e-06            1.037263e-01                2.718282   

   Score Difference  
8      8.237104e+16  
2      5.891275e+10  
0      2.588088e+09  
5      6.262083e+08  

In [None]:
new_data = pd.read_csv("prediction_data.csv")
new_data.head(20)

In [None]:
count = new_data.isnull().sum()
print(count[count>0])