# Orbited Probit Model

In [721]:
import pandas as pd
# Remove the limit on the number of columns displayed
pd.set_option('display.max_columns', None)

In [722]:
data = pd.read_csv("modelling_table.csv")
len(data)

179

In [723]:
data.columns

Index(['Unnamed: 0', 'Team 1', 'Team 2', 'Team 1 Score', 'Team 2 Score',
       'Prev Team 1 Form', 'Prev Team 2 Form', 'Prev Team 1 Goal Difference',
       'Prev Team 2 Goal Difference', 'Prev Team 1 Points',
       'Prev Team 2 Points', 'Cumulative_Avg_HST', 'Cumulative_Avg_AST',
       'Cumulative_Avg_HF', 'Cumulative_Avg_AF', 'Cumulative_Avg_HC',
       'Cumulative_Avg_AC', 'Cumulative_Avg_HY', 'team_1_fdr', 'team_2_fdr',
       'Cumulative_Avg_AY', 'Cumulative_Avg_HR', 'Cumulative_Avg_AR',
       'H2H_Home_Total_Wins_Last_4', 'H2H_Away_Total_Wins_Last_4',
       'H2H_Draws_Last_4', 'Cum PPDA Team 2', 'Avg PPDA Team 1',
       'Avg PPDA Team 2', 'Avg Deep Completions Team 1',
       'Avg Deep Completions Team 2', 'Weighted Avg PPDA Team 1',
       'Weighted Avg PPDA Team 2', 'Weighted Avg Deep Completions Team 1',
       'Weighted Avg Deep Completions Team 2', 'Avg xG', 'Weighted Avg xG',
       'Avg xG.1', 'Weighted Avg xG.1', 'Avg xG Team 1', 'Avg xG Team 2',
       'Weighted Av

In [724]:
missing_values = data.isnull().sum()
# Filter to display columns with missing values only
missing_values_present = missing_values[missing_values > 0]

if not missing_values_present.empty:
    print("Columns with missing values:")
    print(missing_values_present)
else:
    print("No missing values found in the dataset.")

Columns with missing values:
Team 1 Score                            1
Team 2 Score                            1
team_1_fdr                              1
team_2_fdr                              1
H2H_Home_Total_Wins_Last_4            172
H2H_Away_Total_Wins_Last_4            172
H2H_Draws_Last_4                      172
Weighted Avg xG                         1
Weighted Avg xG.1                       1
Weighted Avg xG Team 1                  1
Weighted Avg xG Team 2                  1
Weighted Avg team1_np_xg                1
Weighted Avg team2_np_xg                1
Weighted Avg team1_expected_points      1
Weighted Avg team2_expected_points      1
dtype: int64


In [725]:
# Assuming 'data' is your pandas DataFrame
missing_values = data[data['team1_player_average'].isnull()]

# Display the rows with missing values
missing_values.head(19)

Unnamed: 0.1,Unnamed: 0,Team 1,Team 2,Team 1 Score,Team 2 Score,Prev Team 1 Form,Prev Team 2 Form,Prev Team 1 Goal Difference,Prev Team 2 Goal Difference,Prev Team 1 Points,Prev Team 2 Points,Cumulative_Avg_HST,Cumulative_Avg_AST,Cumulative_Avg_HF,Cumulative_Avg_AF,Cumulative_Avg_HC,Cumulative_Avg_AC,Cumulative_Avg_HY,team_1_fdr,team_2_fdr,Cumulative_Avg_AY,Cumulative_Avg_HR,Cumulative_Avg_AR,H2H_Home_Total_Wins_Last_4,H2H_Away_Total_Wins_Last_4,H2H_Draws_Last_4,Cum PPDA Team 2,Avg PPDA Team 1,Avg PPDA Team 2,Avg Deep Completions Team 1,Avg Deep Completions Team 2,Weighted Avg PPDA Team 1,Weighted Avg PPDA Team 2,Weighted Avg Deep Completions Team 1,Weighted Avg Deep Completions Team 2,Avg xG,Weighted Avg xG,Avg xG.1,Weighted Avg xG.1,Avg xG Team 1,Avg xG Team 2,Weighted Avg xG Team 1,Weighted Avg xG Team 2,Cum np_xg,Avg team1_np_xg,Avg team2_np_xg,Weighted Avg team1_np_xg,Weighted Avg team2_np_xg,Avg team1_expected_points,Avg team2_expected_points,Weighted Avg team1_expected_points,Weighted Avg team2_expected_points,Avg Team 1 Won,Avg Team 2 Won,Avg Team 1 Drawn,Avg Team 2 Drawn,Avg Team 1 Lost,Avg Team 2 Lost,Avg Team 1 Goals Scored,Avg Team 2 Goals Scored,Avg Team 1 Goals Conceded,Avg Team 2 Goals Conceded,Avg Team 1 Goal Difference,Avg Team 2 Goal Difference,time_period_encoded,ppg_team1,ppg_team2,team1_player_average,team2_player_average


In [726]:
data = data.drop(columns = ['H2H_Home_Total_Wins_Last_4','H2H_Away_Total_Wins_Last_4','H2H_Draws_Last_4'])


In [727]:
import numpy as np
import pandas as pd
from statsmodels.miscmodels.ordinal_model import OrderedModel
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

data = data.dropna()
print(data.columns)
# Discretize 'Team 1 Score' into ordinal categories (e.g., 0 = Low, 1 = Medium, 2 = High)
data['Team 1 Score Category'] = pd.cut(
    data['Team 1 Score'],
    bins=[-float('inf'), 0, 2, float('inf')],
    labels=[0, 1, 2]  # 0 = Low, 1 = Medium, 2 = High
).astype(int)
# Handle missing values for 'team1_player_average' if necessary
if 'team1_player_average' in data.columns:
    print("yes")
else:
    print("Column 'team1_player_average' not found in the dataset.")

# Feature selection for training
X_team1 = data

# Define target (dependent variable)
y_team1 = data['Team 1 Score Category']

# Scale the features
scaler = StandardScaler()
X_team1_scaled = scaler.fit_transform(X_team1)

# Fit the Ordered Probit model
ordered_model_team1 = OrderedModel(y_team1, X_team1_scaled, distr='probit')
result_team1 = ordered_model_team1.fit(method='bfgs')

# Display the summary of the model
print(result_team1.summary())

# Predict probabilities for each category (Low, Medium, High) for training data
predicted_probs = result_team1.predict()
predicted_probs_df = pd.DataFrame(predicted_probs, columns=['Low', 'Medium', 'High'])

# Assign the most likely category as the predicted class
predicted_classes = predicted_probs.argmax(axis=1)

# Evaluation metrics for the training data
accuracy = accuracy_score(y_team1, predicted_classes)
conf_matrix = confusion_matrix(y_team1, predicted_classes)
classification_rep = classification_report(y_team1, predicted_classes, target_names=['Low', 'Medium', 'High'])

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)



Index(['Unnamed: 0', 'Team 1', 'Team 2', 'Team 1 Score', 'Team 2 Score',
       'Prev Team 1 Form', 'Prev Team 2 Form', 'Prev Team 1 Goal Difference',
       'Prev Team 2 Goal Difference', 'Prev Team 1 Points',
       'Prev Team 2 Points', 'Cumulative_Avg_HST', 'Cumulative_Avg_AST',
       'Cumulative_Avg_HF', 'Cumulative_Avg_AF', 'Cumulative_Avg_HC',
       'Cumulative_Avg_AC', 'Cumulative_Avg_HY', 'team_1_fdr', 'team_2_fdr',
       'Cumulative_Avg_AY', 'Cumulative_Avg_HR', 'Cumulative_Avg_AR',
       'Cum PPDA Team 2', 'Avg PPDA Team 1', 'Avg PPDA Team 2',
       'Avg Deep Completions Team 1', 'Avg Deep Completions Team 2',
       'Weighted Avg PPDA Team 1', 'Weighted Avg PPDA Team 2',
       'Weighted Avg Deep Completions Team 1',
       'Weighted Avg Deep Completions Team 2', 'Avg xG', 'Weighted Avg xG',
       'Avg xG.1', 'Weighted Avg xG.1', 'Avg xG Team 1', 'Avg xG Team 2',
       'Weighted Avg xG Team 1', 'Weighted Avg xG Team 2', 'Cum np_xg',
       'Avg team1_np_xg', 'Avg tea

ValueError: could not convert string to float: 'Brighton'

In [None]:
# Load the new data
new_data = pd.read_csv("prediction_data.csv")

# Extract features for prediction
X_new_team1 = new_data[['Prev Team 1 Form', 'team_1_fdr', 'ppg_team1', 
                        'Cumulative_Avg_HST', 'Avg Team 1 Goals Scored']]

# Ensure column order matches training data
X_new_team1 = X_new_team1[X_team1.columns]

# Scale the new data using the same scaler fitted on training data
X_new_team1_scaled = scaler.transform(X_new_team1)

# Predict probabilities for the new data
predicted_probs_new = result_team1.predict(X_new_team1_scaled)

# Convert probabilities into a DataFrame
predicted_probs_new_df = pd.DataFrame(predicted_probs_new, columns=['Low', 'Medium', 'High'])

# Assign the most likely category as the predicted class
predicted_probs_new_df['Predicted Class'] = predicted_probs_new_df.idxmax(axis=1)

# Display predictions
print(predicted_probs_new_df)


# Bivariate Poisson

In [None]:
len(modelling_table)

In [None]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from scipy.stats import poisson
from sklearn.preprocessing import StandardScaler

# Load the modelling table
modelling_table = pd.read_csv("modelling_table.csv")

# Define feature columns for Team 1 and Team 2
team1_features = ['Prev Team 1 Form', 'team_1_fdr', 'ppg_team1', 'Cumulative_Avg_HST']
team2_features = ['Prev Team 2 Form', 'team_2_fdr', 'ppg_team2', 'Cumulative_Avg_AF']

# Extract features and target variables
X_team1 = modelling_table[team1_features]
X_team2 = modelling_table[team2_features]
y_team1 = modelling_table['Team 1 Score']
y_team2 = modelling_table['Team 2 Score']

# Scale the features
scaler_team1 = StandardScaler()
scaler_team2 = StandardScaler()
X_team1_scaled = scaler_team1.fit_transform(X_team1)
X_team2_scaled = scaler_team2.fit_transform(X_team2)

# Define the bivariate Poisson log-likelihood function
def bivariate_poisson_loglik(params, X1, X2, y1, y2):
    """
    Computes the negative log-likelihood for the bivariate Poisson model.
    """
    beta1 = params[:X1.shape[1]]
    beta2 = params[X1.shape[1]:X1.shape[1] + X2.shape[1]]
    rho = params[-1]  # Shared covariance term

    # Compute Poisson rate parameters
    lambda1 = np.exp(np.dot(X1, beta1))
    lambda2 = np.exp(np.dot(X2, beta2))
    lambda12 = np.exp(rho)  # Shared component

    # Calculate log-likelihood
    loglik = (
        poisson.logpmf(y1, lambda1) +
        poisson.logpmf(y2, lambda2) +
        poisson.logpmf(np.minimum(y1, y2), lambda12)
    )
    return -np.sum(loglik)  # Negative log-likelihood for minimization

# Initial parameters
initial_params = np.ones(X_team1_scaled.shape[1] + X_team2_scaled.shape[1] + 1)

# Fit the model
result = minimize(
    bivariate_poisson_loglik,
    x0=initial_params,
    args=(X_team1_scaled, X_team2_scaled, y_team1.values, y_team2.values),
    method='L-BFGS-B'
)

# Extract model parameters
params = result.x
print("\nModel Parameters:\n", params)

# Make predictions for training data
def predict_scores(X1_scaled, X2_scaled, params):
    """
    Predict scores using the fitted bivariate Poisson model.
    """
    lambda1 = np.exp(np.dot(X1_scaled, params[:X1_scaled.shape[1]]))
    lambda2 = np.exp(np.dot(X2_scaled, params[X1_scaled.shape[1]:-1]))
    lambda12 = np.exp(params[-1])
    return lambda1, lambda2, lambda12

lambda1_pred, lambda2_pred, lambda12_pred = predict_scores(X_team1_scaled, X_team2_scaled, params)

# Combine predictions into a DataFrame
predictions = pd.DataFrame({
    'Team 1 Predicted Score': lambda1_pred,
    'Team 2 Predicted Score': lambda2_pred,
    'Shared Covariance Term': lambda12_pred
})

# Combine predictions for training data with Team 1 and Team 2 columns
predictions_with_teams = pd.concat([
    modelling_table[['Team 1', 'Team 2']],  # Include Team 1 and Team 2 from modelling table
    predictions
], axis=1)

print("\nTraining Data Predictions with Teams:\n", predictions_with_teams.tail())




In [None]:
new_data.head()

In [None]:
# Load new data
new_data = pd.read_csv("prediction_data.csv")

# Function to preprocess and predict for new data
def predict_new_data(new_data, scaler_team1, scaler_team2, params):
    """
    Predict scores for new data.
    """
    # Align features
    new_team1 = new_data[team1_features]
    new_team2 = new_data[team2_features]

    # Scale the new data
    new_team1_scaled = scaler_team1.transform(new_team1)
    new_team2_scaled = scaler_team2.transform(new_team2)

    # Predict scores
    lambda1, lambda2, lambda12 = predict_scores(new_team1_scaled, new_team2_scaled, params)
    return pd.DataFrame({
        'Team 1 Predicted Score': lambda1,
        'Team 2 Predicted Score': lambda2,
        'Shared Covariance Term': lambda12
    })



# Predict for new data
try:
    new_predictions = predict_new_data(new_data, scaler_team1, scaler_team2, params)
    # Combine predictions for training data with Team 1 and Team 2 columns
    predictions_with_teams = pd.concat([
        new_data[['Team 1', 'Team 2']],  # Include Team 1 and Team 2 from modelling table
        new_predictions
    ], axis=1)
    print("\nNew Data Predictions:\n", predictions_with_teams.tail(10))
except KeyError as e:
    print("\nError: Missing required features in new data:", e)
except Exception as e:
    print("\nAn error occurred during prediction:", e)


In [None]:
# Add a "Difference" column to new_predictions DataFrame
new_predictions['Score Difference'] = abs(new_predictions['Team 1 Predicted Score'] - new_predictions['Team 2 Predicted Score'])

# Sort by "Score Difference" in descending order to identify the safest bets
safest_bets = new_predictions.sort_values(by='Score Difference', ascending=False)

# Display the top safest bets
print("\nSafest Bets (Highest Score Differences):\n", safest_bets)

# Optional: Save the safest bets to a CSV file for reference
safest_bets.to_csv("safest_bets.csv", index=False)


In [732]:
new_data = pd.read_csv("prediction_data.csv")
new_data.head(11)

Unnamed: 0.1,Unnamed: 0,Team 1,Team 2,Team 1 Score,Team 2 Score,Prev Team 1 Form,Prev Team 2 Form,Prev Team 1 Goal Difference,Prev Team 2 Goal Difference,Prev Team 1 Points,Prev Team 2 Points,Cumulative_Avg_HST,Cumulative_Avg_AST,Cumulative_Avg_HF,Cumulative_Avg_AF,Cumulative_Avg_HC,Cumulative_Avg_AC,Cumulative_Avg_HY,team_1_fdr,team_2_fdr,Cumulative_Avg_AY,Cumulative_Avg_HR,Cumulative_Avg_AR,H2H_Home_Total_Wins_Last_4,H2H_Away_Total_Wins_Last_4,H2H_Draws_Last_4,Cum PPDA Team 2,Avg PPDA Team 1,Avg PPDA Team 2,Avg Deep Completions Team 1,Avg Deep Completions Team 2,Weighted Avg PPDA Team 1,Weighted Avg PPDA Team 2,Weighted Avg Deep Completions Team 1,Weighted Avg Deep Completions Team 2,Avg xG,Weighted Avg xG,Avg xG.1,Weighted Avg xG.1,Avg xG Team 1,Avg xG Team 2,Weighted Avg xG Team 1,Weighted Avg xG Team 2,Cum np_xg,Avg team1_np_xg,Avg team2_np_xg,Weighted Avg team1_np_xg,Weighted Avg team2_np_xg,Avg team1_expected_points,Avg team2_expected_points,Weighted Avg team1_expected_points,Weighted Avg team2_expected_points,Avg Team 1 Won,Avg Team 2 Won,Avg Team 1 Drawn,Avg Team 2 Drawn,Avg Team 1 Lost,Avg Team 2 Lost,Avg Team 1 Goals Scored,Avg Team 2 Goals Scored,Avg Team 1 Goals Conceded,Avg Team 2 Goals Conceded,Avg Team 1 Goal Difference,Avg Team 2 Goal Difference,time_period_encoded,ppg_team1,ppg_team2,team1_player_average,team2_player_average
0,189,Brentford,Arsenal,,,0.8,2.2,0,19,24,36,5.777778,2.833333,10.888889,10.333333,6.388889,3.222222,2.333333,4.0,4.0,2.444444,0.055556,0.111111,,,,162.048375,12.560255,9.002688,6.333333,9.388889,38.378556,27.508212,19.351852,28.688272,1.516667,4.709649,1.727778,5.365205,1.626305,2.192117,5.050104,6.8071,65.68691,1.54173,2.107542,4.787479,6.544474,1.234983,2.034283,3.834948,6.316985,4.111111,5.333333,1.166667,3.055556,4.222222,1.111111,17.666667,17.833333,17.444444,8.833333,0.222222,9.0,1,1.333333,2.0,9.277411,10.90088
1,190,Tottenham,Newcastle Utd,,,0.8,2.4,13,11,24,32,4.789474,4.0,13.052632,10.631579,5.210526,5.736842,2.736842,3.0,4.0,2.210526,0.052632,0.052632,,,,211.713639,9.969133,11.142823,10.368421,9.105263,29.382708,34.601398,30.559557,28.274238,1.836842,5.418684,1.694737,5.338421,2.093656,1.752172,6.176286,5.519341,70.787234,2.053595,1.672049,6.058106,5.266953,1.579384,1.540937,4.659183,4.853951,4.263158,4.263158,1.421053,3.052632,4.315789,2.684211,21.210526,13.210526,12.052632,11.263158,9.157895,1.947368,0,1.263158,1.684211,11.427816,9.600594
2,191,Southampton,Brentford,,,0.2,0.8,-27,0,6,24,,,,,,,,2.0,2.0,,,,,,,,12.367355,,6.052632,,40.356632,,19.750693,,1.015789,3.250526,1.436842,4.382368,1.258535,,4.027311,,49.95486,1.168616,1.460587,3.739573,4.454789,0.693532,1.169984,2.219301,3.568452,0.526316,4.263158,1.315789,1.263158,8.157895,4.421053,6.473684,18.421053,20.368421,18.210526,-13.894737,0.210526,0,0.315789,1.333333,8.363351,0.0
3,192,Crystal Palace,Chelsea,,,1.6,1.4,-7,15,20,35,5.578947,5.368421,13.631579,10.894737,4.842105,5.0,3.421053,4.0,3.0,2.473684,0.105263,0.052632,,,,210.683989,13.709126,11.088631,6.157895,9.157895,43.291976,32.682281,19.445983,26.99169,1.357895,4.345263,2.026316,5.977632,1.612646,2.167603,5.160466,6.394429,68.780043,1.572584,2.047418,5.032269,6.039884,1.3417,1.660679,4.29344,4.899003,1.157895,5.473684,4.210526,2.736842,4.631579,1.789474,8.894737,21.315789,14.052632,11.842105,-5.157895,9.473684,0,1.052632,1.842105,10.914909,11.373515
4,193,Bournemouth,Everton,,,1.8,1.2,6,-9,30,17,,,,,,,,2.0,3.0,,,,,,,,9.788856,,7.315789,,30.912178,,23.102493,,1.957895,6.069474,0.926316,,2.094688,,6.493533,,54.797291,1.854319,1.029749,5.748387,,1.838579,0.908995,5.699595,,3.894737,1.684211,2.894737,3.421053,3.210526,4.631579,14.210526,8.947368,13.052632,16.315789,1.157895,-7.368421,0,1.578947,0.944444,8.929617,9.039789
5,194,Aston Villa,Leicester City,,,1.4,0.2,-3,-20,29,14,4.842105,4.631579,11.631579,11.0,5.421053,4.894737,2.421053,2.0,4.0,2.105263,0.105263,0.0,,,,315.625745,14.234724,16.611881,7.842105,4.473684,42.704172,53.332882,23.526316,14.362881,1.526316,4.502632,0.989474,3.215789,1.848764,1.181223,5.453854,3.838973,53.763907,1.728579,1.1011,5.099309,3.578575,1.626495,0.813658,4.798159,2.644388,4.894737,1.631579,2.473684,3.421053,2.631579,4.947368,15.842105,12.842105,15.263158,19.947368,0.578947,-7.105263,0,1.526316,0.736842,8.962402,6.759556
6,195,Manchester City,West Ham,,,1.0,1.6,6,-12,31,23,4.842105,4.947368,10.526316,11.0,6.210526,5.157895,1.947368,2.0,4.0,1.842105,0.0,0.105263,,,,260.481212,12.749582,13.709537,12.736842,6.421053,38.248747,43.293276,38.210526,20.277008,1.789474,5.278947,1.421053,4.547368,1.96848,1.517271,5.807017,4.855269,62.423445,1.888358,1.397087,5.570655,4.470677,1.577689,1.056168,4.654184,3.379739,5.947368,3.0,1.894737,2.421053,2.157895,4.578947,19.421053,12.789474,12.789474,17.684211,6.631579,-4.894737,0,1.631579,1.210526,11.150033,4.701582
7,196,Brighton,Arsenal,,,0.8,2.2,1,19,27,36,,,,,,,,4.0,3.0,,,,,,,,9.653669,,8.052632,,30.485269,,25.429363,,1.378947,4.412632,1.636842,5.074211,1.55762,,4.984384,,67.530545,1.55762,1.996619,4.984384,6.189519,1.403374,1.927216,4.490796,5.974369,4.157895,5.578947,4.052632,3.210526,1.789474,1.157895,16.684211,18.736842,13.578947,9.210526,3.105263,9.526316,1,1.421053,2.0,6.742145,0.0
8,197,Fulham,Ipswich Town,,,1.4,1.2,3,-15,29,15,4.894737,4.052632,10.368421,12.315789,5.894737,5.105263,1.947368,2.0,3.0,2.105263,0.105263,0.052632,,,,273.069831,13.889236,14.372096,5.842105,3.842105,41.667707,45.385567,17.526316,12.132964,1.347368,3.974737,0.931579,2.934474,1.58201,1.165431,4.666929,3.671108,49.917869,1.501887,1.12537,4.430566,3.544914,1.606774,0.751789,4.739982,2.368137,3.894737,0.736842,3.368421,4.210526,2.736842,5.052632,14.052632,9.210526,12.789474,18.842105,1.263158,-9.631579,0,1.526316,0.789474,9.137281,6.752342
9,198,Liverpool,Manchester Utd,,,2.2,0.6,28,-5,45,22,4.368421,4.263158,10.947368,10.526316,5.368421,4.0,2.0,3.0,5.0,2.105263,0.105263,0.052632,0.0,0.0,0.0,204.957345,,10.787229,,6.947368,,32.929435,,21.207756,2.168421,,1.4,4.41,,1.552256,,4.889605,71.160617,2.273163,1.472133,,4.637219,2.194621,1.457342,,4.590628,7.789474,3.526316,1.105263,2.421053,0.842105,4.052632,20.421053,11.105263,6.684211,11.947368,13.736842,-0.842105,1,2.5,1.157895,11.470604,8.859151
