In [25]:
from predict_odds import match_odds 
from predict_odds import get_probability_array
from predict_odds import over_under_odds
import pandas as pd
import numpy as np

In [26]:
def df_epl_24_25(url):
    # Read CSV, standardise team names, and remove newly promoted teams
    df = pd.read_csv(url)
    columns = ['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'FTR', 'B365H', 'B365D', 'B365A', 'B365>2.5', 'B365<2.5']
    df['HomeTeam'] = df['HomeTeam'].replace('Man United', 'Manchester United')
    df['AwayTeam'] = df['AwayTeam'].replace('Man United', 'Manchester United')
    df['HomeTeam'] = df['HomeTeam'].replace('Man City', 'Manchester City')
    df['AwayTeam'] = df['AwayTeam'].replace('Man City', 'Manchester City')
    df['HomeTeam'] = df['HomeTeam'].replace("Nott'm Forest", 'Nottingham Forest')
    df['AwayTeam'] = df['AwayTeam'].replace("Nott'm Forest", 'Nottingham Forest')
    df['HomeTeam'] = df['HomeTeam'].replace('Wolves', 'Wolverhampton Wanderers')
    df['AwayTeam'] = df['AwayTeam'].replace('Wolves', 'Wolverhampton Wanderers')
    df['HomeTeam'] = df['HomeTeam'].replace('Newcastle', 'Newcastle United')
    df['AwayTeam'] = df['AwayTeam'].replace('Newcastle', 'Newcastle United')
    df = df[~df['HomeTeam'].isin(['Leicester', 'Southampton', 'Ipswich'])]
    df = df[~df['AwayTeam'].isin(['Leicester', 'Southampton', 'Ipswich'])]
    df = df.reset_index(drop=True)
    df = df[columns]
    return df

In [27]:
# Read data files

attacking_scores_csv_path = './data/data_EPL/attacking_scores.csv'
defending_scores_csv_path = './data/data_EPL/defending_scores.csv'
home_advantage_csv_path = './data/data_EPL/home_advantage.csv'
df_attack = pd.read_csv(attacking_scores_csv_path, index_col='team')
df_defence = pd.read_csv(defending_scores_csv_path, index_col='team')
df_home_advantage = pd.read_csv(home_advantage_csv_path, index_col='parameter')

In [28]:
def add_odds_to_df(df):
    # Calculate predicted odds for each match
    predicted_home_odds = []
    predicted_draw_odds = []
    predicted_away_odds = []
    over_2half_odds = []
    under_2half_odds = []
    for i in range(len(df)):
        prob_array = get_probability_array(df['HomeTeam'][i], df['AwayTeam'][i], df_attack, df_defence, df_home_advantage)
        result_odds = match_odds(prob_array)
        over_under_2half_odds = over_under_odds(prob_array, 2.5)
        predicted_home_odds.append(result_odds[0])
        predicted_draw_odds.append(result_odds[1])
        predicted_away_odds.append(result_odds[2])
        over_2half_odds.append(over_under_2half_odds[0])
        under_2half_odds.append(over_under_2half_odds[1])
    df['Predicted_H'] = predicted_home_odds
    df['Predicted_D'] = predicted_draw_odds
    df['Predicted_A'] = predicted_away_odds
    df['Predicted_Over2.5'] = over_2half_odds
    df['Predicted_Under2.5'] = under_2half_odds
    return df

In [29]:
def kelly_criterion(actual_odds, predicted_odds, wallet):
    p = 1/predicted_odds
    q = 1 - p
    b = actual_odds-1
    return (b*p-q)*wallet/b

In [30]:
def count_winnings(df, wallet):
    winnings = 0
    for i in range(len(df)):
        # Bet if actual odds are more than predicted odds
        if df['B365H'][i] > df['Predicted_H'][i]:
            # Bet for home win
            toBet = kelly_criterion(df['B365H'][i], df['Predicted_H'][i], wallet)
            if df['FTR'][i] == 'H': # Home Win
                winnings += toBet * (df['B365H'][i] - 1)
            else:
                winnings -= toBet
        if df['B365D'][i] > df['Predicted_D'][i]:
            # Bet for draw
            toBet = kelly_criterion(df['B365D'][i], df['Predicted_D'][i], wallet)
            if df['FTR'][i] == 'D': # Draw
                winnings += toBet * (df['B365D'][i] - 1)
            else:
                winnings -= toBet
        if df['B365A'][i] > df['Predicted_A'][i]:
            # Bet for away win
            toBet = kelly_criterion(df['B365A'][i], df['Predicted_A'][i], wallet)
            if df['FTR'][i] == 'A': # Draw
                winnings += toBet * (df['B365A'][i] - 1)
            else:
                winnings -= toBet
    return winnings

In [18]:
# Count potential winnings for 2024/25 season 

df_epl2425 = df_epl_24_25('./data/betting_odds/EPL_24_25.csv')
df_epl2425 = add_odds_to_df(df_epl2425)
wallet = 100
print(f'Potential winnings for 2024/25 season given ${wallet}: ${np.round(count_winnings(df_epl2425, 100),2)}')

Potential winnings for 2024/25 season given $100: $-8.45


In [19]:
def df_epl_23_24(url):
    # Read CSV, standardise team names, and remove newly promoted teams
    df = pd.read_csv(url)
    columns = ['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'FTR', 'B365H', 'B365D', 'B365A', 'B365>2.5', 'B365<2.5']
    df['HomeTeam'] = df['HomeTeam'].replace('Man United', 'Manchester United')
    df['AwayTeam'] = df['AwayTeam'].replace('Man United', 'Manchester United')
    df['HomeTeam'] = df['HomeTeam'].replace('Man City', 'Manchester City')
    df['AwayTeam'] = df['AwayTeam'].replace('Man City', 'Manchester City')
    df['HomeTeam'] = df['HomeTeam'].replace("Nott'm Forest", 'Nottingham Forest')
    df['AwayTeam'] = df['AwayTeam'].replace("Nott'm Forest", 'Nottingham Forest')
    df['HomeTeam'] = df['HomeTeam'].replace('Wolves', 'Wolverhampton Wanderers')
    df['AwayTeam'] = df['AwayTeam'].replace('Wolves', 'Wolverhampton Wanderers')
    df['HomeTeam'] = df['HomeTeam'].replace('Newcastle', 'Newcastle United')
    df['AwayTeam'] = df['AwayTeam'].replace('Newcastle', 'Newcastle United')
    df = df.reset_index(drop=True)
    df = df[columns]
    return df

In [38]:
df_epl2324 = df_epl_23_24('./data/betting_odds/EPL_23_24.csv')
df_epl2324 = add_odds_to_df(df_epl2324)
wallet = 100
print(f'Potential winnings for 2023/24 season given ${wallet}: ${np.round(count_winnings(df_epl2324, 100),2)}')

Potential winnings for 2023/24 season given $100: $508.13


In [35]:
def count_winnings_over_under(df, wallet):
    winnings = 0
    for i in range(len(df)):
        # Bet if actual odds are more than predicted odds
        if df['B365>2.5'][i] > df['Predicted_Over2.5'][i]:
            toBet = kelly_criterion(df['B365>2.5'][i], df['Predicted_Over2.5'][i], wallet)
            if df['FTHG'][i] + df['FTHG'][i] > 2.5:
                winnings += toBet * (df['B365>2.5'][i] - 1)
            else:
                winnings -= toBet
        if df['B365<2.5'][i] > df['Predicted_Under2.5'][i]:
            toBet = kelly_criterion(df['B365<2.5'][i], df['Predicted_Under2.5'][i], wallet)
            if df['FTHG'][i] + df['FTHG'][i] < 2.5: 
                winnings += toBet * (df['B365<2.5'][i] - 1)
            else:
                winnings -= toBet
    return winnings

In [36]:
df_epl2425 = df_epl_24_25('./data/betting_odds/EPL_24_25.csv')
df_epl2425 = add_odds_to_df(df_epl2425)
df_epl2425.head()

Unnamed: 0,HomeTeam,AwayTeam,FTHG,FTAG,FTR,B365H,B365D,B365A,B365>2.5,B365<2.5,Predicted_H,Predicted_D,Predicted_A,Predicted_Over2.5,Predicted_Under2.5
0,Manchester United,Fulham,1,0,H,1.6,4.2,5.25,1.53,2.5,2.15,4.05,3.46,1.87,2.14
1,Arsenal,Wolverhampton Wanderers,2,0,H,1.18,7.5,13.0,1.44,2.75,1.23,7.85,17.48,1.61,2.65
2,Everton,Brighton,0,3,A,2.63,3.3,2.63,1.8,2.0,2.67,3.42,3.0,2.68,1.6
3,Nottingham Forest,Bournemouth,1,1,D,2.45,3.5,2.8,1.73,2.1,2.44,4.04,2.92,1.81,2.23
4,West Ham,Aston Villa,1,2,A,2.45,3.6,2.75,1.57,2.38,3.15,4.84,2.1,1.34,3.91


In [37]:
count_winnings_over_under(df_epl2425, 100)

np.float64(-63.88135112590865)

In [39]:
count_winnings_over_under(df_epl2324, 100)

np.float64(370.9492368656038)