In [1]:
from SimulationMethods import match_sim
from DataPreprocessor import format_data
import pandas as pd
import time

In [2]:
# Path of the season dataset, relative to the notebook's working directory.
csv_file = 'PL_23-24_Simulation_Dataset.csv'
# format_data comes from the project-local DataPreprocessor module. The cells
# below use its result as a table with an 'Abbr' column plus the columns
# 'xG_Home_P90', 'xGA_Home_P90', 'xG_Away_P90' and 'xGA_Away_P90'.
# NOTE(review): presumably one row per team -- confirm against format_data.
df = format_data(csv_file)

In [3]:
def get_data(df, home_team_abbr, away_team_abbr):
    """Select the rows of ``df`` belonging to the two teams of a fixture.

    Args:
        df: Table with an 'Abbr' column holding team abbreviations.
        home_team_abbr: Abbreviation to match for the home side.
        away_team_abbr: Abbreviation to match for the away side.

    Returns:
        A ``(home_rows, away_rows)`` pair, each the subset of ``df`` whose
        'Abbr' equals the respective abbreviation. A subset is empty when the
        abbreviation does not occur in ``df``.
    """
    abbreviations = df['Abbr']
    is_home_row = abbreviations == home_team_abbr
    is_away_row = abbreviations == away_team_abbr
    return df[is_home_row], df[is_away_row]

In [4]:
def calc_xpts(wins, draws, losses):
    """Return the average points per simulated match, rounded to 2 decimals.

    A win is worth 3 points, a draw 1 and a loss 0; the total is divided by
    the number of simulated matches (``wins + draws + losses``).

    Args:
        wins: Number of simulated matches won.
        draws: Number of simulated matches drawn.
        losses: Number of simulated matches lost.

    Returns:
        Expected points per match as a float rounded to two decimals, or
        ``0.0`` when no matches were simulated.
    """
    sample_size = wins + draws + losses
    if sample_size == 0:
        # No simulated matches: report zero expected points instead of
        # failing with ZeroDivisionError.
        return 0.0

    xpts = (wins * 3 + draws) / sample_size
    return round(xpts, 2)

In [5]:
def simulate_match(df, home_team_abbr, away_team_abbr, sample_size):
    """Simulate one fixture and summarise it as a flat result dict.

    The home side's 'xG_Home_P90' / 'xGA_Home_P90' and the away side's
    'xG_Away_P90' / 'xGA_Away_P90' values are passed, together with
    ``sample_size``, to ``match_sim``. The three counts it returns are read
    here as home wins, draws and away wins and converted to expected points
    with ``calc_xpts``.

    Args:
        df: Table with an 'Abbr' column and the four per-90 columns above.
        home_team_abbr: Abbreviation of the home team.
        away_team_abbr: Abbreviation of the away team.
        sample_size: Forwarded unchanged to ``match_sim``.
            # NOTE(review): presumably the number of simulated matches -- confirm.

    Returns:
        dict with keys 'home_team', 'away_team', 'home_xg', 'away_xg',
        'home_xpts' and 'away_xpts'.

    Raises:
        IndexError: If either abbreviation has no row in ``df``.
    """
    home_df, away_df = get_data(df, home_team_abbr, away_team_abbr)

    # An unknown abbreviation yields an empty selection. Raise IndexError (the
    # type an out-of-bounds ``.index[0]`` raises) but name the offending team.
    if len(home_df) == 0:
        raise IndexError("No data found for home team {!r}".format(home_team_abbr))
    if len(away_df) == 0:
        raise IndexError("No data found for away team {!r}".format(away_team_abbr))

    # Use the first matching row of each selection.
    home_row = home_df.index[0]
    away_row = away_df.index[0]

    # Single .loc[row, column] lookups instead of chained df[column][row].
    home_calc_xg, away_calc_xg, result_set = match_sim(
        sample_size,
        home_df.loc[home_row, 'xG_Home_P90'],
        home_df.loc[home_row, 'xGA_Home_P90'],
        away_df.loc[away_row, 'xG_Away_P90'],
        away_df.loc[away_row, 'xGA_Away_P90'],
    )

    home_wins = result_set[0]
    draws = result_set[1]
    away_wins = result_set[2]

    # One side's wins are the other side's losses.
    home_xpts = calc_xpts(home_wins, draws, away_wins)
    away_xpts = calc_xpts(away_wins, draws, home_wins)

    return {
        'home_team': home_team_abbr,
        'away_team': away_team_abbr,
        'home_xg': home_calc_xg,
        'away_xg': away_calc_xg,
        'home_xpts': home_xpts,
        'away_xpts': away_xpts,
    }

In [6]:
def compile_results(season_results, team_abbr):
    """Aggregate one team's season totals from per-match result dicts.

    Args:
        season_results: Iterable of dicts with the keys 'home_team',
            'away_team', 'home_xg', 'away_xg', 'home_xpts' and 'away_xpts'.
        team_abbr: Abbreviation of the team to aggregate.

    Returns:
        dict with 'Team' plus 'xG', 'xGA', 'xGD' (xG minus xGA) and 'xPts',
        each numeric value rounded to two decimals. Matches in which the team
        is neither home nor away are ignored.
    """
    xg_for = 0.0
    xg_against = 0.0
    xpts = 0.0

    for match in season_results:
        # Choose which side's keys count as "ours" for this match.
        if match['home_team'] == team_abbr:
            own_xg_key, opp_xg_key, own_xpts_key = 'home_xg', 'away_xg', 'home_xpts'
        elif match['away_team'] == team_abbr:
            own_xg_key, opp_xg_key, own_xpts_key = 'away_xg', 'home_xg', 'away_xpts'
        else:
            continue

        xg_for = xg_for + match[own_xg_key]
        xg_against = xg_against + match[opp_xg_key]
        xpts = xpts + match[own_xpts_key]

    return {
        'Team': team_abbr,
        'xG': round(xg_for, 2),
        'xGA': round(xg_against, 2),
        'xGD': round(xg_for - xg_against, 2),
        'xPts': round(xpts, 2),
    }

In [7]:
def simulate_season(sample_size):
    """Simulate every home/away pairing and compile per-team season metrics.

    Reads the notebook-level ``df``: each abbreviation in ``df['Abbr']`` hosts
    every other abbreviation once, so each pair of teams meets twice (once at
    each ground).

    Args:
        sample_size: Forwarded unchanged to ``simulate_match`` for every fixture.

    Returns:
        List with one ``compile_results`` dict per entry of ``df['Abbr']``,
        in that column's order.
    """
    # All ordered (home, away) pairs, skipping a team playing itself.
    season_results = [
        simulate_match(df, home_team_abbr, away_team_abbr, sample_size)
        for home_team_abbr in df['Abbr']
        for away_team_abbr in df['Abbr']
        if away_team_abbr != home_team_abbr
    ]

    return [
        compile_results(season_results, team_abbr)
        for team_abbr in df['Abbr']
    ]

In [8]:
def create_standings(sorted_league_metrics):
    """Print a tab-separated league table to stdout.

    Args:
        sorted_league_metrics: Iterable of dicts with the keys 'Team', 'xPts',
            'xG', 'xGA' and 'xGD', already in the order they should be ranked.
            'Team' must be a string; the other values are passed through
            ``str``.

    Returns:
        None. Prints a header row, then one row per team numbered from 1.
    """
    print('Pos\tTeam\txPts\txG\txGA\txGD')
    for position, entry in enumerate(sorted_league_metrics, start=1):
        columns = [
            str(position),
            entry['Team'],
            str(entry['xPts']),
            str(entry['xG']),
            str(entry['xGA']),
            str(entry['xGD']),
        ]
        print('\t'.join(columns))

In [9]:
# Run the full-season simulation, passing 1000 as the sample size for every
# fixture, and report how long it took.
# time.perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock intended for measuring elapsed intervals, so the
# reading cannot be skewed by system clock adjustments.
# NOTE(review): no random seed is set anywhere in this notebook; if match_sim
# draws random numbers, the standings will differ between runs -- confirm.
start_ts = time.perf_counter()
league_metrics = simulate_season(1000)
end_ts = time.perf_counter()

time_taken = end_ts - start_ts
print("Time taken for execution:", time_taken)

Time taken for execution: 28.600541591644287


In [10]:
# Rank teams by expected points, highest first, on a copy so that
# league_metrics keeps its original order; then print the table.
sorted_league_metrics = list(league_metrics)
sorted_league_metrics.sort(key=lambda team: team['xPts'], reverse=True)
create_standings(sorted_league_metrics)

Pos	Team	xPts	xG	xGA	xGD
1	ARS	83.81	108.57	45.04	63.53
2	MCI	83.44	106.94	42.04	64.9
3	NEW	82.11	127.94	58.15	69.79
4	LIV	75.09	130.6	69.2	61.4
5	CHE	73.38	106.55	64.05	42.5
6	BRE	70.64	110.08	72.1	37.98
7	TOT	68.85	104.08	70.56	33.52
8	AVL	64.86	111.14	83.55	27.59
9	BRI	63.02	105.53	83.92	21.61
10	EVE	60.12	91.76	76.42	15.34
11	MUN	45.94	81.71	99.49	-17.78
12	NOT	45.79	67.28	87.9	-20.62
13	CRY	43.09	60.64	82.53	-21.89
14	WHU	41.26	78.86	109.42	-30.56
15	WOL	41.2	73.58	102.92	-29.34
16	LUT	40.82	68.58	109.96	-41.38
17	FUL	34.1	61.06	106.08	-45.02
18	BOU	25.94	59.81	131.97	-72.16
19	BUR	25.04	47.96	111.4	-63.44
20	SHU	15.8	42.45	138.42	-95.97
