In [1]:
import pandas as pd
import numpy as np
from scipy.stats import poisson
from glob import glob

In [2]:
files = glob('./statistics/EPL*.csv')

matches = pd.DataFrame()

for file in files:
    matches = pd.concat([ 
        matches, 
        pd.read_csv(file, usecols=range(1,12), date_parser='pandas.to_datetime')])

matches['Date'] = pd.to_datetime(matches['Date'], dayfirst=True)
matches.dropna(how = 'all')
matches.to_csv('./all_matches.csv')
matches.info()
matches.head(10)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11449 entries, 0 to 379
Data columns (total 18 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Date         10752 non-null  datetime64[ns]
 1   HomeTeam     10752 non-null  object        
 2   AwayTeam     10752 non-null  object        
 3   FTHG         10752 non-null  float64       
 4   FTAG         10752 non-null  float64       
 5   FTR          10752 non-null  object        
 6   HTHG         9828 non-null   float64       
 7   HTAG         9828 non-null   float64       
 8   HTR          9828 non-null   object        
 9   Unnamed: 10  0 non-null      float64       
 10  Unnamed: 11  0 non-null      float64       
 11  Referee      7928 non-null   object        
 12  HS           6460 non-null   float64       
 13  Unnamed: 7   0 non-null      float64       
 14  Unnamed: 8   0 non-null      float64       
 15  Unnamed: 9   0 non-null      float64       
 16  Time  

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Unnamed: 10,Unnamed: 11,Referee,HS,Unnamed: 7,Unnamed: 8,Unnamed: 9,Time,Attendance
0,1996-08-17,Arsenal,West Ham,2.0,0.0,H,2.0,0.0,H,,,,,,,,,
1,1996-08-17,Blackburn,Tottenham,0.0,2.0,A,0.0,1.0,A,,,,,,,,,
2,1996-08-17,Coventry,Nott'm Forest,0.0,3.0,A,0.0,2.0,A,,,,,,,,,
3,1996-08-17,Derby,Leeds,3.0,3.0,D,0.0,1.0,A,,,,,,,,,
4,1996-08-17,Everton,Newcastle,2.0,0.0,H,2.0,0.0,H,,,,,,,,,
5,1996-08-17,Middlesbrough,Liverpool,3.0,3.0,D,2.0,2.0,D,,,,,,,,,
6,1996-08-17,Sheffield Weds,Aston Villa,2.0,1.0,H,0.0,0.0,D,,,,,,,,,
7,1996-08-17,Sunderland,Leicester,0.0,0.0,D,0.0,0.0,D,,,,,,,,,
8,1996-08-17,Wimbledon,Man United,0.0,3.0,A,0.0,1.0,A,,,,,,,,,
9,1996-08-18,Southampton,Chelsea,0.0,0.0,D,0.0,0.0,D,,,,,,,,,


In [3]:
last_10_matches = (matches['Date'] > '2010-07') & (matches['Date'] < '2021-05')

matches[last_10_matches].groupby(['HomeTeam', 'AwayTeam']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,FTHG,FTAG,HTHG,HTAG,Unnamed: 10,Unnamed: 11,HS,Unnamed: 7,Unnamed: 8,Unnamed: 9,Attendance
HomeTeam,AwayTeam,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Arsenal,Aston Villa,2.375,1.375,0.750000,0.625000,,,18.833333,,,,
Arsenal,Birmingham,2.000,1.000,1.000000,1.000000,,,18.000000,,,,
Arsenal,Blackburn,3.500,0.500,1.500000,0.500000,,,19.500000,,,,
Arsenal,Blackpool,6.000,0.000,3.000000,0.000000,,,26.000000,,,,
Arsenal,Bolton,3.500,0.500,0.500000,0.500000,,,23.000000,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
Wolves,Tottenham,1.400,2.200,0.400000,1.200000,,,13.000000,,,,
Wolves,Watford,1.000,1.000,0.500000,1.000000,,,10.000000,,,,
Wolves,West Brom,2.000,3.000,1.666667,0.666667,,,12.000000,,,,
Wolves,West Ham,2.000,1.000,0.750000,0.750000,,,14.500000,,,,


In [4]:
matches[last_10_matches]['HomeTeam'].value_counts()

Liverpool           207
Arsenal             207
Man City            207
Tottenham           206
Man United          206
Chelsea             206
Everton             206
West Ham            188
Newcastle           187
West Brom           168
Southampton         167
Stoke               152
Crystal Palace      149
Aston Villa         149
Swansea             133
Sunderland          133
Leicester           131
Fulham              112
Burnley             111
Norwich              95
Watford              95
Bournemouth          95
Wolves               93
Brighton             73
Wigan                57
QPR                  57
Hull                 57
Cardiff              38
Bolton               38
Blackburn            38
Huddersfield         38
Sheffield United     36
Birmingham           19
Blackpool            19
Reading              19
Middlesbrough        19
Leeds                17
Name: HomeTeam, dtype: int64

In [5]:
h2h = matches[last_10_matches].groupby(['HomeTeam', 'AwayTeam']).mean()
matches[last_10_matches].groupby(['HomeTeam', 'AwayTeam']).get_group(('Man United', 'Liverpool')).sort_values('Date')

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Unnamed: 10,Unnamed: 11,Referee,HS,Unnamed: 7,Unnamed: 8,Unnamed: 9,Time,Attendance
48,2010-09-19,Man United,Liverpool,3.0,2.0,H,1.0,0.0,H,,,H Webb,16.0,,,,,
244,2012-02-11,Man United,Liverpool,2.0,1.0,H,0.0,0.0,D,,,P Dowd,11.0,,,,,
217,2013-01-13,Man United,Liverpool,2.0,1.0,H,1.0,0.0,H,,,H Webb,15.0,,,,,
289,2014-03-16,Man United,Liverpool,0.0,3.0,A,0.0,1.0,A,,,M Clattenburg,13.0,,,,,
157,2014-12-14,Man United,Liverpool,3.0,0.0,H,2.0,0.0,H,,,M Atkinson,11.0,,,,,
43,2015-09-12,Man United,Liverpool,3.0,1.0,H,0.0,0.0,D,,,M Oliver,9.0,,,,,
209,2017-01-15,Man United,Liverpool,1.0,1.0,D,0.0,1.0,A,,,M Oliver,9.0,,,,,
293,2018-03-10,Man United,Liverpool,2.0,1.0,H,2.0,0.0,H,,,C Pawson,5.0,,,,,
268,2019-02-24,Man United,Liverpool,0.0,0.0,D,0.0,0.0,D,,,M Oliver,6.0,,,,,
88,2019-10-20,Man United,Liverpool,1.0,1.0,D,1.0,0.0,H,,,M Atkinson,,,,,16:30,


In [8]:
home_team = 'Man United'
away_team = 'Liverpool'

def get_score_results(home_team, away_team):
    home_mean = h2h.loc[(home_team, away_team)][0]
    away_mean = h2h.loc[(home_team, away_team)][1]
    
    home_score = poisson.rvs(home_mean, size=1)[0]
    away_score = poisson.rvs(away_mean, size=1)[0]

    return (home_score, away_score)

sims = {}
trials = 100
for i in range(trials):
    score = get_score_results(home_team, away_team)
    sims[score] = sims.get(score, 0) + 1

hist = []
for k, v in sims.items():
    p = v / trials
    hist.append((v, k, p))
    
hist.sort(reverse=True)
hist

[(13, (1, 1), 0.13),
 (12, (2, 0), 0.12),
 (12, (1, 0), 0.12),
 (11, (3, 1), 0.11),
 (8, (2, 2), 0.08),
 (6, (2, 1), 0.06),
 (5, (0, 2), 0.05),
 (4, (3, 2), 0.04),
 (4, (3, 0), 0.04),
 (4, (0, 0), 0.04),
 (3, (2, 3), 0.03),
 (3, (1, 2), 0.03),
 (2, (4, 2), 0.02),
 (2, (1, 3), 0.02),
 (2, (0, 3), 0.02),
 (2, (0, 1), 0.02),
 (1, (6, 3), 0.01),
 (1, (5, 2), 0.01),
 (1, (5, 1), 0.01),
 (1, (4, 0), 0.01),
 (1, (3, 3), 0.01),
 (1, (2, 4), 0.01),
 (1, (0, 4), 0.01)]

In [9]:
home_team = 'Man United'
away_team = 'Liverpool'
trials = 10000

def get_score_results(home_team, away_team):
    try:
        home_mean = h2h.loc[(home_team, away_team)][0]
        away_mean = h2h.loc[(home_team, away_team)][1]

        home_scores = poisson.rvs(home_mean, size=trials).astype(str)
        away_scores = poisson.rvs(away_mean, size=trials).astype(str)

        scores = pd.DataFrame(data={'home':home_scores, 'away':away_scores})
        scores['result'] = scores['home'] + '-' + scores['away']
        predictions = scores['result'].value_counts()
        probability = round(predictions / trials * 100, 1)
        
        return predictions.index[0], probability[0]

    except KeyError:       
        return 'N/A', 'N/A'
    
get_score_results(home_team, away_team)

('1-1', 11.0)

In [14]:
home_teams = ['Arsenal', 'Aston Villa', 'West Ham', 'Wolves', 'Leicester', 'Liverpool', 'Leeds', 'Man City', 'Fulham', 'Sheffield United']
away_teams = ['Brighton', 'Chelsea', 'Southampton', 'Man United', 'Tottenham', 'Crystal Palace', 'West Brom', 'Everton', 'Newcastle', 'Burnley']

weekday = pd.DataFrame(data={'Home':home_teams, 'Away':away_teams})
weekday['Predictions'] = weekday.apply(lambda x: get_score_results(x.Home, x.Away)[0], axis=1)
weekday['Probability of predictions%'] = weekday.apply(lambda x: get_score_results(x.Home, x.Away)[1], axis=1)
weekday

Unnamed: 0,Home,Away,Predictions,Probability of predictions%
0,Arsenal,Brighton,1-1,13.4
1,Aston Villa,Chelsea,0-2,11.8
2,West Ham,Southampton,2-1,9.3
3,Wolves,Man United,1-1,9.9
4,Leicester,Tottenham,1-2,9.6
5,Liverpool,Crystal Palace,2-1,8.6
6,Leeds,West Brom,,
7,Man City,Everton,1-0,15.3
8,Fulham,Newcastle,1-1,10.0
9,Sheffield United,Burnley,2-0,23.1
