In [1]:
import pandas as pd 
from sklearn.externals import joblib
import numpy as np


In [2]:
# Load the match-results CSV (path is relative to the notebook's working directory).
# Later cells read its HomeTeam/AwayTeam, shot (HS/AS, HST/AST), corner (HC/AC)
# and full-time goal (FTHG/FTAG) columns.
df_week = pd.read_csv("../data/EPL-1516.csv")

In [3]:
def get_teams(df, n_matches=10):
    """Collect the team names appearing in the first fixtures of a results table.

    Parameters
    ----------
    df : pandas.DataFrame
        Match results with "HomeTeam" and "AwayTeam" columns, one row per match.
    n_matches : int, optional
        Number of leading rows to scan. Defaults to 10, the window that was
        previously hard-coded (presumably one full round of a 20-team league;
        confirm before using with other competitions). Negative values are
        treated as 0.

    Returns
    -------
    list
        Home and away names interleaved in row order
        (home_0, away_0, home_1, away_1, ...). No de-duplication is performed.
        A list is always returned, including when ``df`` has fewer than
        ``n_matches`` rows or is empty.
    """
    teams = []
    # head() simply stops at the end of the frame, so short inputs still
    # produce a (shorter) list.
    opening = df.head(max(n_matches, 0))
    for home, away in zip(opening["HomeTeam"], opening["AwayTeam"]):
        teams.extend((home, away))
    return teams


In [4]:
# Team names taken from the first fixtures of the loaded results table.
# NOTE(review): the name says "1617" but the CSV read earlier is EPL-1516 -- confirm the intended season.
teams_1617 = get_teams(df_week)

# Call-style print: identical output on Python 2 for a single argument, and valid on Python 3.
print(teams_1617)

['Bournemouth', 'Aston Villa', 'Chelsea', 'Swansea', 'Everton', 'Watford', 'Leicester', 'Sunderland', 'Man United', 'Tottenham', 'Norwich', 'Crystal Palace', 'Arsenal', 'West Ham', 'Newcastle', 'Southampton', 'Stoke', 'Liverpool', 'West Brom', 'Man City']


In [5]:
def build_features(df, teams):
    """Sum each team's shots, shots on target and corners over all of its matches.

    Parameters
    ----------
    df : pandas.DataFrame
        Match results with HomeTeam/AwayTeam plus the home/away stat columns
        HS/AS (shots), HST/AST (shots on target) and HC/AC (corners).
    teams : iterable
        Team names to build a feature row for.

    Returns
    -------
    list of list
        One row per team, in input order:
        [shots made, shots conceded, shots on target made,
         shots on target conceded, corners awarded, corners conceded].
    """
    def _made_and_conceded(home_rows, away_rows, home_col, away_col):
        # "Made" uses the team's own column on each side; "conceded" uses the opponent's.
        made = away_rows[away_col].sum() + home_rows[home_col].sum()
        conceded = away_rows[home_col].sum() + home_rows[away_col].sum()
        return made, conceded

    rows = []
    for club in teams:
        at_home = df.loc[df['HomeTeam'] == club]
        on_road = df.loc[df['AwayTeam'] == club]

        shots, shots_against = _made_and_conceded(at_home, on_road, "HS", "AS")
        on_target, on_target_against = _made_and_conceded(at_home, on_road, "HST", "AST")
        corners, corners_against = _made_and_conceded(at_home, on_road, "HC", "AC")

        rows.append([
            shots, shots_against,
            on_target, on_target_against,
            corners, corners_against,
        ])

    return rows

In [6]:
def build_target(df, teams):
    """Compute each team's league points from full-time scores.

    Parameters
    ----------
    df : pandas.DataFrame
        Match results with HomeTeam, AwayTeam, FTHG (home goals) and
        FTAG (away goals) columns.
    teams : iterable
        Team names to score.

    Returns
    -------
    list of int
        Points per team, in input order: 3 per win (home or away), 1 per draw.
    """
    points = []

    for club in teams:
        is_home = df['HomeTeam'] == club
        is_away = df['AwayTeam'] == club
        home_goals = df['FTHG']
        away_goals = df['FTAG']

        # Counting True values in a mask is the same as len() of the filtered frame.
        home_wins = int((is_home & (home_goals > away_goals)).sum())
        away_wins = int((is_away & (away_goals > home_goals)).sum())
        draws = int(((is_home | is_away) & (away_goals == home_goals)).sum())

        points.append(3 * away_wins + 3 * home_wins + draws)

    return points


In [7]:
# Model inputs: one row of shot/corner totals per team, in teams_1617 order.
feat_1617 = build_features(df_week, teams_1617)
# Ground truth: points each team earned, in the same order.
targ_1617 = build_target(df_week, teams_1617)

In [8]:
# Load a previously fitted model from disk (file name suggests a linear regression -- confirm).
# joblib.load unpickles the file, so only load model files from a trusted source.
model_1 = joblib.load('../data/linreg_model_en.pkl')

# One predicted value per feature row, i.e. per team in teams_1617 order.
pred_1617 = model_1.predict(feat_1617)

In [9]:
# Pair every team with its predicted value so the pairs can be ranked.
ranking = list(zip(teams_1617, pred_1617))

In [10]:
# Order the (team, prediction) pairs in place, highest prediction first.
ranking.sort(key=lambda x: x[1], reverse=True)
for t, p in ranking:
    # Call-style print with explicit formatting: prints the same "team value"
    # line as the Python 2 statement form and is also valid on Python 3.
    print("%s %s" % (t, p))

Tottenham 78.5832693539
Liverpool 68.8392146069
Man City 67.9356482457
Arsenal 63.9678376583
West Ham 59.1670232718
Leicester 58.7052750627
Southampton 56.0356134264
Chelsea 53.3699763978
Everton 51.3358196241
Crystal Palace 48.0548435309
Bournemouth 46.5020406134
Man United 45.940733038
Swansea 44.5297322704
Watford 43.8125510149
Newcastle 41.8693165841
Stoke 41.1619267146
Sunderland 40.9361584033
Norwich 39.8360064136
West Brom 36.8696108904
Aston Villa 32.3865398779


In [11]:
# Actual points per team, in teams_1617 order.
print(targ_1617)
# For scikit-learn regressors score() is the coefficient of determination (R^2);
# 1 is a perfect prediction.
print('Variance score: %.2f' % model_1.score(feat_1617, targ_1617))
# np.mean of the squared residuals is the mean squared error, not a residual
# *sum* of squares, so the label says so.
print("Mean squared error: %.2f"
      % np.mean((model_1.predict(feat_1617) - targ_1617) ** 2))

[42, 17, 50, 47, 47, 45, 81, 39, 66, 70, 34, 42, 71, 62, 37, 63, 51, 60, 43, 66]
Variance score: 0.63
Residual sum of squares: 84.41


In [12]:
# Load a second fitted model (file name suggests a regression tree -- confirm).
# This rebinds model_1, replacing the model loaded by the earlier cell.
# joblib.load unpickles the file, so only load model files from a trusted source.
model_1 = joblib.load('../data/regtree_model_en.pkl')

# Rebinds pred_1617 with this model's predictions for the same feature rows.
pred_1617 = model_1.predict(feat_1617)

In [16]:
# Group each team with its predicted value and its actual points for ranking.
ranking = list(zip(teams_1617, pred_1617, targ_1617))

In [17]:
# Order the (team, prediction, actual) triples in place, highest prediction first.
ranking.sort(key=lambda x: x[1], reverse=True)
for t, p, f in ranking:
    # Call-style print with explicit formatting: prints the same
    # "team prediction actual" line as the Python 2 statement form and is also
    # valid on Python 3.
    print("%s %s %s" % (t, p, f))

Leicester 81.0 81
Tottenham 74.375 70
Arsenal 74.375 71
Man City 74.375 66
Man United 66.0 66
West Ham 63.8181818182 62
Southampton 63.8181818182 63
Liverpool 63.8181818182 60
Chelsea 50.0 50
Bournemouth 44.5641025641 42
Swansea 44.5641025641 47
Everton 44.5641025641 47
Watford 44.5641025641 45
Sunderland 44.5641025641 39
Crystal Palace 44.5641025641 42
Newcastle 44.5641025641 37
Stoke 44.5641025641 51
Norwich 38.0 34
West Brom 38.0 43
Aston Villa 17.0 17


In [15]:
# Actual points per team, in teams_1617 order.
print(targ_1617)
# For scikit-learn regressors score() is the coefficient of determination (R^2);
# 1 is a perfect prediction.
print('Variance score: %.2f' % model_1.score(feat_1617, targ_1617))
# np.mean of the squared residuals is the mean squared error, not a residual
# *sum* of squares, so the label says so.
print("Mean squared error: %.2f"
      % np.mean((model_1.predict(feat_1617) - targ_1617) ** 2))

[42, 17, 50, 47, 47, 45, 81, 39, 66, 70, 34, 42, 71, 62, 37, 63, 51, 60, 43, 66]
Variance score: 0.93
Residual sum of squares: 15.75
