In [1]:
import pandas as pd 
from sklearn.externals import joblib
import numpy as np


In [2]:
# Load the match table used by the cells below. Later cells read the columns
# HomeTeam, AwayTeam, HS/AS, HST/AST, HC/AC, FTHG and FTAG from this frame.
# NOTE(review): the file name says 1516 while downstream variables are
# suffixed _1617 — confirm which season is intended.
df_week = pd.read_csv("../data/EPL-1516.csv")

In [3]:
def get_teams(df, n_matches=10):
    """Collect the team names found in the first ``n_matches`` rows of ``df``.

    Each row contributes its ``HomeTeam`` followed by its ``AwayTeam``, in row
    order. Duplicates are not removed, so the result lists every club exactly
    once only if those leading rows form one complete round of fixtures
    (this is assumed, not checked).

    Parameters
    ----------
    df : pandas.DataFrame
        Match table with ``HomeTeam`` and ``AwayTeam`` columns.
    n_matches : int, optional
        Number of leading rows to read. Defaults to 10, the value that was
        previously hard-coded.

    Returns
    -------
    list
        Team names. Always a list: when ``df`` holds fewer than ``n_matches``
        rows, the names from the rows that do exist are returned (the previous
        implementation fell off the end of the loop and returned ``None``).
    """
    # Clamp at zero: a negative stop in iloc would mean "all but the last n".
    leading_rows = df.iloc[:max(n_matches, 0)]

    teams = []
    for home, away in zip(leading_rows["HomeTeam"], leading_rows["AwayTeam"]):
        teams.append(home)
        teams.append(away)
    return teams


In [4]:
# Team names taken from the leading fixtures of df_week (home side, then away
# side, for each row).
teams_1617 = get_teams(df_week)

print teams_1617

['Bournemouth', 'Aston Villa', 'Chelsea', 'Swansea', 'Everton', 'Watford', 'Leicester', 'Sunderland', 'Man United', 'Tottenham', 'Norwich', 'Crystal Palace', 'Arsenal', 'West Ham', 'Newcastle', 'Southampton', 'Stoke', 'Liverpool', 'West Brom', 'Man City']


In [5]:
def build_features(df, teams):
    """Aggregate per-team shot and corner totals over all matches in ``df``.

    For every name in ``teams`` the rows where it played at home and the rows
    where it played away are selected, and six totals are produced in this
    order:

        [shots made, shots conceded,
         shots on target made, shots on target conceded,
         corners awarded, corners conceded]

    Returns a list holding one six-element list per team, in ``teams`` order.
    """
    # Each pair is (column counted when the team is at home,
    #               column counted when the team is away).
    stat_columns = [
        ("HS", "AS"),    # shots made
        ("AS", "HS"),    # shots conceded
        ("HST", "AST"),  # shots on target made
        ("AST", "HST"),  # shots on target conceded
        ("HC", "AC"),    # corners awarded
        ("AC", "HC"),    # corners conceded
    ]

    feature_rows = []
    for club in teams:
        as_host = df[df['HomeTeam'] == club]
        as_guest = df[df['AwayTeam'] == club]

        feature_rows.append([
            as_guest[away_col].sum() + as_host[home_col].sum()
            for home_col, away_col in stat_columns
        ])

    return feature_rows

In [6]:
def build_target(df, teams):
    """Compute the league points each team earned over the matches in ``df``.

    A win (home or away) is worth 3 points and a draw 1 point, judged from the
    full-time goal columns ``FTHG`` (home) and ``FTAG`` (away).

    Returns a list of integer point totals, one per entry of ``teams`` and in
    the same order.
    """
    points_per_team = []

    for club in teams:
        is_host = df['HomeTeam'] == club
        is_guest = df['AwayTeam'] == club

        # Count matching rows with boolean masks; int() keeps plain Python
        # integers in the result, as len() on a filtered frame would give.
        home_wins = int((is_host & (df['FTHG'] > df['FTAG'])).sum())
        away_wins = int((is_guest & (df['FTAG'] > df['FTHG'])).sum())
        draws = int(((is_host | is_guest) & (df['FTAG'] == df['FTHG'])).sum())

        points_per_team.append(3 * away_wins + 3 * home_wins + draws)

    return points_per_team


In [7]:
# Per-team points totals (the values the models are compared against) and
# per-team shot/corner totals (the model inputs), both in teams_1617 order.
targ_1617 = build_target(df_week, teams_1617)
feat_1617 = build_features(df_week, teams_1617)

In [8]:
# Load a previously persisted model from disk. joblib.load unpickles the file,
# which can execute arbitrary code — only load model files you trust.
# presumably a linear regression (per the file name) fitted on the same six
# features, in the same order, that build_features emits — TODO confirm.
model_1 = joblib.load('../data/linreg_model_en.pkl')

# One predicted value per row of feat_1617, i.e. per team in teams_1617 order.
pred_1617 = model_1.predict(feat_1617)

In [9]:
# Pair every team with its predicted value so the table can be sorted.
ranking = list(zip(teams_1617, pred_1617))

In [10]:
# Order the (team, prediction) pairs by prediction, highest first, then print
# one "team prediction" line per entry. sort() reorders `ranking` in place.
ranking.sort(key=lambda x: x[1],reverse=True)
for t,p in ranking:
    print t,p

Tottenham 77.9123213896
Liverpool 69.9612241114
Man City 69.2831130489
Arsenal 63.7431648178
West Ham 59.0005072499
Leicester 56.824835179
Southampton 55.5951869544
Chelsea 53.9859846505
Everton 50.3835580957
Bournemouth 47.1552814439
Crystal Palace 46.7905483811
Man United 45.7947037135
Swansea 43.7926515561
Watford 43.2684191109
Sunderland 40.6809212594
Stoke 40.0168795755
Norwich 39.7659020037
Newcastle 39.6181509439
West Brom 35.6037880269
Aston Villa 32.2549708922


In [11]:
# Actual points per team, in teams_1617 order.
print targ_1617
# NOTE(review): for scikit-learn regressors score() returns the coefficient of
# determination R^2 (1 is a perfect prediction), not the explained variance
# score — confirm model_1 is such a regressor and consider relabelling.
print('Variance score: %.2f' % model_1.score(feat_1617, targ_1617))
# NOTE(review): np.mean of the squared residuals is the mean squared error,
# not a residual *sum* of squares, so the label printed here is misleading.
print("Residual sum of squares: %.2f"
      % np.mean((model_1.predict(feat_1617) - targ_1617) ** 2))

[42, 17, 50, 47, 47, 45, 81, 39, 66, 70, 34, 42, 71, 62, 37, 63, 51, 60, 43, 66]
Variance score: 0.60
Residual sum of squares: 91.15


In [12]:
# Swap in a second persisted model. The name model_1 is rebound here, so the
# model loaded from linreg_model_en.pkl is no longer reachable afterwards.
# presumably a regression tree, per the file name — TODO confirm.
# joblib.load unpickles the file, which can execute arbitrary code — only load
# model files you trust.
model_1 = joblib.load('../data/regtree_model_en.pkl')

# Overwrites the earlier predictions with this model's, in teams_1617 order.
pred_1617 = model_1.predict(feat_1617)

In [13]:
# Bundle every team with its predicted value and its actual points total.
ranking = list(zip(teams_1617, pred_1617, targ_1617))

In [14]:
# Order the (team, prediction, actual) triples by prediction, highest first,
# then print one "team prediction actual" line per entry. sort() reorders
# `ranking` in place.
ranking.sort(key=lambda x: x[1],reverse=True)
for t,p,f in ranking:
    print t,p,f

Tottenham 75.3571428571 70
Arsenal 75.3571428571 71
Liverpool 75.3571428571 60
Man City 75.3571428571 66
Chelsea 66.75 50
West Ham 66.75 62
Southampton 66.75 63
Leicester 59.3333333333 81
Swansea 50.0 47
Watford 50.0 45
Sunderland 50.0 39
Newcastle 50.0 37
Bournemouth 43.8571428571 42
Everton 43.8571428571 47
Man United 43.8571428571 66
Crystal Palace 43.8571428571 42
Aston Villa 37.0 17
Norwich 37.0 34
Stoke 37.0 51
West Brom 37.0 43


In [15]:
# Actual points per team, in teams_1617 order. At this point model_1 refers to
# the model loaded from regtree_model_en.pkl.
print targ_1617
# NOTE(review): for scikit-learn regressors score() returns the coefficient of
# determination R^2 (1 is a perfect prediction), not the explained variance
# score — confirm model_1 is such a regressor and consider relabelling.
print('Variance score: %.2f' % model_1.score(feat_1617, targ_1617))
# NOTE(review): np.mean of the squared residuals is the mean squared error,
# not a residual *sum* of squares, so the label printed here is misleading.
print("Residual sum of squares: %.2f"
      % np.mean((model_1.predict(feat_1617) - targ_1617) ** 2))

[42, 17, 50, 47, 47, 45, 81, 39, 66, 70, 34, 42, 71, 62, 37, 63, 51, 60, 43, 66]
Variance score: 0.42
Residual sum of squares: 131.49
