In [1]:
import pandas as pd 
from sklearn.externals import joblib
import numpy as np

In [2]:
# Load the match data used by every later cell. The helpers below read the
# HomeTeam/AwayTeam, HS/AS, HST/AST, HC/AC and FTHG/FTAG columns from it.
df_week = pd.read_csv("../data/16-17/week4.csv")

In [3]:
def get_teams(df, n_matches=10):
    """Return the team names found in the first ``n_matches`` rows of ``df``.

    For each row, the ``HomeTeam`` value is appended, followed by the
    ``AwayTeam`` value, so the result has two entries per row read.

    Parameters
    ----------
    df : pandas.DataFrame
        Match table with ``HomeTeam`` and ``AwayTeam`` columns.
    n_matches : int, optional
        Non-negative number of leading rows to read (default 10, the value
        that used to be hard-coded).

    Returns
    -------
    list
        Team names in row order. If ``df`` has fewer than ``n_matches`` rows,
        every available row is used; an empty frame gives ``[]``. (The
        previous version fell off the end of the loop and returned ``None``
        in those cases.)
    """
    teams = []

    # head() caps the number of rows, so no manual row counter is needed.
    for _, match in df.head(n_matches).iterrows():
        teams.append(match["HomeTeam"])
        teams.append(match["AwayTeam"])

    return teams


In [4]:
# Team names taken from the loaded match table (home team, then away team, per row).
teams_1617 = get_teams(df_week)

# Single-argument print(...) gives the same output on Python 2 and Python 3.
print(teams_1617)

['Juventus', 'Fiorentina', 'Roma', 'Udinese', 'Atalanta', 'Lazio', 'Bologna', 'Crotone', 'Chievo', 'Inter', 'Empoli', 'Sampdoria', 'Genoa', 'Cagliari', 'Milan', 'Torino', 'Palermo', 'Sassuolo', 'Pescara', 'Napoli']


In [5]:
def build_features(df, teams):
    """Build one row of shot and corner totals per team.

    Parameters
    ----------
    df : pandas.DataFrame
        Match table with ``HomeTeam``, ``AwayTeam``, ``HS``, ``AS``, ``HST``,
        ``AST``, ``HC`` and ``AC`` columns.
    teams : iterable
        Team names to compute totals for.

    Returns
    -------
    list of list
        One entry per team, in the order of ``teams``:
        ``[shots, shots conceded, shots on target, shots on target conceded,
        corners, corners conceded]``. Each value adds the team's away-match
        total to its home-match total; a team with no matches gets zeros.
    """
    rows = []

    for name in teams:
        at_home = df[df['HomeTeam'] == name]
        on_road = df[df['AwayTeam'] == name]

        def total(away_col, home_col):
            # Sum one column over the away matches and another over the home matches.
            return on_road[away_col].sum() + at_home[home_col].sum()

        shots = total("AS", "HS")
        shots_on_target = total("AST", "HST")
        # "Conceded" totals read the opponent's column of the same matches.
        shots_conceded = total("HS", "AS")
        shots_on_target_conceded = total("HST", "AST")
        corners = total("AC", "HC")
        corners_conceded = total("HC", "AC")

        rows.append([
            shots,
            shots_conceded,
            shots_on_target,
            shots_on_target_conceded,
            corners,
            corners_conceded,
        ])

    return rows

In [6]:
def build_target(df, teams):
    """Compute the points each team earned in ``df``.

    A win (home or away) is worth 3 points and a draw 1 point, based on the
    ``FTHG`` and ``FTAG`` columns of the rows where the team is ``HomeTeam``
    or ``AwayTeam``.

    Parameters
    ----------
    df : pandas.DataFrame
        Match table with ``HomeTeam``, ``AwayTeam``, ``FTHG`` and ``FTAG``
        columns.
    teams : iterable
        Team names to score.

    Returns
    -------
    list of int
        Points per team, in the order of ``teams``.
    """
    home_ahead = df['FTHG'] > df['FTAG']
    away_ahead = df['FTAG'] > df['FTHG']
    level = df['FTAG'] == df['FTHG']

    points = []
    for name in teams:
        is_home = df['HomeTeam'] == name
        is_away = df['AwayTeam'] == name

        home_wins = int((is_home & home_ahead).sum())
        away_wins = int((is_away & away_ahead).sum())
        # A row counts once as a draw whether the team was home or away.
        draws = int(((is_home | is_away) & level).sum())

        points.append(3 * away_wins + 3 * home_wins + draws)

    return points


In [7]:
# Per-team points (targets) and per-team shot/corner totals (features),
# both in the same order as teams_1617.
targ_1617 = build_target(df_week, teams_1617)
feat_1617 = build_features(df_week, teams_1617)

In [8]:
# Load a previously saved model and predict from the per-team features.
# NOTE(review): joblib.load unpickles the file, so only load model files you trust.
# NOTE(review): presumably the model was trained on features in the same column
# order that build_features produces -- confirm against the training notebook.
model_1 = joblib.load('../data/linreg_model.pkl')

pred_1617 = model_1.predict(feat_1617)

In [9]:
# Pair every team with its predicted points (ranking) and with its actual
# points (realrank); both lists hold (team, points) tuples in teams_1617 order.
ranking = list(zip(teams_1617, pred_1617))
realrank = list(zip(teams_1617, targ_1617))
    

In [25]:
# Print the predicted table, highest predicted points first.
# The sort is in place, so `ranking` stays sorted for any later cell.
ranking.sort(key=lambda x: x[1], reverse=True)
for t, p in ranking:
    # One formatted string prints "<team> <points>" identically on Python 2 and 3.
    print('%s %.1f' % (t, p))

Napoli 9.3
Roma 9.3
Torino 8.4
Juventus 7.6
Inter 7.4
Genoa 6.2
Sassuolo 5.7
Lazio 5.7
Bologna 5.5
Sampdoria 5.3
Chievo 5.3
Cagliari 4.8
Milan 4.6
Pescara 4.4
Atalanta 4.3
Empoli 4.1
Udinese 3.5
Palermo 2.9
Fiorentina 2.8
Crotone 2.2


In [17]:
# Compare the actual points with the current model's predictions.
pred_eval = model_1.predict(feat_1617)  # predict once, reuse for printing and the error

print(targ_1617)
print(pred_eval)
# For scikit-learn regressors score() is R^2 (coefficient of determination):
# 1 is a perfect prediction.
print('Variance score: %.2f' % model_1.score(feat_1617, targ_1617))
# np.mean of the squared residuals is the mean squared error, not a residual
# sum of squares, so the label says what is actually computed.
print("Mean squared error: %.2f"
      % np.mean((pred_eval - np.asarray(targ_1617)) ** 2))

[9, 3, 7, 6, 3, 4, 6, 0, 4, 4, 3, 6, 6, 1, 3, 3, 1, 6, 1, 7]
[ 6.6162023   1.78152154  8.00756549  2.68493654  4.15749324  2.69441426
  4.99625778  1.45034291  3.20803299  5.69114916  3.76505933  4.15569381
  4.64998251  2.90550728  3.50982837  6.73109731  1.25624688  4.35953464
  3.86378384  6.58277064]
Variance score: 0.42
Residual sum of squares: 3.18


In [12]:
# Load a second saved model and predict from the same features.
# NOTE(review): this rebinds model_1 and pred_1617, so any earlier cell re-run
# after this one uses this model's values instead of the first model's.
# NOTE(review): joblib.load unpickles the file, so only load model files you trust.
model_1 = joblib.load('../data/regtree_model.pkl')

pred_1617 = model_1.predict(feat_1617)

In [26]:
# Show the current predictions next to the actual points.
# Single-argument print(...) gives the same output on Python 2 and Python 3.
print(pred_1617)
print(targ_1617)

[ 7.5620905   2.80125714  9.25664294  3.50680177  4.26056926  5.6904567
  5.45391209  2.21995481  5.31535688  7.3547763   4.11077132  5.32366018
  6.21354512  4.84450966  4.5698418   8.40492037  2.87155497  5.74984379
  4.35735024  9.34533022]
[9, 6, 7, 6, 3, 7, 6, 1, 7, 7, 4, 6, 6, 4, 6, 4, 2, 9, 1, 10]
