In [1]:
import pandas as pd 
from sklearn.externals import joblib
import numpy as np

In [2]:
# Match-level results file I1 from the 1617 season directory of football-data.co.uk.
results_url = "http://www.football-data.co.uk/mmz4281/1617/I1.csv"
df_week = pd.read_csv(results_url)

In [3]:
# Number of rows (matches) in which Inter is either the home or the away side.
len(df_week[(df_week["HomeTeam"] == "Inter") | (df_week["AwayTeam"] == "Inter")])

37

In [4]:
def get_teams(df, n_matches=10):
    """Return the team names found in the first ``n_matches`` rows of ``df``.

    Rows are read in order; for each one the home team is appended first and
    the away team second, so the result has two entries per row. Duplicates
    are not removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Match table with ``HomeTeam`` and ``AwayTeam`` columns.
    n_matches : int, optional
        How many leading rows to read. Defaults to 10, which is the number of
        rows the original implementation consumed.

    Returns
    -------
    list
        Team names, ``[home_0, away_0, home_1, away_1, ...]``. When ``df`` has
        fewer than ``n_matches`` rows, the teams of all available rows are
        returned (the previous version returned ``None`` in that case).

    Raises
    ------
    ValueError
        If ``n_matches`` is negative.
    """
    if n_matches < 0:
        # DataFrame.head treats a negative count as "all but the last |n| rows",
        # which is never what a caller of this function means.
        raise ValueError("n_matches must be non-negative")

    teams = []
    opening = df.head(n_matches)
    for home, away in zip(opening["HomeTeam"], opening["AwayTeam"]):
        teams.append(home)
        teams.append(away)

    return teams


In [5]:
def build_features(df, teams):
    """Aggregate per-team shot and corner totals over every match in ``df``.

    For each team, a "made"/"awarded" total adds the away-side column summed
    over the team's away matches to the home-side column summed over its home
    matches; the matching "conceded" total uses the opposite columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Match table with ``HomeTeam``, ``AwayTeam``, ``HS``, ``AS``, ``HST``,
        ``AST``, ``HC`` and ``AC`` columns.
    teams : iterable
        Team names; one output row is produced per entry, in the same order.

    Returns
    -------
    list of list
        Each row is ``[shots, shots conceded, shots on target,
        shots on target conceded, corners, corners conceded]``.
    """
    # (column summed over away matches, column summed over home matches),
    # listed in the order the totals appear in each output row.
    stat_columns = [
        ("AS", "HS"),    # shots made
        ("HS", "AS"),    # shots conceded
        ("AST", "HST"),  # shots on target made
        ("HST", "AST"),  # shots on target conceded
        ("AC", "HC"),    # corners awarded
        ("HC", "AC"),    # corners conceded
    ]

    rows = []
    for name in teams:
        home_games = df[df['HomeTeam'] == name]
        away_games = df[df['AwayTeam'] == name]
        rows.append([
            away_games[away_col].sum() + home_games[home_col].sum()
            for away_col, home_col in stat_columns
        ])

    return rows

In [6]:
def build_target(df, teams):
    """Compute each team's points total from the full-time scores in ``df``.

    A win (home or away) is worth 3 points and a draw 1 point; rows whose
    goal columns do not compare as a win or a draw contribute nothing.

    Parameters
    ----------
    df : pandas.DataFrame
        Match table with ``HomeTeam``, ``AwayTeam``, ``FTHG`` and ``FTAG``
        columns.
    teams : iterable
        Team names; one points value is produced per entry, in the same order.

    Returns
    -------
    list of int
        Points per team.
    """
    points = []

    for name in teams:
        is_home = df['HomeTeam'] == name
        is_away = df['AwayTeam'] == name

        # Count rows by summing boolean masks; int() keeps the result a plain
        # Python int, as len() did.
        home_wins = int((is_home & (df['FTHG'] > df['FTAG'])).sum())
        away_wins = int((is_away & (df['FTAG'] > df['FTHG'])).sum())
        draws = int(((is_home | is_away) & (df['FTAG'] == df['FTHG'])).sum())

        points.append(3 * away_wins + 3 * home_wins + draws)

    return points


In [7]:
# Team names taken from the opening rows of the match table (see get_teams);
# this list fixes the row order of the target and feature lists built from it.
teams_1617 = get_teams(df_week)


In [8]:
# Points total per team (3 per win, 1 per draw) and the six shot/corner
# aggregates per team, both in teams_1617 order.
targ_1617 = build_target(df_week, teams_1617)
feat_1617 = build_features(df_week, teams_1617)

In [11]:
# Match files from the 1617 season directory for four leagues: E0 (df_en),
# I1 (df_it), D1 (df_de) and Spain (df_es).
# football-data.co.uk publishes the Spanish top division as SP1.csv; "L1" is
# not one of its division codes, so the previous URL could not be fetched.
df_en = pd.read_csv("http://www.football-data.co.uk/mmz4281/1617/E0.csv")
df_it = pd.read_csv("http://www.football-data.co.uk/mmz4281/1617/I1.csv")
df_de = pd.read_csv("http://www.football-data.co.uk/mmz4281/1617/D1.csv")
df_es = pd.read_csv("http://www.football-data.co.uk/mmz4281/1617/SP1.csv")


([[566, 341, 214, 106, 212, 127],
  [585, 443, 218, 164, 228, 161],
  [652, 455, 227, 150, 253, 205],
  [417, 455, 151, 151, 172, 216],
  [521, 394, 195, 147, 223, 173],
  [575, 405, 208, 158, 232, 143],
  [414, 511, 143, 181, 153, 225],
  [388, 563, 125, 213, 153, 248],
  [401, 564, 126, 185, 142, 246],
  [613, 390, 202, 157, 300, 154],
  [345, 594, 123, 208, 164, 196],
  [489, 479, 165, 172, 190, 207],
  [430, 454, 143, 165, 190, 171],
  [404, 630, 165, 205, 138, 243],
  [537, 506, 184, 179, 236, 222],
  [557, 523, 175, 169, 214, 205],
  [371, 530, 113, 202, 162, 223],
  [420, 576, 164, 191, 210, 279],
  [427, 576, 140, 209, 173, 217],
  [648, 371, 255, 124, 256, 140]],
 ['Juventus',
  'Fiorentina',
  'Roma',
  'Udinese',
  'Atalanta',
  'Lazio',
  'Bologna',
  'Crotone',
  'Chievo',
  'Inter',
  'Empoli',
  'Sampdoria',
  'Genoa',
  'Cagliari',
  'Milan',
  'Torino',
  'Palermo',
  'Sassuolo',
  'Pescara',
  'Napoli'])

In [12]:
# Load a previously fitted estimator from disk. joblib.load unpickles the file,
# which can execute arbitrary code -- only load model files from a trusted source.
model_1 = joblib.load('../data/linreg_model.pkl')
# NOTE(review): presumably the model was trained on features in the same
# column order that build_features produces -- confirm against the training code.
pred_1617 = model_1.predict(feat_1617)

In [13]:
# (team, predicted value, actual points) for every team, in teams_1617 order.
# Built with zip instead of append loops so no loop variables (t, p, tg) are
# left behind in the notebook namespace.
ranking = list(zip(teams_1617, pred_1617, targ_1617))

# (team, actual points) for every team, in teams_1617 order.
realrank = list(zip(teams_1617, targ_1617))


In [17]:
# `score` is whatever metric the loaded estimator defines; for scikit-learn
# regressors that is R^2 -- presumably the case for this model (confirm).
print('Variance score: %.2f' % model_1.score(feat_1617, targ_1617))
# np.mean of the squared errors is the mean squared error, not a residual
# *sum* of squares, so the label now says what is actually computed.
print("Mean squared error: %.2f"
      % np.mean((model_1.predict(feat_1617) - targ_1617) ** 2))

Variance score: 0.63
Residual sum of squares: 26.33


In [23]:
# Actual points per team, then the model's predictions, in teams_1617 order.
# The parenthesised single-argument form prints the same text under Python 2
# and is also valid Python 3, matching the print calls used for the metrics.
print(targ_1617)
print(model_1.predict(feat_1617))

[36, 23, 32, 18, 28, 28, 16, 6, 19, 21, 10, 22, 20, 20, 32, 25, 6, 17, 8, 28]
[ 29.37067023  24.38342338  29.68291943  19.47851403  21.08370007
  24.05765331  14.88181823  11.55074775  16.24164547  27.59179919
  12.91220621  23.49038088  18.46895131  15.03955121  20.60065717
  26.18245254  11.79521552  18.84039917  18.76509135  30.20139132]
