In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

In [2]:
# Path of the raw match-results CSV, relative to the notebook's working directory.
filename = 'datasets/I1-2019.csv'
# Raw results table; its columns are listed by the next cell.
df = pd.read_csv(filename)

In [3]:
# Number of previous games per team that feed the rolling goal totals
# (used as the rolling-window size when the ratings are built).
N_PREV_GAMES = 6

In [4]:
# Inspect which columns the raw CSV provides.
df.columns

Index(['Div', 'Date', 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'FTR', 'HTHG',
       'HTAG', 'HTR', 'HS', 'AS', 'HST', 'AST', 'HF', 'AF', 'HC', 'AC', 'HY',
       'AY', 'HR', 'AR', 'B365H', 'B365D', 'B365A', 'BWH', 'BWD', 'BWA', 'IWH',
       'IWD', 'IWA', 'PSH', 'PSD', 'PSA', 'WHH', 'WHD', 'WHA', 'VCH', 'VCD',
       'VCA', 'Bb1X2', 'BbMxH', 'BbAvH', 'BbMxD', 'BbAvD', 'BbMxA', 'BbAvA',
       'BbOU', 'BbMx>2.5', 'BbAv>2.5', 'BbMx<2.5', 'BbAv<2.5', 'BbAH', 'BbAHh',
       'BbMxAHH', 'BbAvAHH', 'BbMxAHA', 'BbAvAHA', 'PSCH', 'PSCD', 'PSCA'],
      dtype='object')

In [11]:
# Distinct team names, in order of first appearance as the home side.
teams = df['HomeTeam'].drop_duplicates().tolist()
print(teams)

['Chievo', 'Lazio', 'Bologna', 'Empoli', 'Parma', 'Sassuolo', 'Torino', 'Atalanta', 'Juventus', 'Napoli', 'Cagliari', 'Fiorentina', 'Frosinone', 'Genoa', 'Inter', 'Spal', 'Udinese', 'Roma', 'Milan', 'Sampdoria']


In [268]:
# Working copy holding only the columns needed for the ratings:
# match date, both teams, full-time goals (home/away) and full-time result.
stats = df.loc[
    :,
    ['Date', 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'FTR'],
].copy()

In [169]:
def scored(row, team):
    """Return the goals `team` scored in the match described by `row`.

    `row` must expose 'HomeTeam' / 'AwayTeam' by key and FTHG / FTAG as
    attributes (e.g. a row of the results DataFrame).  Returns None when
    `team` is neither the home nor the away side.
    """
    if row['HomeTeam'] == team:
        goals = row.FTHG   # team played at home -> full-time home goals
    elif row['AwayTeam'] == team:
        goals = row.FTAG   # team played away -> full-time away goals
    else:
        goals = None
    return goals
    
def conceded(row, team):
    """Return the goals `team` conceded in the match described by `row`.

    Mirror image of the goals-scored lookup: the home side concedes the
    away goals (FTAG) and the away side concedes the home goals (FTHG).
    Returns None when `team` is neither the home nor the away side.
    """
    if row['HomeTeam'] == team:
        goals = row.FTAG
    elif row['AwayTeam'] == team:
        goals = row.FTHG
    else:
        goals = None
    return goals

In [181]:
# Maps team name -> that team's matches with per-game and rolling goal columns
# (filled by the per-team loop in the following cell).
all_team_stats = {}

In [231]:
# Build, for every team, a table of its own matches with a form "rating":
# goals scored minus goals conceded over the previous N_PREV_GAMES games.
# NOTE: the rolling windows follow row order, so this assumes `stats` is
# sorted chronologically -- TODO confirm for the input file.
for team in teams:
    involved = (stats['HomeTeam'] == team) | (stats['AwayTeam'] == team)
    team_stats = stats[involved].copy()

    # Vectorised home/away switch instead of a row-wise apply: this is faster
    # and also works when a team has no rows at all.
    at_home = team_stats['HomeTeam'] == team
    team_stats['scored'] = np.where(at_home, team_stats['FTHG'], team_stats['FTAG'])
    team_stats['conceded'] = np.where(at_home, team_stats['FTAG'], team_stats['FTHG'])

    # Totals over the last N_PREV_GAMES games (NaN until enough games exist).
    team_stats['rolling_scored'] = team_stats['scored'].rolling(window=N_PREV_GAMES).sum()
    team_stats['rolling_conceded'] = team_stats['conceded'].rolling(window=N_PREV_GAMES).sum()

    # Shift by one game so the rating on a match's row only uses earlier
    # matches and never the result of that match itself.
    team_stats['rating'] = (
        team_stats['rolling_scored'] - team_stats['rolling_conceded']
    ).shift(1)

    # Dates in the file are day-first (e.g. 26/09/2018); say so explicitly so
    # that days <= 12 are not read as months.  Replaces the deprecated
    # `infer_datetime_format` flag.
    team_stats['Date'] = pd.to_datetime(team_stats['Date'], dayfirst=True)

    all_team_stats[team] = team_stats

In [269]:
# Expose the row label as a regular column so that row-wise functions
# (calc_match_rating reads `row.id`) can look the match up in the per-team tables.
stats['id'] = stats.index

In [271]:
def calc_match_rating(row):
    """Return the home team's rating minus the away team's rating for one match.

    `row` is a fixture row providing 'HomeTeam', 'AwayTeam' and 'id', where
    'id' is the row label shared with the per-team tables in `all_team_stats`.
    The result is NaN when either team's rating is missing for that match.
    """
    match_id = row['id']
    # Read the teams from the row itself rather than re-querying the global
    # `stats` frame for the same values.
    home_rating = all_team_stats[row['HomeTeam']].loc[match_id, 'rating']
    away_rating = all_team_stats[row['AwayTeam']].loc[match_id, 'rating']
    return home_rating - away_rating

In [273]:
# Row-wise: home rating minus away rating for every fixture.
stats['match_rating'] = stats.apply(calc_match_rating, axis=1)

In [290]:
# Preview rows 50-59 of `stats`.
# NOTE(review): this cell used to call `stats.to_csv('')`; an empty target path
# cannot be written to, and the recorded output below is this slice of `stats`.
# If a CSV export was intended, add it back with a real file path.
stats.iloc[50:60]

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,id,match_rating
50,26/09/2018,Atalanta,Torino,0,0,D,50,
51,26/09/2018,Cagliari,Sampdoria,0,0,D,51,
52,26/09/2018,Genoa,Chievo,2,0,H,52,
53,26/09/2018,Juventus,Bologna,2,0,H,53,
54,26/09/2018,Napoli,Parma,3,0,H,54,
55,26/09/2018,Roma,Frosinone,4,0,H,55,
56,26/09/2018,Udinese,Lazio,1,2,A,56,
57,27/09/2018,Empoli,Milan,1,1,D,57,
58,27/09/2018,Spal,Sassuolo,0,2,A,58,
59,29/09/2018,Inter,Cagliari,2,0,H,59,6.0


In [272]:
# Spot check: pre-match ratings of both sides for one fixture.
i = 60  # Juventus - Napoli
print(all_team_stats[stats.loc[i, 'HomeTeam']].loc[i, 'rating'])
print(all_team_stats[stats.loc[i, 'AwayTeam']].loc[i, 'rating'])

9.0
5.0


In [278]:
# Spot check: pre-match ratings of both sides for one fixture.
i = 61  # Roma - Lazio
print(all_team_stats[stats.loc[i, 'HomeTeam']].loc[i, 'rating'])
print(all_team_stats[stats.loc[i, 'AwayTeam']].loc[i, 'rating'])

2.0
3.0


In [120]:
def scored(row, team):
    """Return the goals `team` scored in the match `row`, or None if it did not play.

    NOTE: this repeats a `scored` definition from an earlier cell of the
    notebook; whichever cell is executed last is the one in effect.
    """
    # (team column, goals column credited to the team in that column)
    for team_col, goals_col in (('HomeTeam', 'FTHG'), ('AwayTeam', 'FTAG')):
        if row[team_col] == team:
            return getattr(row, goals_col)
    return None
    
def conceded(row, team):
    """Return the goals `team` conceded in the match `row`, or None if it did not play.

    NOTE: this repeats a `conceded` definition from an earlier cell of the
    notebook; whichever cell is executed last is the one in effect.
    """
    # (team column, goals column scored by the *opponent* of that column)
    for team_col, goals_col in (('HomeTeam', 'FTAG'), ('AwayTeam', 'FTHG')):
        if row[team_col] == team:
            return getattr(row, goals_col)
    return None

def get_last_scored(df, team):
    """Return the total goals `team` scored across all matches in `df`.

    Parameters
    ----------
    df : DataFrame with 'HomeTeam', 'AwayTeam', 'FTHG' and 'FTAG' columns.
        The caller decides the window by passing only the rows of interest.
    team : team name to total up.

    Returns 0 when `team` does not appear in `df` (including an empty `df`).
    The earlier row-wise `apply` yields a DataFrame on an empty selection,
    which cannot be stored as a single column.
    """
    is_home = df['HomeTeam'] == team
    # `& ~is_home` keeps each row counted at most once, home side first.
    is_away = (df['AwayTeam'] == team) & ~is_home
    return df.loc[is_home, 'FTHG'].sum() + df.loc[is_away, 'FTAG'].sum()

# Goals Juventus scored within the first 60 fixtures of the table.
print(get_last_scored(stats.iloc[:60], 'Juventus'))

def get_last_conceded(df, team):
    """Return the total goals `team` conceded across all matches in `df`.

    Parameters
    ----------
    df : DataFrame with 'HomeTeam', 'AwayTeam', 'FTHG' and 'FTAG' columns.
        The caller decides the window by passing only the rows of interest.
    team : team name to total up.

    Returns 0 when `team` does not appear in `df` (including an empty `df`).
    The earlier row-wise `apply` yields a DataFrame on an empty selection,
    which cannot be stored as a single column.
    """
    is_home = df['HomeTeam'] == team
    # `& ~is_home` keeps each row counted at most once, home side first.
    is_away = (df['AwayTeam'] == team) & ~is_home
    # The home side concedes the away goals and vice versa.
    return df.loc[is_home, 'FTAG'].sum() + df.loc[is_away, 'FTHG'].sum()

# Goals Juventus conceded within the first 60 fixtures of the table.
print(get_last_conceded(stats.iloc[:60], 'Juventus'))

13
4


In [137]:
# Rate both sides of every fixture by their goal difference over the
# preceding block of fixtures.
#
# The block is 10 fixtures per N_PREV_GAMES (60 rows for 6 games), tied to the
# constant instead of a separate literal 60.
# NOTE(review): a fixed block of rows equals each team's last N_PREV_GAMES
# games only if every team plays exactly once per 10 rows; postponed fixtures
# break that assumption.
window = 10 * N_PREV_GAMES

# Write by column *name*.  Positional columns 6 and 7 are only the rating
# columns if they happen to sit there; otherwise whatever occupies those
# positions is silently overwritten.  Rows before the first full window keep 0.
for rating_col in ('HomeRating', 'AwayRating'):
    if rating_col not in stats.columns:
        stats[rating_col] = 0

for i in range(window, stats.shape[0]):
    previous = stats.iloc[i - window:i]   # the `window` fixtures before row i
    fixture = stats.iloc[i]
    label = stats.index[i]

    stats.loc[label, 'HomeRating'] = (
        get_last_scored(previous, fixture.HomeTeam)
        - get_last_conceded(previous, fixture.HomeTeam)
    )
    stats.loc[label, 'AwayRating'] = (
        get_last_scored(previous, fixture.AwayTeam)
        - get_last_conceded(previous, fixture.AwayTeam)
    )

In [140]:
# Five fixtures starting at position 59 (the last unrated row and the first rated ones).
stats.iloc[59:64]

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HomeRating,AwayRating
59,29/09/2018,Inter,Cagliari,2,0,H,0,0
60,29/09/2018,Juventus,Napoli,3,1,H,9,5
61,29/09/2018,Roma,Lazio,3,1,H,2,3
62,30/09/2018,Bologna,Udinese,2,1,H,-5,1
63,30/09/2018,Chievo,Torino,0,1,A,-9,-2


In [141]:
# Goals Napoli scored within the first 60 fixtures of the table.
get_last_scored(stats.iloc[:60], 'Napoli')

12

In [142]:
# Goals Napoli conceded within the first 60 fixtures of the table.
get_last_conceded(stats.iloc[:60], 'Napoli')

7