In [1]:
import pandas as pd

In [2]:
class fixtures():
    
    def __init__(self,filename):
        self.df = pd.DataFrame()
        self.load(filename)
        
    def load(self, filename):
        self.df = pd.read_csv(filename)
        self.df['FixtureDateAsDate']=pd.to_datetime(self.df['fixturedate'])
        self.df=self.df.sort_values(by='FixtureDateAsDate')
        
        return self
    
    def clean_fixtures(self):
        self.df.FTHG.fillna(0, inplace=True)
        self.df.FTAG.fillna(0, inplace=True)
        return self
    
    def add_features(self, nlist):
        grouped = self.df.groupby('HomeTeamID')

        for n in nlist:
            print ('***', n)
            rollmean_H = grouped['FTHG'].apply(pd.rolling_mean, n, min_periods=n)
            self.df['FTHG_roll_avg_'+str(n)]=(rollmean_H*(n+1) - self.df['FTHG'])/n

            grouped = self.df.groupby('AwayTeamID')
            rollmean_H = grouped['FTAG'].apply(pd.rolling_mean, n+1, min_periods=n+1)
            self.df['FTAG_roll_avg_'+str(n)]=(rollmean_H*(n+1) - self.df['FTAG'])/n

            self.df['FTHG_roll_avg_diff_'+str(n)]= self.df['FTHG_roll_avg_'+str(n)] - self.df['FTAG_roll_avg_'+str(n)]

        self.df['PointsDiff'] = self.df['HTPointsCum'] - self.df['ATPointsCum']
        
        return self

    def prep_classification(self, nlist, cleanna):

        cols=['HTPointsCum', 'ATPointsCum', 'PointsDiff']
        for colname in [ 'FTHG_roll_avg', 'FTAG_roll_avg', 'FTHG_roll_avg_diff']:
            cols.extend([colname+'_'+str(n) for n in nlist])
        if cleanna :
            self.df.dropna(axis=0,inplace=True)
        X=self.df[ cols ]
        y = self.df['ThisResult']


        return X, y
    
    def fixture_filter(self, start_date, end_date=None):
        if start_date:
            self.df=self.df[self.df.FixtureDateAsDate >= start_date]
        if end_date:
            self.df=self.df[self.df.FixtureDateAsDate <= end_date]
        
        return self


In [3]:
myfix=fixtures('test_football1.csv').fixture_filter(None,'2016-01-01')

In [4]:
myfix = myfix.clean_fixtures().add_features([3,4,5])
X,y=myfix.prep_classification([3,4,5],True)

*** 3
*** 4
*** 5


In [5]:
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

In [6]:
    clf =  RandomForestClassifier(max_depth=10, n_estimators=40, min_samples_leaf=10)

    X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3, random_state=0)
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    print(accuracy_score(y_test, y_pred))

    clf

0.601699279094


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=10, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=10, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=40, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [7]:
pred_fix=fixtures('test_football1.csv')

In [8]:
pred_fix = pred_fix.clean_fixtures().add_features([3,4,5])
pred_fix = pred_fix.fixture_filter('2017-01-21','2017-01-21')
X_pred,y=pred_fix.prep_classification([3,4,5],False)

*** 3
*** 4
*** 5


In [9]:
pred_fix.df['Predicted']=clf.predict(X_pred)
pred_fix.df[['fixturedate', 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'Predicted']].head()

Unnamed: 0,fixturedate,HomeTeam,AwayTeam,FTHG,FTAG,Predicted
198,2017-01-21,Grimsby,Notts County,2,0,W
212,2017-01-21,Walsall,Bristol Rvs,3,1,L
211,2017-01-21,Man City,Tottenham,2,2,W
210,2017-01-21,Milton Keynes Dons,Northampton,5,3,L
209,2017-01-21,Norwich,Wolves,3,1,W


In [11]:
pred_fix.df[pred_fix.df.fixturedate=='2017-01-21']

Unnamed: 0,dummy,LeagueAlternateCode,seasonID,fixturedate,HomeTeam,AwayTeam,HomeTeamID,AwayTeamID,FTHG,FTAG,...,FTAG_roll_avg_3,FTHG_roll_avg_diff_3,FTHG_roll_avg_4,FTAG_roll_avg_4,FTHG_roll_avg_diff_4,FTHG_roll_avg_5,FTAG_roll_avg_5,FTHG_roll_avg_diff_5,PointsDiff,Predicted
198,x,SKY BET LEAGUE TWO,2016,2017-01-21,Grimsby,Notts County,52,80,2,0,...,0.666667,-0.444444,3.25,1.0,2.25,3.2,0.8,2.4,3,W
212,x,SKY BET LEAGUE ONE,2016,2017-01-21,Walsall,Bristol Rvs,108,21,3,1,...,0.666667,1.444444,3.0,1.0,2.0,3.0,0.8,2.2,-3,L
211,x,PREMIER LEAGUE,2016,2017-01-21,Man City,Tottenham,68,106,2,2,...,2.666667,-0.666667,1.0625,2.25,-1.1875,1.28,2.0,-0.72,0,W
210,x,SKY BET LEAGUE ONE,2016,2017-01-21,Milton Keynes Dons,Northampton,73,77,5,3,...,0.333333,1.555556,2.1875,1.0,1.1875,2.12,0.8,1.32,-4,L
209,x,SKY BET CHAMPIONSHIP,2016,2017-01-21,Norwich,Wolves,78,115,3,1,...,0.666667,1.444444,1.125,0.5,0.625,1.08,0.6,0.48,5,W
208,x,SKY BET LEAGUE ONE,2016,2017-01-21,Charlton,Scunthorpe,29,92,0,0,...,1.0,0.777778,0.9375,1.25,-0.3125,1.44,1.4,0.04,-1,L
207,x,PREMIER LEAGUE,2016,2017-01-21,West Brom,Sunderland,111,102,2,0,...,0.666667,0.888889,3.25,0.5,2.75,2.96,0.8,2.16,11,W
206,x,SKY BET LEAGUE ONE,2016,2017-01-21,Swindon,Peterboro,104,83,0,1,...,1.333333,-0.888889,1.875,1.0,0.875,1.44,1.0,0.44,-4,L
205,x,SKY BET LEAGUE TWO,2016,2017-01-21,Doncaster,Crewe,42,37,3,1,...,0.333333,1.777778,1.75,0.25,1.5,2.28,0.2,2.08,4,W
204,x,SKY BET LEAGUE ONE,2016,2017-01-21,Shrewsbury,Oldham,95,81,1,0,...,0.333333,0.222222,1.3125,0.75,0.5625,1.48,0.6,0.88,-3,L
