In [2]:
import os
import numpy as np
import pandas as pd

In [3]:
match_info = ["Season",
              "League",
              "Date",
              "HomeTeam",
              "AwayTeam"]
match_statistics = ["FTHG",
                    "FTAG",
                    "FTR",
                    "HTHG",
                    "HTAG",
                    "HTR",
                    "Attendance",
                    "Referee",
                    "HS",
                    "AS",
                    "HST",
                    "AST",
                    "HHW",
                    "AHW",
                    "HC",
                    "AC",
                    "HF",
                    "AF",
                    "HO",
                    "AO",
                    "HY",
                    "AY",
                    "HR",
                    "AR",
                    "HBP",
                    "ABP"]

In [4]:
def get_measures():
    if os.path.isfile("../data/processed/measures.csv"):
        measures = pd.read_csv("../data/processed/measures.csv")
        measures.Date = pd.to_datetime(measures.Date)
        return measures
    else:
        measures = pd.read_csv("../data/processed/historical_scores.csv", index_col=0)
        measures = measures[match_info]
        measures.Date = pd.to_datetime(measures.Date)
        measures = measures.reset_index()
        measures.to_csv("../data/processed/measures.csv")
        return measures

measures = get_measures()
raw_data = pd.read_csv("../data/processed/historical_scores.csv", index_col=0)
raw_data = raw_data[match_info + match_statistics]
raw_data.Date = pd.to_datetime(raw_data.Date)
raw_data = raw_data.reset_index()

In [7]:
idx = 551
df = raw_data
date, home_team, away_team = df.loc[idx, ["Date", "HomeTeam", "AwayTeam"]]

row_filter = df.Date < date
row_filter = row_filter & ((df.HomeTeam.isin([home_team, away_team])) |
                           df.AwayTeam.isin([home_team, away_team]))

print(date, home_team, away_team)
temp_df = df[row_filter]

1994-08-05 00:00:00 Wolves Leicester


Unnamed: 0,index,Season,League,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,...,HF,AF,HO,AO,HY,AY,HR,AR,HBP,ABP
5,5,9394,2,1993-08-14,Leicester,Peterboro,2,1,H,,...,,,,,,,,,,
10,10,9394,2,1993-08-14,Wolves,Bristol City,3,1,H,,...,,,,,,,,,,
22,22,9394,2,1993-08-21,Tranmere,Leicester,1,0,H,,...,,,,,,,,,,
25,25,9394,2,1993-08-22,Birmingham,Wolves,2,2,D,,...,,,,,,,,,,
31,31,9394,2,1993-08-25,Wolves,Millwall,2,0,H,,...,,,,,,,,,,
37,37,9394,2,1993-08-28,Leicester,Millwall,4,0,H,,...,,,,,,,,,,
43,43,9394,2,1993-08-28,Wolves,Middlesbrough,2,3,A,,...,,,,,,,,,,
54,54,9394,2,1993-05-09,West Brom,Wolves,3,2,H,,...,,,,,,,,,,
55,55,9394,2,1993-07-09,Watford,Wolves,1,0,H,,...,,,,,,,,,,
65,65,9394,2,1993-11-09,Wolves,Portsmouth,1,1,D,,...,,,,,,,,,,


In [52]:
def home_win_percentage(df, n_games):
    temp_df = df[(df.HomeTeam == home_team) | (df.AwayTeam == home_team)]
    temp_df = temp_df.sort_values("Date", ascending=False).head(n_games)[["HomeTeam", "AwayTeam", "FTR"]]
    if len(temp_df) < n_games:
        return "NaN"
    wins = ((temp_df.HomeTeam == home_team) & (temp_df.FTR == "H")) |\
           ((temp_df.AwayTeam == home_team) & (temp_df.FTR == "A"))
    win_percent = wins.sum() / n_games
    print(temp_df)
    return win_percent #round(win_percent, 2)

In [53]:
home_win_percentage(temp_df, 15)

            HomeTeam    AwayTeam FTR
33101         Wolves       Luton   A
378    Middlesbrough      Wolves   H
334            Stoke      Wolves   D
521         Barnsley      Wolves   H
512           Wolves       Luton   H
501         Millwall      Wolves   H
491     Notts County      Wolves   A
33144       Millwall      Wolves   H
461           Oxford      Wolves   H
434           Bolton      Wolves   A
425         Charlton      Wolves   A
417           Wolves     Grimsby   D
402       Portsmouth      Wolves   H
32967     Sunderland      Wolves   D
538           Wolves  Sunderland   D


0.26666666666666666

In [54]:
raw_data.tail()

Unnamed: 0,index,Season,League,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,...,HF,AF,HO,AO,HY,AY,HR,AR,HBP,ABP
46807,547,1516,3,2016-08-05,Peterboro,Blackpool,5,1,H,0,...,16,7,,,3,1,0,0,,
46808,548,1516,3,2016-08-05,Port Vale,Walsall,0,5,A,0,...,7,5,,,2,1,0,0,,
46809,549,1516,3,2016-08-05,Sheffield United,Scunthorpe,0,2,A,0,...,11,6,,,1,0,0,0,,
46810,550,1516,3,2016-08-05,Swindon,Shrewsbury,3,0,H,1,...,7,16,,,0,3,0,0,,
46811,551,1516,3,2016-08-05,Wigan,Barnsley,1,4,A,1,...,11,6,,,1,1,1,0,,
