In [1]:
import pandas as pd

In [32]:
def init_pl_teams():
    teams = []
    teamstring = """
     AFC Bournemouth
     Arsenal
     Burnley
     Chelsea
     Crystal Palace
     Everton
     Hull City
     Leicester City
     Liverpool
     Manchester City
     Manchester United
     Middlesbrough
     Southampton
     Stoke City
     Sunderland
     Swansea City
     Tottenham Hotspur
     Watford
     West Bromwich Albion
     West Ham United"""

    for team in teamstring.split('\n'):
        if len(team.strip()) > 1:
            teams.append(team.strip())

    return teams
pl_teams = init_pl_teams()

def is_pl_team(team):
    global pl_teams
    return team in pl_teams
        
pl_teams

['AFC Bournemouth',
 'Arsenal',
 'Burnley',
 'Chelsea',
 'Crystal Palace',
 'Everton',
 'Hull City',
 'Leicester City',
 'Liverpool',
 'Manchester City',
 'Manchester United',
 'Middlesbrough',
 'Southampton',
 'Stoke City',
 'Sunderland',
 'Swansea City',
 'Tottenham Hotspur',
 'Watford',
 'West Bromwich Albion',
 'West Ham United']

In [97]:
seasons = ['2016','2015','2014','2013','2012']
season_histories = {season:None for season in seasons}
for season in seasons:
    df = pd.read_csv('premier-league-' + season +'.csv')
    season_histories[season] = df
#season_histories

{'2012':              date                 home                      away  home_goals  \
 0     06 Aug 2011    Charlton Athletic           AFC Bournemouth           3   
 1     09 Aug 2011      AFC Bournemouth    Dagenham and Redbridge           5   
 2     13 Aug 2011      AFC Bournemouth       Sheffield Wednesday           2   
 3     16 Aug 2011      AFC Bournemouth                 Stevenage           1   
 4     20 Aug 2011      Carlisle United           AFC Bournemouth           2   
 5     23 Aug 2011      AFC Bournemouth      West Bromwich Albion           1   
 6     27 Aug 2011      AFC Bournemouth                   Walsall           0   
 7     30 Aug 2011      AFC Bournemouth           Hereford United           4   
 8     03 Sep 2011         Notts County           AFC Bournemouth           3   
 9     10 Sep 2011      AFC Bournemouth              Chesterfield           0   
 10    13 Sep 2011        Leyton Orient           AFC Bournemouth           1   
 11    17 Sep 2011  

# Baseline Predictor
The baseline predictor will attempt to predict the outcome of a football match (either a home win, draw, or home loss), by considering the following factors:

    1) The overall win/loss ratio of each of the two teams
    2) The win/loss ratio of the two teams specifically against each other
    3) The sum of the FIFA_OVA ratings for the players on each team (more involved because we need the roster for each team for each game we want to predict).

In [81]:
# get the overall win/loss/draw ratio for teams 
def get_h_ratios(team, seasons):
    #home overall win count, home overall draw count, home overall loss count, home overall number of games
    home_w, home_d, home_l, home_g = 0,0,0,0
        
    #away overall win count, away overall draw count, away overall loss count, away oveall number of games
    away_w, away_d, away_l, away_g = 0,0,0,0

    home_ow_rat, away_ow_rat, home_ol_rat, away_ol_rat, home_od_rat, away_od_rat = 0,0,0,0,0,0
    
    #print('-------------' + team + '----------------------')
    
    #iterate through the seasons
    for season in seasons:
        #change the name for ease of use
        df = season_histories[season]
        
        #get a dataframe of all the games from this season that the home team played in (either as home or away)
        home_games = df[(df['home'] == team) & (df['league'] == 'Premier League')]
        
        #same for the away team
        away_games = df[(df['away'] == team) & (df['league'] == 'Premier League')]
        
        
        # add on total number of overall games for each    
        home_g += len(home_games.index)
        #print("Total Home Games played in Season " + season + " = " + str(home_g))
        away_g += len(away_games.index)
        #print("Total Away Games played in Season " + season + " = " + str(away_g))
        
        # lets count how many games the team at home won, drew, and lost (overall)
        home_w += len(home_games[home_games['result'] == 'W'].index)
        home_d += len(home_games[home_games['result'] == 'D'].index)
        home_l += len(home_games[home_games['result'] == 'L'].index)
        
        # lets count how many games the team at away won, drew, and lost (overall)
        away_w += len(away_games[away_games['result'] == 'W'].index)
        away_d += len(away_games[away_games['result'] == 'D'].index)
        away_l += len(away_games[away_games['result'] == 'L'].index)
        #print()

    
    
    #print('stats for season range:',seasons)

    #divide by total number of games to create the ratio
    if home_g != 0:
        home_ow_rat = home_w / float(home_g)
        home_od_rat = home_d / float(home_g)
        home_ol_rat = home_l / float(home_g)
        #print(' Home Stats ')
        #print("overall win ratio:",'\t',home_ow_rat)
        #print("overall draw ratio:",'\t',home_od_rat)
        #print("overall loss ratio:",'\t',home_ol_rat)
        #print()


    #divide by the total number of games to create the ratio
    if away_g != 0:
        away_ow_rat = away_w / float(away_g)
        away_od_rat = away_d / float(away_g)
        away_ol_rat = away_l / float(away_g)
        #print(' Away Stats ')
        #print("overall win ratio:",'\t',away_ow_rat)
        #print("overall draw ratio:",'\t',away_od_rat)
        #print("overall loss ratio:",'\t',away_ol_rat)
        #print()
        
    return home_ow_rat, away_ow_rat, home_ol_rat, away_ol_rat, home_od_rat, away_od_rat


        


# Over all Ratios for each of the team    
home_win_ratio = []
away_win_ratio = []
home_draw_ratio = []
away_draw_ratio = []
home_loss_ratio = []
away_loss_ratio = []


for i in range(len(pl_teams)):
    hw, aw, hl, al, hd, ad = get_h_ratios(pl_teams[i], ['2016','2015','2014','2012'])
    home_win_ratio.append(hw)
    away_win_ratio.append(aw)
    home_draw_ratio.append(hd)
    away_draw_ratio.append(ad)
    home_loss_ratio.append(hl)
    away_loss_ratio.append(al)

#print(home_win_ratio)

In [100]:
# get the head-on ratio of win/draw/loss for home team against all other opposition
def get_s_ratio(home, away, seasons, home_roster = None, away_roster = None):
    # count of home wins/draw/loss/ number of total games against this specific team, 
    # no need for another set of variables for the away team,
    # for they are complementary
    home_sw, home_sd, home_sl, home_sg = 0,0,0,0
    
    home_sw_rat, home_sd_rat, home_sl_rat = 0,0,0
    
    #print('-------------' + home + " Vs " +  away +'----------------------')
    
    #iterate through the seasons
    for season in seasons:
        #change the name for ease of use
        df = season_histories[season]
        
        #get a dataframe of all the games from this season that the home team played in (either as home or away)
        home_games = df[((df['home'] == home) | (df['away'] == home)) & (df['league'] == 'Premier League')]
        
        # add on total number of games against each other
        against_each_other_games = home_games[(home_games['home'] == away) | (home_games['away'] == away)]
        home_sg += len(against_each_other_games.index)
        
        
        # lets count how many games the home team won, drew, and lost against the away team. 
        home_sw += len(home_games[((home_games['home'] == home) | (home_games['away'] == away)) & (home_games['result'] == 'W')].index)
        home_sd += len(home_games[((home_games['home'] == home) | (home_games['away'] == away)) & (home_games['result'] == 'D')].index)
        home_sl += len(home_games[((home_games['home'] == home) | (home_games['away'] == away)) & (home_games['result'] == 'L')].index)
        
    #print('stats for season range:',seasons)

    #divide by total number of games to create the ratio
    if home_sg != 0:
        #divide by the total number of games to create the ratio
        home_sw_rat = home_sw / float(home_sg)
        home_sd_rat = home_sd / float(home_sg)
        home_sl_rat = home_sl / float(home_sg)
        #print("headon win ratio:",'\t',home_sw_rat)
        #print("headon draw ratio:",'\t',home_sd_rat)
        #print("headon loss ratio:",'\t',home_sl_rat)
        #print()
    return home_sw_rat, home_sd_rat, home_sl_rat
        

#store head on results for only home team 
win_ratio = {}
loss_ratio = {}
draw_ratio = {}

for i in pl_teams:
    win_ratio[i] = []
    loss_ratio[i] = []
    draw_ratio[i] = []
    for j in pl_teams:
        if j is not i:
            w,d,l = get_s_ratio(i, j, ['2016','2015','2014','2012'])
            win_ratio[i].append(w)
            draw_ratio[i].append(d)
            loss_ratio[i].append(l)
    print("Team = " + i )
    print("Win Ratio" + str(win_ratio[i]))
    print("Loss Ratio" + str(loss_ratio[i]))
    print("Draw Ratio" + str(draw_ratio[i]))
    print()


#print(win_ratio)

Team = AFC Bournemouth
Win Ratio[2.25, 0, 2.25, 2.25, 2.25, 0, 2.25, 2.25, 2.25, 2.25, 0, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25]
Loss Ratio[4.0, 0, 4.0, 4.0, 4.0, 0, 4.0, 4.0, 4.0, 4.0, 0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0]
Draw Ratio[2.5, 0, 2.5, 2.5, 2.5, 0, 2.5, 2.5, 2.5, 2.5, 0, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5]

Team = Arsenal
Win Ratio[20.75, 20.75, 5.1875, 6.916666666666667, 5.1875, 10.375, 10.375, 5.1875, 5.1875, 5.1875, 0, 6.916666666666667, 5.1875, 5.1875, 5.1875, 5.1875, 20.75, 5.1875, 6.916666666666667]
Loss Ratio[4.0, 4.0, 1.0, 1.3333333333333333, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0, 1.3333333333333333, 1.0, 1.0, 1.0, 1.0, 4.0, 1.0, 1.3333333333333333]
Draw Ratio[8.25, 8.25, 2.0625, 2.75, 2.0625, 4.125, 4.125, 2.0625, 2.0625, 2.0625, 0, 2.75, 2.0625, 2.0625, 2.0625, 2.0625, 8.25, 2.0625, 2.75]

Team = Burnley
Win Ratio[0, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 0, 1.75, 1.75, 1.75, 1.75, 1.75, 0, 1.75, 1.75]
Loss Ratio[0, 4.0, 4.0, 4.0, 4.0, 4

In [101]:
#now load up the data from the 2017 CSV
season = '2017'
df = pd.read_csv('premier-league-' + season +'.csv')
season_histories[season] = df
pl_games = df[(df['league'] == 'Premier League')]
pl_games=pl_games.reset_index(drop=True)
pl_games

Unnamed: 0,date,home,away,home_goals,away_goals,result,league
0,14 Aug 2016,AFC Bournemouth,Manchester United,1,3,L,Premier League
1,21 Aug 2016,West Ham United,AFC Bournemouth,1,0,W,Premier League
2,27 Aug 2016,Crystal Palace,AFC Bournemouth,1,1,D,Premier League
3,10 Sep 2016,AFC Bournemouth,West Bromwich Albion,1,0,W,Premier League
4,17 Sep 2016,Manchester City,AFC Bournemouth,4,0,W,Premier League
5,24 Sep 2016,AFC Bournemouth,Everton,1,0,W,Premier League
6,01 Oct 2016,Watford,AFC Bournemouth,2,2,D,Premier League
7,15 Oct 2016,AFC Bournemouth,Hull City,6,1,W,Premier League
8,22 Oct 2016,AFC Bournemouth,Tottenham Hotspur,0,0,D,Premier League
9,29 Oct 2016,Middlesbrough,AFC Bournemouth,2,0,W,Premier League


In [95]:
# The prediction method
def prediction(home, away, home_roster = None, away_roster = None):
    #we take the win ratio of the home team based on over all + against specific team 
    # and then give the prediction 
    h_index = pl_teams.index(home)
    a_index = pl_teams.index(away)
    
    score = 1*home_win_ratio[h_index] + 2*win_ratio.get(home)[a_index]
    return score

#now make a list of predictions based in the model we have generated 
# teams faceoff will result in condition of success failure
results = []
for i in range(len(pl_games.index)):
    h_team = pl_games['home'][i]
    a_team = pl_games['away'][i]
    print(h_team + " vs " + a_team + ' ' + str(prediction(h_team, a_team)))
    results.append(prediction(h_team, a_team))

AFC Bournemouth vs Manchester United 0.2571428571428571
West Ham United vs AFC Bournemouth 21.91747572815534
Crystal Palace vs AFC Bournemouth 17.33009708737864
AFC Bournemouth vs West Bromwich Albion 4.757142857142857
Manchester City vs AFC Bournemouth 51.77272727272727
AFC Bournemouth vs Everton 0.2571428571428571
Watford vs AFC Bournemouth 4.757142857142857
AFC Bournemouth vs Hull City 4.757142857142857
AFC Bournemouth vs Tottenham Hotspur 4.757142857142857
Middlesbrough vs AFC Bournemouth 0
AFC Bournemouth vs Sunderland 4.757142857142857
Arsenal vs Liverpool 11.003787878787879
Leicester City vs Arsenal 8.457142857142857
Watford vs Arsenal 4.757142857142857
Arsenal vs Southampton 11.003787878787879
Hull City vs Arsenal 5.294117647058823
Arsenal vs Chelsea 14.462121212121213
Burnley vs Arsenal 3.7
Arsenal vs Swansea City 11.003787878787879
Arsenal vs Middlesbrough 14.462121212121213
Sunderland vs Arsenal 4.7727272727272725
Arsenal vs Tottenham Hotspur 42.128787878787875
Burnley vs Sw

IndexError: list index out of range