In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from tpot import TPOTRegressor
from sklearn.metrics import make_scorer
import matplotlib.pyplot as plt
from featuresv2 import features, targets, metadata
import json
import requests
%matplotlib inline

## Version 2 Model

In [15]:
data = pd.read_csv('outputs/model_training_data_v2.csv')
data_2023 = data[(data['round.year']!= 2023) & (data['round.year']!= 2022)]

In [3]:
round_matchups = [
    ('Sydney Swans','Melbourne'),
    ('Brisbane Lions','Carlton'),
    ('Gold Coast Suns','Richmond'),
    ('GWS Giants','Collingwood')]

In [4]:
round1 = [('Carlton', 'Richmond'),
('Collingwood','Sydney Swans'),
('Essendon','Hawthorn'),
('GWS Giants','North Melbourne'),
('Geelong Cats','St Kilda'),
('Gold Coast Suns','Adelaide Crows'),
('Melbourne','Western Bulldogs'),
('Port Adelaide','West Coast Eagles'),
('Fremantle','Brisbane Lions')]

In [5]:
targets

['homeTeamScore.matchScore.totalScore',
 'awayTeamScore.matchScore.totalScore',
 'score_diff']

In [6]:
def generate_test_train_split(data, features, target, test_size=0.3):
    X = data[features]
    y = data[target]

    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
    
    return {"x_train":x_train,"x_test":x_test,"y_train":y_train,"y_test":y_test}

In [7]:
def tpot_pipeline_v2(data, features, target, size=20, scoring='r2'):
    data_ = generate_test_train_split(data, features, target)

    my_scorer = make_scorer(mean_squared_error, greater_is_better=False)

    pipeline_optimizer = TPOTRegressor(generations=5, population_size=size, verbosity=2, scoring=scoring)

    pipeline_optimizer.fit(data_['x_train'], data_['y_train'])
    
    print(pipeline_optimizer.score(data_['x_test'], data_['y_test']))
    
    return pipeline_optimizer

In [8]:
def return_stats_input(data, home_team, away_team, model_inputs, games=5):
    '''Take in player_stats and returns aggregated team stats for previous home/away team matchups'''
    
#     if 'team.name' not in model_inputs:
#         model_inputs.append('team.name')
        
    match_ups = data[(data['match.homeTeam.name']==home_team) & (data['match.awayTeam.name']==away_team)]
    weights = range(1,len(match_ups)+1)

    stats = match_ups[model_inputs+['match.homeTeam.name']].groupby('match.homeTeam.name').apply(lambda x: np.average(x, weights=weights, axis=0))
    
    return pd.DataFrame(dict(zip(model_inputs,stats[0])),index=[0])

In [9]:
def generate_margin_tear_off(model, round_matchups, team_stats, model_inputs):
    print('-------------------' + str(model.fitted_pipeline_[0]).replace('()','') + ' TEAR-OFF-------------------')
    for tup in round_matchups:
        predict = model.predict(return_stats_input(team_stats, tup[0], tup[1], model_inputs))[0]
        if predict < 0:
            winner = tup[1]
        else:
            winner = tup[0]
        print("------------------------------------------------")
        print("Home: " + tup[0])
        print("Away: " + tup[1])
        print("Winner: " + winner + "\t       " + "Margin: " + str(abs(round(predict))))

In [10]:
def generate_score_tear_off(home_model, away_model, round_matchups, team_stats, model_inputs):
   # print('-------------------' + str(model.fitted_pipeline_[0]).replace('()','') + ' TEAR-OFF-------------------')
    preds = []
    for tup in round_matchups:
        try:
            h_predict = home_model.predict(return_stats_input(team_stats, tup[0], tup[1], model_inputs))[0]
            a_predict = away_model.predict(return_stats_input(team_stats, tup[0], tup[1], model_inputs))[0]
            if h_predict < a_predict:
                winner = tup[1]
                margin = round(a_predict) - round(h_predict)
            else:
                winner = tup[0]
                margin = round(h_predict) - round(a_predict)
            paddingh = 25 - len("Home: " + tup[0])
            paddinga = 25 - len("Away: " + tup[1])
            print("------------------------------------------------")
            print("Home: " + tup[0] + " "*paddingh + str(abs(round(h_predict))))
            print("Away: " + tup[1] + " "*paddinga + str(abs(round(a_predict))))
            print("Winner: " + winner + " by "+ str(margin) + " points")
            print("Total Score: " + str(round(h_predict) + round(a_predict)))
            preds.append((h_predict, a_predict, margin))
        except:
            print("No prior game")
            preds.append((0, 0, 0))
    return preds

In [16]:
home_model = tpot_pipeline_v2(data, features, targets[0], size=10)

Optimization Progress:   0%|          | 0/60 [00:00<?, ?pipeline/s]


Generation 1 - Current best internal CV score: 0.9715273338099124

Generation 2 - Current best internal CV score: 0.9715273338099124

Generation 3 - Current best internal CV score: 0.9715273338099124

Generation 4 - Current best internal CV score: 0.9715273338099124

Generation 5 - Current best internal CV score: 0.9715273338099124

Best pipeline: RidgeCV(input_matrix)
0.9733412515746034


In [17]:
away_model = tpot_pipeline_v2(data, features, targets[1], size=10)

Optimization Progress:   0%|          | 0/60 [00:00<?, ?pipeline/s]


Generation 1 - Current best internal CV score: 0.9724996542190854

Generation 2 - Current best internal CV score: 0.9724996542190854

Generation 3 - Current best internal CV score: 0.9734531794501624

Generation 4 - Current best internal CV score: 0.9734531794501624

Generation 5 - Current best internal CV score: 0.9734531794501624

Best pipeline: ElasticNetCV(RobustScaler(input_matrix), l1_ratio=0.9500000000000001, tol=0.001)
0.9720637747470147


In [None]:
#margin_model = tpot_pipeline_v2(data, features, targets[2], size=10)

In [None]:
#generate_score_tear_off(home_model, away_model, round1, data, features)

In [None]:
# print(return_stats_inputv3(data, home_team, away_team, features))
#generate_margin_tear_off(margin_model, round1, data, features)
# data[(data['match.homeTeam.name']==home_team) & (data['match.awayTeam.name']==away_team)][features].head()

In [18]:
fix_data_2024 = json.loads(requests.get('https://fixturedownload.com/feed/json/afl-2024').content)

fix_data_2023 = json.loads(requests.get('https://fixturedownload.com/feed/json/afl-2023').content)

fix_data_2022 = json.loads(requests.get('https://fixturedownload.com/feed/json/afl-2022').content)

fixtures2024 = pd.DataFrame(fix_data_2024)
games2024 = list(zip(fixtures2024.HomeTeam, fixtures2024.AwayTeam))

fixtures2023 = pd.DataFrame(fix_data_2023)
games2023 = list(zip(fixtures2023.HomeTeam, fixtures2023.AwayTeam))

fixtures2022 = pd.DataFrame(fix_data_2022)
games2022 = list(zip(fixtures2022.HomeTeam, fixtures2022.AwayTeam))

predicted_season = generate_score_tear_off(home_model, away_model, games2022, data_2023, features)

fixtures2022['predicted_home'] = [round(i[0]) for i in predicted_season]
fixtures2022['predicted_away'] = [round(i[1]) for i in predicted_season]

------------------------------------------------
Home: Melbourne          114
Away: Western Bulldogs   76
Winner: Melbourne by 38 points
Total Score: 190
------------------------------------------------
Home: Carlton            63
Away: Richmond           94
Winner: Richmond by 31 points
Total Score: 157
No prior game
------------------------------------------------
Home: Geelong Cats       96
Away: Essendon           49
Winner: Geelong Cats by 47 points
Total Score: 145


To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  stats = match_ups[model_inputs+['match.homeTeam.name']].groupby('match.homeTeam.name').apply(lambda x: np.average(x, weights=weights, axis=0))


------------------------------------------------
Home: GWS Giants         77
Away: Sydney Swans       91
Winner: Sydney Swans by 14 points
Total Score: 168
------------------------------------------------
Home: Brisbane Lions     95
Away: Port Adelaide      59
Winner: Brisbane Lions by 36 points
Total Score: 154
------------------------------------------------
Home: Hawthorn           76
Away: North Melbourne    72
Winner: Hawthorn by 4 points
Total Score: 148
------------------------------------------------
Home: Adelaide Crows     59
Away: Fremantle          67
Winner: Fremantle by 8 points
Total Score: 126
------------------------------------------------
Home: West Coast Eagles  84
Away: Gold Coast Suns    58
Winner: West Coast Eagles by 26 points
Total Score: 142
------------------------------------------------
Home: Western Bulldogs   91
Away: Carlton            93
Winner: Carlton by 2 points
Total Score: 184
------------------------------------------------
Home: Sydney Swans     

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  stats = match_ups[model_inputs+['match.homeTeam.name']].groupby('match.homeTeam.name').apply(lambda x: np.average(x, weights=weights, axis=0))


------------------------------------------------
Home: Essendon           57
Away: Brisbane Lions     81
Winner: Brisbane Lions by 24 points
Total Score: 138
------------------------------------------------
Home: Port Adelaide      70
Away: Hawthorn           50
Winner: Port Adelaide by 20 points
Total Score: 120
------------------------------------------------
Home: Gold Coast Suns    41
Away: Melbourne          102
Winner: Melbourne by 61 points
Total Score: 143
------------------------------------------------
Home: North Melbourne    34
Away: West Coast Eagles  45
Winner: West Coast Eagles by 11 points
Total Score: 79
------------------------------------------------
Home: Richmond           94
Away: GWS Giants         60
Winner: Richmond by 34 points
Total Score: 154
------------------------------------------------
Home: Fremantle          74
Away: St Kilda           69
Winner: Fremantle by 5 points
Total Score: 143
------------------------------------------------
Home: Western Bull

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  stats = match_ups[model_inputs+['match.homeTeam.name']].groupby('match.homeTeam.name').apply(lambda x: np.average(x, weights=weights, axis=0))


------------------------------------------------
Home: Fremantle          77
Away: GWS Giants         68
Winner: Fremantle by 9 points
Total Score: 145
------------------------------------------------
Home: Essendon           82
Away: Adelaide Crows     21
Winner: Essendon by 61 points
Total Score: 103
No prior game


To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  stats = match_ups[model_inputs+['match.homeTeam.name']].groupby('match.homeTeam.name').apply(lambda x: np.average(x, weights=weights, axis=0))


------------------------------------------------
Home: Gold Coast Suns    50
Away: Carlton            71
Winner: Carlton by 21 points
Total Score: 121
------------------------------------------------
Home: Brisbane Lions     99
Away: Collingwood        62
Winner: Brisbane Lions by 37 points
Total Score: 161
------------------------------------------------
Home: North Melbourne    54
Away: Western Bulldogs   167
Winner: Western Bulldogs by 113 points
Total Score: 221
------------------------------------------------
Home: West Coast Eagles  76
Away: Sydney Swans       48
Winner: West Coast Eagles by 28 points
Total Score: 124
------------------------------------------------
Home: St Kilda           84
Away: Gold Coast Suns    82
Winner: St Kilda by 2 points
Total Score: 166
------------------------------------------------
Home: Adelaide Crows     56
Away: Richmond           73
Winner: Richmond by 17 points
Total Score: 129
------------------------------------------------
Home: Melbourne 

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  stats = match_ups[model_inputs+['match.homeTeam.name']].groupby('match.homeTeam.name').apply(lambda x: np.average(x, weights=weights, axis=0))


------------------------------------------------
Home: Essendon           90
Away: Collingwood        64
Winner: Essendon by 26 points
Total Score: 154
------------------------------------------------
Home: West Coast Eagles  84
Away: Richmond           86
Winner: Richmond by 2 points
Total Score: 170


To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  stats = match_ups[model_inputs+['match.homeTeam.name']].groupby('match.homeTeam.name').apply(lambda x: np.average(x, weights=weights, axis=0))


No prior game
------------------------------------------------
Home: Adelaide Crows     50
Away: GWS Giants         80
Winner: GWS Giants by 30 points
Total Score: 130
------------------------------------------------
Home: Melbourne          78
Away: Hawthorn           79
Winner: Hawthorn by 1 points
Total Score: 157
------------------------------------------------
Home: St Kilda           63
Away: Port Adelaide      92
Winner: Port Adelaide by 29 points
Total Score: 155
------------------------------------------------
Home: Carlton            72
Away: North Melbourne    117
Winner: North Melbourne by 45 points
Total Score: 189
------------------------------------------------
Home: Collingwood        71
Away: Gold Coast Suns    60
Winner: Collingwood by 11 points
Total Score: 131
------------------------------------------------
Home: Western Bulldogs   83
Away: Essendon           62
Winner: Western Bulldogs by 21 points
Total Score: 145
------------------------------------------------


To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  stats = match_ups[model_inputs+['match.homeTeam.name']].groupby('match.homeTeam.name').apply(lambda x: np.average(x, weights=weights, axis=0))


No prior game
------------------------------------------------
Home: Brisbane Lions     110
Away: GWS Giants         71
Winner: Brisbane Lions by 39 points
Total Score: 181
------------------------------------------------
Home: Geelong Cats       95
Away: Adelaide Crows     67
Winner: Geelong Cats by 28 points
Total Score: 162
------------------------------------------------
Home: Melbourne          64
Away: Fremantle          57
Winner: Melbourne by 7 points
Total Score: 121
------------------------------------------------
Home: West Coast Eagles  74
Away: Western Bulldogs   94
Winner: Western Bulldogs by 20 points
Total Score: 168
------------------------------------------------
Home: Gold Coast Suns    113
Away: Hawthorn           77
Winner: Gold Coast Suns by 36 points
Total Score: 190
------------------------------------------------
Home: St Kilda           84
Away: North Melbourne    67
Winner: St Kilda by 17 points
Total Score: 151
-----------------------------------------------

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  stats = match_ups[model_inputs+['match.homeTeam.name']].groupby('match.homeTeam.name').apply(lambda x: np.average(x, weights=weights, axis=0))


------------------------------------------------
Home: West Coast Eagles  80
Away: Essendon           69
Winner: West Coast Eagles by 11 points
Total Score: 149
------------------------------------------------
Home: Carlton            108
Away: Fremantle          68
Winner: Carlton by 40 points
Total Score: 176
------------------------------------------------
Home: Geelong Cats       71
Away: Richmond           56
Winner: Geelong Cats by 15 points
Total Score: 127
------------------------------------------------
Home: Sydney Swans       98
Away: St Kilda           73
Winner: Sydney Swans by 25 points
Total Score: 171
------------------------------------------------
Home: North Melbourne    83
Away: Adelaide Crows     81
Winner: North Melbourne by 2 points
Total Score: 164
------------------------------------------------
Home: Collingwood        62
Away: GWS Giants         77
Winner: GWS Giants by 15 points
Total Score: 139
------------------------------------------------
Home: Port Ade

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  stats = match_ups[model_inputs+['match.homeTeam.name']].groupby('match.homeTeam.name').apply(lambda x: np.average(x, weights=weights, axis=0))


------------------------------------------------
Home: Richmond           89
Away: West Coast Eagles  66
Winner: Richmond by 23 points
Total Score: 155
------------------------------------------------
Home: GWS Giants         65
Away: Hawthorn           80
Winner: Hawthorn by 15 points
Total Score: 145
------------------------------------------------
Home: Fremantle          57
Away: Port Adelaide      74
Winner: Port Adelaide by 17 points
Total Score: 131
------------------------------------------------
Home: Geelong Cats       90
Away: Melbourne          74
Winner: Geelong Cats by 16 points
Total Score: 164
------------------------------------------------
Home: Sydney Swans       42
Away: Western Bulldogs   60
Winner: Western Bulldogs by 18 points
Total Score: 102
------------------------------------------------
Home: Collingwood        55
Away: North Melbourne    52
Winner: Collingwood by 3 points
Total Score: 107
------------------------------------------------
Home: Gold Coast Sun

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  stats = match_ups[model_inputs+['match.homeTeam.name']].groupby('match.homeTeam.name').apply(lambda x: np.average(x, weights=weights, axis=0))


------------------------------------------------
Home: Richmond           52
Away: Fremantle          29
Winner: Richmond by 23 points
Total Score: 81
------------------------------------------------
Home: North Melbourne    89
Away: Hawthorn           63
Winner: North Melbourne by 26 points
Total Score: 152
------------------------------------------------
Home: Sydney Swans       94
Away: Adelaide Crows     87
Winner: Sydney Swans by 7 points
Total Score: 181
------------------------------------------------
Home: Port Adelaide      77
Away: Geelong Cats       68
Winner: Port Adelaide by 9 points
Total Score: 145
------------------------------------------------
Home: Brisbane Lions     117
Away: Gold Coast Suns    57
Winner: Brisbane Lions by 60 points
Total Score: 174
------------------------------------------------
Home: Western Bulldogs   69
Away: Melbourne          74
Winner: Melbourne by 5 points
Total Score: 143
------------------------------------------------
Home: Carlton      

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  stats = match_ups[model_inputs+['match.homeTeam.name']].groupby('match.homeTeam.name').apply(lambda x: np.average(x, weights=weights, axis=0))


------------------------------------------------
Home: Collingwood        65
Away: Port Adelaide      62
Winner: Collingwood by 3 points
Total Score: 127
------------------------------------------------
Home: Sydney Swans       69
Away: GWS Giants         67
Winner: Sydney Swans by 2 points
Total Score: 136
------------------------------------------------
Home: St Kilda           107
Away: Hawthorn           63
Winner: St Kilda by 44 points
Total Score: 170
------------------------------------------------
Home: Geelong Cats       100
Away: Western Bulldogs   78
Winner: Geelong Cats by 22 points
Total Score: 178
No prior game
------------------------------------------------
Home: Gold Coast Suns    88
Away: West Coast Eagles  51
Winner: Gold Coast Suns by 37 points
Total Score: 139


To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  stats = match_ups[model_inputs+['match.homeTeam.name']].groupby('match.homeTeam.name').apply(lambda x: np.average(x, weights=weights, axis=0))


------------------------------------------------
Home: Richmond           93
Away: Brisbane Lions     69
Winner: Richmond by 24 points
Total Score: 162
------------------------------------------------
Home: Essendon           109
Away: North Melbourne    66
Winner: Essendon by 43 points
Total Score: 175
------------------------------------------------
Home: Melbourne          72
Away: Collingwood        67
Winner: Melbourne by 5 points
Total Score: 139
------------------------------------------------
Home: Hawthorn           108
Away: Gold Coast Suns    50
Winner: Hawthorn by 58 points
Total Score: 158
------------------------------------------------
Home: GWS Giants         108
Away: Essendon           84
Winner: GWS Giants by 24 points
Total Score: 192
------------------------------------------------
Home: Western Bulldogs   118
Away: Fremantle          64
Winner: Western Bulldogs by 54 points
Total Score: 182
------------------------------------------------
Home: Geelong Cats       

In [21]:
pred_winner = []
winner = []
for i, row in fixtures2022.iterrows():

    if row[7] > row[8]:
        winner.append(row[4])
    elif row[8] > row[7]:
        winner.append(row[5])
    else:
        winner.append('Draw')


    if row[9] > row[10]:
        pred_winner.append(row[4])
    elif row[10] > row[9]:
        pred_winner.append(row[5])
    else:
        pred_winner.append('Draw')

fixtures2022['predicted_winner'] = pred_winner
fixtures2022['winner'] = winner
fixtures2022['predicted_margin'] = fixtures2022['predicted_home'] - fixtures2022['predicted_away']
fixtures2022['actual_margin'] = fixtures2022['HomeTeamScore'] - fixtures2022['AwayTeamScore']
fixtures2022['correct'] = [1 if row[11]==row[12] else 0 for i,row in fixtures2022.iterrows()]
# fixtures2022['bet'] = ["1-39" if abs(row[13]) < 40 else "40+" for i,row in fixtures2022.iterrows()]


fixtures2022.to_csv('outputs/predictions_and_results_2022.csv', index=False)

backtest_data = pd.read_csv('outputs/predictions_and_results_2022.csv')
backtest_data.head()

Unnamed: 0,MatchNumber,RoundNumber,DateUtc,Location,HomeTeam,AwayTeam,Group,HomeTeamScore,AwayTeamScore,predicted_home,predicted_away,predicted_winner,winner,predicted_margin,actual_margin,correct
0,1,1,2022-03-16 08:10:00Z,MCG,Melbourne,Western Bulldogs,,97,71,114,76,Melbourne,Melbourne,38,26,1
1,2,1,2022-03-17 08:25:00Z,MCG,Carlton,Richmond,,101,76,63,94,Richmond,Carlton,-31,25,0
2,3,1,2022-03-18 08:50:00Z,Marvel Stadium,St Kilda,Collingwood,,85,102,0,0,Draw,Collingwood,0,-17,0
3,4,1,2022-03-19 03:10:00Z,MCG,Geelong Cats,Essendon,,138,72,96,49,Geelong Cats,Geelong Cats,47,66,1
4,5,1,2022-03-19 06:10:00Z,Accor Stadium,GWS Giants,Sydney Swans,,92,112,77,91,Sydney Swans,Sydney Swans,-14,-20,1


In [20]:
fixtures2022.head()

Unnamed: 0,MatchNumber,RoundNumber,DateUtc,Location,HomeTeam,AwayTeam,Group,HomeTeamScore,AwayTeamScore,predicted_home,predicted_away
0,1,1,2022-03-16 08:10:00Z,MCG,Melbourne,Western Bulldogs,,97,71,114,76
1,2,1,2022-03-17 08:25:00Z,MCG,Carlton,Richmond,,101,76,63,94
2,3,1,2022-03-18 08:50:00Z,Marvel Stadium,St Kilda,Collingwood,,85,102,0,0
3,4,1,2022-03-19 03:10:00Z,MCG,Geelong Cats,Essendon,,138,72,96,49
4,5,1,2022-03-19 06:10:00Z,Accor Stadium,GWS Giants,Sydney Swans,,92,112,77,91
