In [55]:
# (Re)load the autoreload extension; mode 2 reloads all imported modules
# before each cell executes, so edits to the local .py helpers are picked up.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inside the notebook output.
%matplotlib inline

In [56]:
from fastai.tabular.all import *
from wwf.tab.export import *
from bnb import *

from classes import *
from utils import *

In [57]:
# Two-digit season identifier. Kept as a string because it is interpolated
# into the download URL and used as the raw-data folder name further down.
SEASON = '21'
# The season number doubles as the seed handed to set_seed.
set_seed(int(SEASON))

## Download HKJC odds

In [58]:
# Relative working directories used throughout the notebook.
# path_raw: season folders of downloaded CSVs; path_data: mapping tables,
# historical stats and saved odds snapshots.
path_raw = Path('raw_data')
path_data = Path('data')

# path_output: destination of the CSV written by the last cell.
path_output = Path('output')

In [59]:
# Open a session against the HKJC betting site and keep the cookies it hands
# out so they can be sent with the odds request.
session = requests.Session()
# requests waits indefinitely by default; the timeout (seconds) keeps the
# notebook from hanging if the site never answers.
r = session.get('http://bet.hkjc.com', timeout=30)
cookies = r.cookies

In [60]:
# JSON endpoint that serves the odds consumed by the rest of the notebook.
odds_url = 'https://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_chl.aspx'
response = session.post(
    odds_url,
    headers={'referer':'http://bet.hkjc.com'},
    cookies=cookies,
    # Fail instead of blocking forever when the endpoint does not respond.
    timeout=30,
)
# Surface HTTP errors here rather than saving and JSON-parsing an error page.
response.raise_for_status()

In [61]:
# Keep a timestamped copy of the raw response so a run can be replayed later.
json_dir = path_data/'json'
# Create the folder on first use instead of failing with FileNotFoundError.
json_dir.mkdir(parents=True, exist_ok=True)
snapshot_path = json_dir/f'odds_chl-{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}.txt'
# Explicit UTF-8: the platform default encoding may be unable to represent
# every character in the response text.
with open(snapshot_path, 'w', encoding='utf-8') as f:
    f.write(response.text)

In [62]:
# Parse the response body and keep the 'matches' entry found under index 1
# of the top-level JSON value.
matches_json = json.loads(response.text)[1]['matches']

In [63]:
# Example
# matches_json = json.loads(open(path_data/'json'/'odds_chl-2021-09-21-14-03-17.txt').read())[1]['matches']

In [64]:
# Wrap every raw match record in a Match object, then flatten each one into
# a row of values via Match.export().
matches = list(map(Match, matches_json))
odds = [match.export() for match in matches]

['20211210FRI2', '2021-Dec-10', '23:00:00', 'Cup Competition [CUP]', 'Tunisia', 'Oman', 'true', '9.5', '2.20', '1.59', 'false', '10.5', '2.70', '1.40', 'false', '12.5', '4.55', '1.15']
['20211210FRI3', '2021-Dec-11', '01:30:00', 'German Division 2 [GD2]', 'Schalke 04', 'Nurnberg', 'false', '13.5', '3.90', '1.20', 'true', '10.5', '1.92', '1.78', 'false', '11.5', '2.40', '1.50']
['20211210FRI4', '2021-Dec-11', '01:30:00', 'German Division 2 [GD2]', 'Jahn Regensburg', 'Werder Bremen', 'false', '13.5', '5.10', '1.12', 'false', '10.5', '2.20', '1.59', 'true', '9.5', '1.77', '1.93']
['20211210FRI16', '2021-Dec-11', '03:00:00', 'Cup Competition [CUP]', 'Qatar', 'United Arab Emirates', 'false', '9.5', '2.55', '1.45', 'true', '8.5', '1.95', '1.75', 'false', '11.5', '4.55', '1.15']
['20211210FRI17', '2021-Dec-11', '03:30:00', 'German Division 1 [GSL]', 'Cologne', 'Augsburg', 'false', '10.5', '2.45', '1.48', 'true', '9.5', '1.92', '1.78', 'false', '12.5', '4.15', '1.18']
['20211210FRI18', '2021-D

In [65]:
# Columns identifying a match in the exported rows.
cols_match = ['MatchDay', 'Date', 'Time', 'LeagueJC', 'HomeTeamJC', 'AwayTeamJC']
# Un-suffixed names of the four per-line odds fields.
cols_odds  = ['MAINLINE', 'CHL_LINE', 'CHL_H', 'CHL_L']
# The same four fields, suffixed _0/_1/_2 for the three lines carried by each row.
cols_odds0, cols_odds1, cols_odds2 = (
    [f'{name}_{slot}' for name in cols_odds] for slot in range(3)
)
# Columns that will hold the model outputs.
cols_pred  = ['prob_0', 'prob_1', 'prob_2', 'total_count']

# Full column order of a row: match fields followed by the three line groups.
cols = [*cols_match, *cols_odds0, *cols_odds1, *cols_odds2]

In [66]:
# Tabulate the exported rows and normalise missing entries to NaN.
odds = pd.DataFrame(odds, columns=cols).fillna(value=np.nan)

In [67]:
# Numeric odds fields of all three lines: every per-line column except the
# leading MAINLINE_* flag.
cols_odds_ = [
    name
    for line_cols in (cols_odds0, cols_odds1, cols_odds2)
    for name in line_cols[1:]
]
odds[cols_odds_] = odds[cols_odds_].astype(float)

In [68]:
# Drop the first 8 characters of the match id (e.g. '20211210FRI2' -> 'FRI2').
odds['MatchDay'] = odds['MatchDay'].str.slice(8)
# Parse the date strings, and reduce the HH:MM:SS strings to datetime.time.
odds['Date'] = pd.to_datetime(odds['Date'])
kickoff_clock = pd.to_datetime(odds['Time'], format='%H:%M:%S')
odds['Time'] = kickoff_clock.dt.time

In [69]:
# Glue date and time back together as text and parse it as one timestamp.
stamp_text = odds['Date'].dt.date.map(str) + '-' + odds['Time'].map(str)
# DateTimeJC: the parsed timestamp tagged with the 'Hongkong' zone;
# DateTime: the same instant converted to the 'GB' zone.
kickoff_jc = pd.to_datetime(stamp_text).dt.tz_localize('Hongkong')
odds['DateTimeJC'] = kickoff_jc
odds['DateTime'] = kickoff_jc.dt.tz_convert('GB')

In [70]:
# Lookup tables used below to translate HKJC league names (LeagueJC -> Div)
# and team names (TeamNameJC -> TeamName).
map_league = pd.read_csv(path_data/'league.csv')
map_team = pd.read_csv(path_data/'team.csv')

In [71]:
# Division codes kept for scoring; leagues outside this list are filtered out.
div = ['E0', 'D1', 'SP1', 'I1', 'F1']
is_kept_div = map_league['Div'].isin(div)
map_league = map_league.loc[is_kept_div].reset_index(drop=True)

In [72]:
# Attach the Div code for each HKJC league name. The join is inner, so
# matches whose league is not in the (filtered) mapping are dropped.
league_lookup = map_league[['LeagueJC', 'Div']]
odds = odds.merge(league_lookup, how='inner', on='LeagueJC')

In [73]:
# Translate the HKJC team names on both sides of the fixture into TeamName
# values, stored as HomeTeam / AwayTeam. Inner joins: a match with an
# unmapped team on either side is dropped.
team_lookup = map_team[['TeamNameJC', 'TeamName']]
for side in ('Home', 'Away'):
    side_lookup = team_lookup.rename(columns={'TeamName': f'{side}Team'})
    odds = odds.merge(
        side_lookup, 'inner', left_on=f'{side}TeamJC', right_on='TeamNameJC'
    ).drop(columns=['TeamNameJC'])

## Download recent stats

In [74]:
# Download latest results in current season
# The URL segment is SEASON followed by SEASON+1 (e.g. '21' -> '2122').
!wget -q https://www.football-data.co.uk/mmz4281/{SEASON}{int(SEASON)+1}/data.zip -O raw_data/data.zip

# Unzip to folder
# -o overwrites files from a previous run; the target folder is named after SEASON.
!unzip -q -o raw_data/data.zip -d raw_data/{SEASON}

In [75]:
# Columns read from every league CSV of the season.
usecols = ['Div', 'Date', 'HomeTeam', 'AwayTeam', 'HC', 'AC', 'FTHG', 'FTAG', 'HS', 'AS', 'HST', 'AST']
# Corner counts are forced to float.
dtype = {'HC':'float', 'AC':'float'}
parse_dates = ['Date']

# Season folders (direct children of path_raw) to load.
seasons = [SEASON]

dfs = []

for folder in sorted(path_raw.iterdir()):
    if not (folder.is_dir() and folder.name in seasons):
        continue
    for file in sorted(folder.glob('*.csv')):
        try:
            # dayfirst=True: dates in these files are parsed as day/month/year.
            df = pd.read_csv(file, usecols=usecols, dtype=dtype, parse_dates=parse_dates, dayfirst=True)
        except Exception as exc:
            # Still best-effort (an unreadable file must not stop the run), but
            # report the skip so a missing league is visible, and no longer
            # swallow KeyboardInterrupt the way a bare `except:` does.
            print(f'Skipping {file}: {exc!r}')
            continue
        df['Season'] = folder.name
        dfs.append(df)

In [76]:
# Stack the per-league frames, discard rows with any missing value, and order
# the result by division, date and home team with a fresh 0..n-1 index.
df_season = (
    pd.concat(dfs)
    .dropna()
    .sort_values(['Div', 'Date', 'HomeTeam'])
    .reset_index(drop=True)
)

In [77]:
# Load the historical results and keep only the season immediately before SEASON.
df_hist = pd.read_csv(path_data/'data.csv', dtype={'HC':'float', 'AC':'float'}, parse_dates=['Date'])
previous_season = int(SEASON) - 1
df_hist = df_hist[df_hist['Season'] == previous_season].reset_index(drop=True)

In [78]:
# Prepend last season's rows (restricted to the current frame's columns) so
# the current season's stats are preceded by last season's games.
# NOTE(review): df_hist is filtered on an integer Season while the freshly
# loaded rows carry the folder name (a string) in Season — confirm the mixed
# types are harmless downstream. The index is not reset after the concat.
df_season = pd.concat([df_hist[df_season.columns], df_season])

In [79]:
# Make features on historical stats (Home and Away)
# joinLastGamesStatsHomeAway is a project helper returning two frames (home
# side, away side); presumably it adds the '...Last5Avg' columns used below —
# verify against its definition.
stats = ['FTHG', 'HS', 'HST', 'HC', 'FTAG', 'AS', 'AST', 'AC']
df_home, df_away = joinLastGamesStatsHomeAway(df_season, stats)

In [80]:
# Make features on historical stats (For and Against)
# Each tuple is (home column, away column, combined name). The project helper
# returns two frames keyed by 'Team'; presumably per-team 'for' and 'against'
# averages — verify against its definition.
stats = [('FTHG', 'FTAG', 'FTG'), ('HS', 'AS', 'S'), ('HST', 'AST', 'ST'), ('HC', 'AC', 'C')]
df_for, df_against = joinLastGamesStatsForAgainst(df_season, stats)

In [81]:
def _latest_avgs(frame, team_col):
    """Return one row per team holding the last value, in Date order, of every
    column whose name contains 'Avg' (as produced by GroupBy.last)."""
    ordered = frame.sort_values([team_col, 'Date']).reset_index(drop=True)
    avg_cols = ordered.columns[ordered.columns.str.contains('Avg')]
    return ordered.groupby(team_col)[avg_cols].last().reset_index()


df_home = _latest_avgs(df_home, 'HomeTeam')
df_away = _latest_avgs(df_away, 'AwayTeam')

In [82]:
# Left-join the latest home-side and away-side averages onto each fixture;
# fixtures without a matching team keep NaN in the joined columns.
odds = (
    odds
    .merge(df_home, how='left', on='HomeTeam')
    .merge(df_away, how='left', on='AwayTeam')
)

In [83]:
# 'Avg' feature columns of the per-team "for" frame.
avg_cols = df_for.columns[df_for.columns.str.contains('Avg')]

# Rename maps that turn the per-team frame into a home-side / away-side frame:
# every Avg column gets a 'Home' / 'Away' prefix and 'Team' becomes the join key.
# Both maps are built from the column index itself. The previous code zipped the
# away names against the cols_home *dict*, which only produced the right keys
# because of dict insertion order and zip stopping before the 'Team' entry.
cols_home = dict(zip(avg_cols, 'Home' + avg_cols))
cols_home['Team'] = 'HomeTeam'

cols_away = dict(zip(avg_cols, 'Away' + avg_cols))
cols_away['Team'] = 'AwayTeam'

# Keep each team's last value per Avg column, then join it once per side.
df_for = df_for.groupby('Team')[avg_cols].last().reset_index()
odds = odds.merge(df_for.rename(columns=cols_home), 'left', 'HomeTeam').merge(df_for.rename(columns=cols_away), 'left', 'AwayTeam')

In [84]:
# 'Avg' feature columns of the per-team "against" frame.
avg_cols = df_against.columns[df_against.columns.str.contains('Avg')]

# Rename maps that turn the per-team frame into a home-side / away-side frame:
# every Avg column gets a 'Home' / 'Away' prefix and 'Team' becomes the join key.
# Both maps are built from the column index itself. The previous code zipped the
# away names against the cols_home *dict*, which only produced the right keys
# because of dict insertion order and zip stopping before the 'Team' entry.
cols_home = dict(zip(avg_cols, 'Home' + avg_cols))
cols_home['Team'] = 'HomeTeam'

cols_away = dict(zip(avg_cols, 'Away' + avg_cols))
cols_away['Team'] = 'AwayTeam'

# Keep each team's last value per Avg column, then join it once per side.
df_against = df_against.groupby('Team')[avg_cols].last().reset_index()
odds = odds.merge(df_against.rename(columns=cols_home), 'left', 'HomeTeam').merge(df_against.rename(columns=cols_away), 'left', 'AwayTeam')

In [85]:
# Expand DateTime into date-part feature columns with no name prefix, keeping
# the original column (drop=False). The trailing ';' suppresses the cell output.
add_datepart(odds, 'DateTime', prefix='', drop=False);

In [86]:
display_df(odds.head(5).T)

Unnamed: 0,0,1,2,3,4
MatchDay,FRI17,SAT19,SAT20,SAT21,SAT22
Date,2021-12-11 00:00:00,2021-12-11 00:00:00,2021-12-11 00:00:00,2021-12-11 00:00:00,2021-12-11 00:00:00
Time,03:30:00,22:30:00,22:30:00,22:30:00,22:30:00
LeagueJC,German Division 1 [GSL],German Division 1 [GSL],German Division 1 [GSL],German Division 1 [GSL],German Division 1 [GSL]
HomeTeamJC,Cologne,Bayern Munich,Bochum,Freiburg,Hertha Berlin
AwayTeamJC,Augsburg,Mainz,Dortmund,Hoffenheim,Bielefeld
MAINLINE_0,false,true,false,false,false
CHL_LINE_0,10.5,10.5,12.5,12.5,10.5
CHL_H_0,2.45,1.98,4.15,4.15,2.5
CHL_L_0,1.48,1.73,1.18,1.18,1.46


## Load model

In [87]:
# Restore the exported learner.
# NOTE(review): load_learner presumably unpickles the file — only load
# model files from a trusted source.
learn_bnb = load_learner('models/learn_bnb.pkl')

In [88]:
# Restore the saved tabular object; it is used below as the template for
# processing the new fixtures (to.new(odds)).
# NOTE(review): the .pkl extension suggests pickle — only load trusted files.
to = load_pandas('models/to.pkl')

In [89]:
def predict(self, row):
    """Run the learner on a single pandas Series.

    Returns a tuple of (the decoded input batch, the first row of the raw
    predictions).
    """
    # One-row frame -> test DataLoader built from the learner's own dls.
    single_dl = self.dls.test_dl(row.to_frame().T)
    # Force the continuous features to float32 before inference.
    single_dl.dataset.conts = single_dl.dataset.conts.astype(np.float32)
    inputs, preds, _ = self.get_preds(dl=single_dl, with_input=True, with_decoded=False)
    decoded_inputs = self.dls.decode(tuplify(inputs))
    return decoded_inputs, preds[0]

# Bind the function above as this learner instance's predict method.
learn_bnb.predict = MethodType(predict, learn_bnb)

In [90]:
# Build a tabular object for the new fixtures from the saved one, then run
# its processing step on them.
to_tst = to.new(odds)
to_tst.process()
# to_tst.items.head()

In [91]:
# Derive a DataLoader for the processed fixtures from the learner's
# validation DataLoader, and display up to 999 rows of it.
tst_dl = learn_bnb.dls.valid.new(to_tst)
tst_dl.show(max_n=999)

Unnamed: 0,Div,HomeTeam,AwayTeam,FTHGLast5Avg,HSLast5Avg,HSTLast5Avg,HCLast5Avg,FTAGLast5Avg,ASLast5Avg,ASTLast5Avg,ACLast5Avg,HomeFTGForLast5Avg,HomeSForLast5Avg,HomeSTForLast5Avg,HomeCForLast5Avg,AwayFTGForLast5Avg,AwaySForLast5Avg,AwaySTForLast5Avg,AwayCForLast5Avg,HomeFTGAgainstLast5Avg,HomeSAgainstLast5Avg,HomeSTAgainstLast5Avg,HomeCAgainstLast5Avg,AwayFTGAgainstLast5Avg,AwaySAgainstLast5Avg,AwaySTAgainstLast5Avg,AwayCAgainstLast5Avg
0,D1,FC Koln,Augsburg,2.0,17.2,5.8,5.6,0.4,9.0,3.2,4.0,1.8,15.8,6.2,5.0,1.6,13.0,5.4,4.8,1.6,10.2,4.0,4.2,1.6,13.6,4.6,5.2
1,D1,Bayern Munich,Mainz,3.8,20.8,10.0,6.0,1.0,12.4,4.0,4.6,2.6,19.8,7.4,6.8,1.8,16.6,5.8,6.4,1.0,9.2,3.0,3.4,1.2,10.4,4.6,4.2
2,D1,Bochum,Dortmund,1.4,15.0,5.0,4.4,1.8,13.0,3.8,4.4,1.4,15.2,5.4,3.6,2.2,13.2,5.2,4.2,0.8,14.0,5.6,6.8,1.0,14.8,6.0,5.0
3,D1,Freiburg,Hoffenheim,2.0,12.8,4.6,4.6,0.6,11.2,3.0,4.6,1.4,15.8,6.0,5.4,2.0,15.2,5.0,6.4,1.4,16.0,6.2,4.6,1.8,10.4,4.0,3.2
4,D1,Hertha,Bielefeld,1.2,12.8,3.8,4.8,0.8,12.2,3.2,3.4,0.6,9.2,3.6,4.4,1.0,12.2,3.6,5.4,1.2,13.6,3.8,6.8,1.6,15.0,6.2,6.8
5,D1,RB Leipzig,M'gladbach,3.2,14.2,7.2,2.8,1.0,13.8,4.6,5.4,1.6,12.2,4.4,4.4,1.6,13.8,5.4,5.4,1.6,12.8,4.4,4.8,1.4,11.8,5.4,4.6
6,D1,Wolfsburg,Stuttgart,0.8,11.8,4.2,5.0,0.6,9.0,3.2,3.2,1.2,13.4,5.6,4.2,1.0,9.8,3.2,4.4,1.4,12.4,3.8,6.4,1.8,16.8,5.2,4.2
7,D1,Greuther Furth,Union Berlin,0.6,14.0,3.8,4.8,1.8,11.0,4.6,4.0,1.2,10.8,3.8,4.6,1.6,10.4,4.2,3.8,3.8,15.6,7.2,5.6,2.0,12.2,4.0,4.0
8,D1,Ein Frankfurt,Leverkusen,0.8,14.6,4.2,6.2,2.8,11.2,5.0,5.2,1.4,14.0,4.8,5.2,1.4,10.2,3.8,6.2,1.0,16.2,5.2,5.4,1.2,15.6,5.6,5.4
9,I1,Genoa,Sampdoria,1.2,11.6,3.2,4.6,1.6,10.8,4.0,5.4,0.4,10.6,3.4,4.2,1.4,10.8,4.0,4.8,1.4,14.0,3.2,4.8,1.8,15.8,5.0,6.0


In [92]:
# Raw model outputs for every fixture; the second returned value is unused.
pred, _ = learn_bnb.get_preds(dl=tst_dl)

In [93]:
# First three output columns -> softmax probabilities; remaining column(s) ->
# total_count, computed as the reciprocal of their softplus.
prob_logits, count_raw = pred[:, :3], pred[:, 3:]
probs = F.softmax(prob_logits, dim=-1)
total_count = 1 / F.softplus(count_raw)

In [94]:
# Store the model outputs in the prob_0, prob_1, prob_2 and total_count columns.
# NOTE(review): relies on the predictions being in the same row order as
# `odds` — confirm the DataLoader neither shuffles nor drops rows.
odds[cols_pred] = torch.cat([probs, total_count], dim=-1)

In [95]:
# One frame per line slot (_0, _1, _2): match fields + that slot's odds
# columns renamed to the un-suffixed names + the model outputs.
odds0, odds1, odds2 = (
    odds[cols_match + slot_cols + cols_pred].rename(columns=dict(zip(slot_cols, cols_odds)))
    for slot_cols in (cols_odds0, cols_odds1, cols_odds2)
)

In [96]:
# Stack the three per-line frames into one row per (match, line); rows with
# any missing value are removed.
stacked_lines = pd.concat([odds0, odds1, odds2])
odds = stacked_lines.dropna().reset_index(drop=True)
# The feed encodes the main-line flag as the string 'true'/'false'; turn it
# into a real boolean column.
odds['MAINLINE'] = odds['MAINLINE'].eq('true')

In [97]:
odds.head(10)

Unnamed: 0,MatchDay,Date,Time,LeagueJC,HomeTeamJC,AwayTeamJC,MAINLINE,CHL_LINE,CHL_H,CHL_L,prob_0,prob_1,prob_2,total_count
0,FRI17,2021-12-11,03:30:00,German Division 1 [GSL],Cologne,Augsburg,False,10.5,2.45,1.48,0.79929,0.122709,0.078001,35.255527
1,SAT19,2021-12-11,22:30:00,German Division 1 [GSL],Bayern Munich,Mainz,True,10.5,1.98,1.73,0.69972,0.234256,0.066024,27.718168
2,SAT20,2021-12-11,22:30:00,German Division 1 [GSL],Bochum,Dortmund,False,12.5,4.15,1.18,0.783931,0.100204,0.115865,32.857338
3,SAT21,2021-12-11,22:30:00,German Division 1 [GSL],Freiburg,Hoffenheim,False,12.5,4.15,1.18,0.805192,0.10033,0.094478,42.177868
4,SAT22,2021-12-11,22:30:00,German Division 1 [GSL],Hertha Berlin,Bielefeld,False,10.5,2.5,1.46,0.811423,0.100699,0.087878,39.179932
5,SAT23,2021-12-11,22:30:00,German Division 1 [GSL],RB Leipzig,Monchengladbach,False,10.5,2.3,1.54,0.807938,0.107894,0.084169,38.495243
6,SAT64,2021-12-12,01:30:00,German Division 1 [GSL],Wolfsburg,Stuttgart,True,9.5,1.78,1.92,0.818674,0.093913,0.087412,41.856003
7,SUN29,2021-12-12,22:30:00,German Division 1 [GSL],Greuther Furth,Union Berlin,False,12.5,4.55,1.15,0.818951,0.097001,0.084048,39.76152
8,SUN50,2021-12-13,00:30:00,German Division 1 [GSL],Frankfurt,Leverkusen,True,10.5,2.13,1.63,0.811428,0.110151,0.078421,45.49929
9,FRI18,2021-12-11,03:45:00,Italian Division 1 [ISA],Genoa,Sampdoria,False,10.5,2.3,1.54,0.810743,0.103862,0.085395,43.209206


In [98]:
# Grid of per-team corner counts over which the joint pmf is summed.
# 15 per side reproduces the original grid; it is widened when a line is high
# enough that totals below it would fall outside a 15x15 grid (which would
# silently under-count prob_lo). ceil(line) counts per side (0..ceil(line)-1)
# cover every pair whose total is strictly below the line.
max_line = odds['CHL_LINE'].max()
grid_size = 15 if pd.isna(max_line) else max(15, int(np.ceil(max_line)))
counts = torch.arange(0., float(grid_size))
# All pairs of counts; built once because it does not depend on the row.
# Which column is which side is defined by BivariateNegativeBinomial.
value = torch.cartesian_prod(counts, counts)
pair_totals = value.sum(-1)

prob_hilo = []

# Row-level names are deliberately distinct from the notebook-level `probs`
# and `total_count` tensors so this cell no longer overwrites them.
for p0, p1, p2, row_count, line in zip(odds['prob_0'], odds['prob_1'], odds['prob_2'],
                                       odds['total_count'], odds['CHL_LINE']):
    row_probs = torch.tensor((p0, p1, p2), device='cpu')
    row_total_count = torch.tensor(row_count, device='cpu')

    bnb_corner = BivariateNegativeBinomial(total_count=row_total_count, probs=row_probs)
    # Probability of every pair on the grid.
    corner = bnb_corner.log_prob(value).exp()

    # "Low" = total strictly below the line; "High" is the complement.
    prob_lo = corner[pair_totals < line].sum()
    prob_hi = 1 - prob_lo

    prob_hilo.append([prob_hi.item(), prob_lo.item()])

In [99]:
# One [prob_hi, prob_lo] pair was appended per row of `odds`, in row order.
odds[['prob_hi', 'prob_lo']] = prob_hilo

In [100]:
# Kelly fraction for each side of the line: (p * price - 1) / (price - 1),
# using that side's model probability and quoted price.
for side, price_col in (('hi', 'CHL_H'), ('lo', 'CHL_L')):
    win_prob, price = odds[f'prob_{side}'], odds[price_col]
    odds[f'kelly_{side}'] = (win_prob * price - 1) / (price - 1)

In [101]:
# Larger of the two Kelly fractions, kept only when it is positive; rows
# with no positive fraction get NaN.
best_kelly = np.maximum(odds['kelly_hi'], odds['kelly_lo'])
odds['kelly'] = np.where(best_kelly > 0, best_kelly, np.nan)

In [102]:
# Side to back: 'High' when its Kelly fraction is strictly larger, otherwise
# 'Low'; None when neither side has a positive fraction.
has_positive_kelly = np.maximum(odds['kelly_hi'], odds['kelly_lo']) > 0
better_side = np.where(odds['kelly_hi'] > odds['kelly_lo'], 'High', 'Low')
odds['bet'] = np.where(has_positive_kelly, better_side, None)

In [103]:
# Largest Kelly fraction first, with a fresh 0..n-1 index.
odds = odds.sort_values('kelly', ascending=False).reset_index(drop=True)

In [104]:
# Grade each row by its Kelly fraction: > 0.1 -> '$', > 0.2 -> '$$',
# > 0.3 -> '$$$', otherwise None. Higher tiers overwrite lower ones.
kelly_tier = np.where(odds['kelly']>0.1, '$', None)
kelly_tier = np.where(odds['kelly']>0.2, '$$', kelly_tier)
kelly_tier = np.where(odds['kelly']>0.3, '$$$', kelly_tier)
# Only main-line rows can be selected; every other row gets None.
odds['selected'] = np.where(odds['MAINLINE']==True, kelly_tier, None)

In [105]:
# Remove the raw model outputs and the per-side Kelly fractions.
odds = odds.drop(columns=cols_pred+['kelly_hi', 'kelly_lo'])

In [106]:
# Rows that have both a bet side and a selection grade (main-line rows with
# a Kelly fraction above 0.1).
odds[odds.bet.notna() & odds.selected.notna()]

Unnamed: 0,MatchDay,Date,Time,LeagueJC,HomeTeamJC,AwayTeamJC,MAINLINE,CHL_LINE,CHL_H,CHL_L,prob_hi,prob_lo,kelly,bet,selected
2,SUN36,2021-12-13,00:00:00,French Division 1 [FFL],Strasbourg,Marseille,True,9.5,1.78,1.92,0.34266,0.65734,0.284883,Low,$$
3,SAT16,2021-12-11,22:00:00,Italian Division 1 [ISA],Fiorentina,Salernitana,True,9.5,1.73,1.98,0.693983,0.306017,0.274781,High,$$
4,SAT26,2021-12-11,23:00:00,Eng Premier [EPL],Arsenal,Southampton,True,10.5,1.88,1.82,0.65947,0.34053,0.272504,High,$$
7,SUN61,2021-12-13,03:45:00,French Division 1 [FFL],Paris Saint Germain,Monaco,True,9.5,1.95,1.75,0.619737,0.380263,0.21946,High,$$
8,SAT19,2021-12-11,22:30:00,German Division 1 [GSL],Bayern Munich,Mainz,True,10.5,1.98,1.73,0.606679,0.393321,0.205332,High,$$
10,SUN62,2021-12-13,04:00:00,Spanish Division 1 [SFL],Real Madrid,Atletico Madrid,True,9.5,2.02,1.7,0.585746,0.414254,0.179614,High,$
13,SUN7,2021-12-12,19:30:00,Italian Division 1 [ISA],AC Torino,Bologna,True,9.5,1.82,1.88,0.615826,0.384174,0.147322,High,$
14,SAT63,2021-12-12,01:30:00,Spanish Division 1 [SFL],Valencia,CF Elche,True,8.5,1.9,1.8,0.596023,0.403977,0.147159,High,$
17,SAT12,2021-12-11,21:00:00,Spanish Division 1 [SFL],Espanyol,Levante,True,9.5,1.85,1.85,0.593211,0.406789,0.114636,High,$
19,MON8,2021-12-14,03:45:00,Italian Division 1 [ISA],Roma,Spezia,True,9.5,1.83,1.87,0.595511,0.404489,0.108176,High,$


In [107]:
# For every MatchDay that has a selected row, show all of its rows that have
# a bet side — i.e. the non-main lines as well as the selected main line.
display_df(odds[odds.MatchDay.isin(odds[odds.bet.notna() & odds.selected.notna()].MatchDay) & odds.bet.notna()])

Unnamed: 0,MatchDay,Date,Time,LeagueJC,HomeTeamJC,AwayTeamJC,MAINLINE,CHL_LINE,CHL_H,CHL_L,prob_hi,prob_lo,kelly,bet,selected
0,SUN36,2021-12-13,00:00:00,French Division 1 [FFL],Strasbourg,Marseille,False,13.5,4.75,1.14,0.069637,0.930363,0.432955,Low,
1,SUN36,2021-12-13,00:00:00,French Division 1 [FFL],Strasbourg,Marseille,False,10.5,2.2,1.59,0.245425,0.754575,0.338601,Low,
2,SUN36,2021-12-13,00:00:00,French Division 1 [FFL],Strasbourg,Marseille,True,9.5,1.78,1.92,0.34266,0.65734,0.284883,Low,$$
3,SAT16,2021-12-11,22:00:00,Italian Division 1 [ISA],Fiorentina,Salernitana,True,9.5,1.73,1.98,0.693983,0.306017,0.274781,High,$$
4,SAT26,2021-12-11,23:00:00,Eng Premier [EPL],Arsenal,Southampton,True,10.5,1.88,1.82,0.65947,0.34053,0.272504,High,$$
5,SAT16,2021-12-11,22:00:00,Italian Division 1 [ISA],Fiorentina,Salernitana,False,10.5,2.15,1.62,0.591629,0.408371,0.236525,High,
6,SAT26,2021-12-11,23:00:00,Eng Premier [EPL],Arsenal,Southampton,False,11.5,2.38,1.51,0.556578,0.443422,0.235258,High,
7,SUN61,2021-12-13,03:45:00,French Division 1 [FFL],Paris Saint Germain,Monaco,True,9.5,1.95,1.75,0.619737,0.380263,0.21946,High,$$
8,SAT19,2021-12-11,22:30:00,German Division 1 [GSL],Bayern Munich,Mainz,True,10.5,1.98,1.73,0.606679,0.393321,0.205332,High,$$
9,SAT19,2021-12-11,22:30:00,German Division 1 [GSL],Bayern Munich,Mainz,False,11.5,2.5,1.46,0.508187,0.491813,0.180312,High,


In [108]:
# Create the output folder on first use; to_csv would otherwise fail when
# the directory does not exist.
path_output.mkdir(parents=True, exist_ok=True)
# One file per calendar day (a rerun on the same day overwrites it); floats
# are written with two decimals.
odds.to_csv(path_output/f'odds-{datetime.now().strftime("%Y-%m-%d")}.csv', float_format='%.2f', index=False)

## END