In [119]:
import os
import sys

import pandas as pd
import numpy as np

from sklearn.metrics import brier_score_loss

import optuna

from utils import load_atp_data
sys.path.append('../')

In [120]:
class MatchElo:

    def __init__(self, df: pd.DataFrame, base_k: int = 225, exponent: float=0.9983, floor: int=50, offset: int=10):
        self.df = df
        self.names = set(
            pd.concat([self.df['winner_name'], self.df['loser_name']]))
        self.winners = self.df.winner_name
        self.winners_mp = self.df.winner_mp
        self.losers = self.df.loser_name
        self.losers_mp = self.df.loser_mp
        self.elo_dict = {name: 1500 for name in self.names}
        self.base_k = base_k
        self.exponent = exponent
        self.floor = floor
        self.offset = offset
        self.processed = False
        self.winner_elo = []
        self.loser_elo = []
        self.winner_probs = []

    def update_elo(self, winner=None, loser=None, winner_mp=0, loser_mp=0):

        prematch_winner_elo = self.elo_dict[winner]
        prematch_loser_elo = self.elo_dict[loser]

        exp_a = 1 / \
            (1 + 10 ** ((prematch_loser_elo - prematch_winner_elo)/400))
        exp_b = 1 - exp_a
        
        winner_k = np.maximum(self.floor, (self.base_k / (winner_mp + self.offset)) ** self.exponent)
        loser_k = np.maximum(self.floor, (self.base_k / (loser_mp + self.offset)) ** self.exponent)

        winner_delta = winner_k * (1 - exp_a)
        loser_delta = loser_k * (0 - exp_b)

        self.elo_dict[winner] = prematch_winner_elo + winner_delta
        self.elo_dict[loser] = prematch_loser_elo + loser_delta

        return exp_a, prematch_winner_elo, prematch_loser_elo

    def process_elo(self):

        if self.processed:
            return "elo already processed"

        else:
            for w, l, wp, lp in zip(self.winners, self.losers, self.winners_mp, self.losers_mp):
                win_prob_, pm_w, pm_l = self.update_elo(winner=w, loser=l, winner_mp=wp, loser_mp=lp)

                self.winner_elo.append(pm_w)
                self.loser_elo.append(pm_l)
                self.winner_probs.append(win_prob_)

            self.processed = True
            print("elo ratings processed successfully")
            return self
        

In [121]:
df = load_atp_data(folder_path='../tennis_atp')

shape before dropping match dupes: (583792, 75)
shape after dropping match dupes: (583731, 75)


In [122]:
match_df = (
    pd.concat([
    df[['winner_id', 'tourney_date']].rename(columns={'winner_id' : 'player_id'}),
    df[['loser_id', 'tourney_date']].rename(columns={'loser_id' : 'player_id'})])
    .sort_index()
    .assign(matches_played = match_df.groupby('player_id').cumcount() )
)

In [123]:
matches_dict = match_df.set_index('player_id', append=True)['matches_played'].to_dict()

mp_winner = []
mp_loser = []

for i, winner, loser in zip(df.index, df['winner_id'], df['loser_id']):
    
    winner_mp_ = matches_dict[(i, winner)]
    loser_mp_ = matches_dict[(i, loser)]
    
    mp_winner.append(winner_mp_)
    mp_loser.append(loser_mp_)

df['winner_mp'] = mp_winner
df['loser_mp'] = mp_loser

In [124]:
challenger_df = df[df['tourney_level'] == 'C'].copy()
not_challenger_df = df[df['tourney_level'] != 'C'].copy()

In [131]:
def objective(trial):
    
    n = trial.suggest_int("base_k", 125, 500, step=25)
    m = trial.suggest_float("exponent", 0.9, 1.2)
    f = trial.suggest_int("floor", 33, 75)
    o = trial.suggest_int("offset", 6, 20)
    
    elo_obj = MatchElo(not_challenger_df, base_k=n, exponent=m, floor=f, offset=o)
    _ = elo_obj.process_elo()
    
    result_ = elo_obj.winner_probs
        
    return brier_score_loss([1] * len(not_challenger_df), result_)

In [None]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=125)

In [133]:
study.best_params

{'base_k': 450, 'exponent': 0.9815329839141791, 'floor': 56, 'offset': 8}