In [1]:
import os
import pandas as pd
import numpy as np
import pickle

from scipy.optimize import minimize
from trueskillthroughtime import Game, Player, History, Gaussian
from utils import load_wta_data

In [2]:
# Load the WTA match table through the project helper imported from utils.
# The helper's printed log (below) reports the table shape before and after
# duplicate matches are dropped. Presumably it returns a pandas DataFrame —
# later cells read df.winner_name, df.loser_name and df.surface from it.
df = load_wta_data()

shape before dropping match dupes: (456418, 54)
shape after dropping match dupes: (455283, 54)


In [7]:
# the three columns the model needs: who won, who lost, and on which surface
columns = zip(df.winner_name, df.loser_name, df.surface)

# each side of a match is modelled as a two-member team: the player's
# generic identity plus a surface-specific identity (name + surface)
composition = [
    [[winner, winner + surface], [loser, loser + surface]]
    for winner, loser, surface in columns
]

# every distinct name that appears as either a winner or a loser
all_wta_players = set(df.winner_name).union(df.loser_name)

# one identical starting prior per generic player (they need not be identical).
# conceptually this is like seeding everyone at 1500 in Elo, except that a
# TrueSkill player carries several more parameters than a single rating.
# NOTE(review): only the generic names are keys of this dict; the
# surface-specific names used in `composition` get no explicit prior here.
priors = {
    name: Player(Gaussian(0., 1.51), 1.5, 0.098)
    for name in all_wta_players
}

# fit the TrueSkill history over the whole match sequence
true_skill_history_priors = History(
    composition=composition,
    priors=priors,
    beta=0,
    sigma=0.5,
    gamma=0.01,
)

# log evidence of the observed sequence of wins and losses under the
# current skill estimates

true_skill_history_priors.log_evidence() # initial results with 0, 1.51, 1.5, 0.098

In [16]:
def training(x):
    """Objective for scipy's `minimize`: the negative log evidence of a History.

    Parameters
    ----------
    x : sequence of three numbers
        Unpacked in order into the second argument of each player's
        `Gaussian` prior (the first is fixed at 0) and the second and third
        positional arguments of `Player` (presumably sigma, beta and gamma —
        confirm against the trueskillthroughtime signatures).

    Returns
    -------
    The log evidence of a freshly built `History` with its sign flipped, so
    that minimizing this function maximizes the evidence.

    Reads the module-level `all_wta_players` and `composition`.
    """
    prior_spread, player_arg_2, player_arg_3 = x

    # the same candidate prior is given to every generic player
    candidate_priors = {
        name: Player(Gaussian(0, prior_spread), player_arg_2, player_arg_3)
        for name in all_wta_players
    }

    # rebuild the history from scratch for this candidate parameter set
    history = History(
        composition=composition,
        priors=candidate_priors,
        beta=0,
        sigma=0.5,
        gamma=0.01,
    )

    return -history.log_evidence()

In [17]:
# starting guess for the three tunable values, in the order training() unpacks them
to_start = [1.5, 1.5, 0.1]

# (min, max) search interval for each value, in the same order as to_start
bounds = ((1.3, 1.8), (1.3, 1.8), (0.08, 0.12))

# bounded SLSQP minimization of the negative log evidence
result = minimize(fun=training, x0=to_start, method='SLSQP', bounds=bounds)

print(result)

In [39]:
# Run the iterative TrueSkillThroughTime convergence pass on the history,
# with epsilon=0.01 and at most 10 iterations.
# NOTE(review): in the recorded output below the step is still
# (0.1295, 0.0048) after iteration 9, so the run appears to have stopped on
# the iteration cap rather than on epsilon — confirm whether more iterations
# are wanted.
# NOTE(review): this is the module-level History built from the hard-coded
# priors; training() only builds a local History, and `result` from
# minimize() is not used here — confirm that is intended.
true_skill_history_priors.convergence(epsilon=0.01, iterations=10)

# Print the log evidence of the history after the convergence pass.
print(true_skill_history_priors.log_evidence())

Iteration =  0 , step =  (3.7638065299271775, 0.9568087274906079)
Iteration =  1 , step =  (0.5992915591564723, 0.04234960836586121)
Iteration =  2 , step =  (0.3750498157579929, 0.014743538780252585)
Iteration =  3 , step =  (0.27690898982839673, 0.010851022039432667)
Iteration =  4 , step =  (0.23299169677792353, 0.009073882388377585)
Iteration =  5 , step =  (0.20274338728320362, 0.007937127428417012)
Iteration =  6 , step =  (0.1811644466948925, 0.006970466322375524)
Iteration =  7 , step =  (0.16183371420684978, 0.006148849143449153)
Iteration =  8 , step =  (0.14468375524345944, 0.0054461237981418)
Iteration =  9 , step =  (0.12951581360599285, 0.0048408627363858425)
End


((0.12951581360599285, 0.0048408627363858425), 10)

In [20]:
# Pull the learning curves out of the history.
# The next cell treats the result as a dict whose values can be indexed with
# [-1]; presumably each value is a chronological list of skill estimates for
# one player key — confirm against the trueskillthroughtime documentation.
ts_dict = true_skill_history_priors.learning_curves()

In [21]:
# reduce every learning curve to its final entry only
small_dict = {}

for key, curve in ts_dict.items():
    small_dict[key] = curve[-1]