In [1]:
import numpy as np

In [10]:
import pandas as pd
from functions import epoch, epochs, cutDf
from datetime import datetime, timedelta
import numpy as np
import sys
sys.path.append('..')
from pyglicko2.glicko2_tests import exampleCase
from pyglicko2.glicko2 import Player
import glicko2
import time


In [11]:
matches = pd.read_csv('../Data/matches_glicko2.csv',parse_dates=['tourney_date'])

In [14]:
matches['tourney_date'].dtypes

dtype('<M8[ns]')

In [4]:
# matches['tourney_date'] = matches['tourney_date'].apply(
#     lambda x: pd.to_datetime(str(x), format='%Y%m%d')) # convert the tourney_date to datetime

In [5]:
def epochElo(matches, players_dict, cutoff_date):
    '''Take in a dataframe with matches, a list of players, a dictionary of players with the elements
    of player list as keys and an instance of the glicko2 class Player() as the value.
    This function wll return the updated players_list and players_dict.
    
    Future iterations of this function should do away with the p_ , and only should need the 
    players_dict with the keys as integers indicating the player id.
    '''
    players_list = list(players_dict.keys())
    players = [p for p in np.append(matches['winner_id'].unique(),\
                                                matches['loser_id'].unique())]
    # instantiate player class for players not yet instantiated
    players_to_instantiate = list(set(players) - set(players_list))
    
    new_players = {player: PlayerElo() for player in players_to_instantiate}
    # print(type(new_players))

    # update list of instantiated players
    players_list = list(set(players).union(set(players_list)))
    # update dictionary of instantiated players
    players_dict.update(new_players)
    # print(type(players_dict))
    # determine who competed and who didn't
    players_dnc = list(set(players_list) - set(players))
    players_compete = list(set(players_list)-set(players_dnc))
    # fill dictionary with player:(wins,losses), a tuple of list of opponents in wins and losses for each match
    results = {}
    for player in players_compete:
        wins = [winner for winner in matches[matches['winner_id']==player]['loser_id']]
        losses = [loser for loser in matches[matches['loser_id']== player]['winner_id']]
        losses_rating = [players_dict[loss].getRating() for loss in losses]
        wins_rating = [players_dict[win].getRating() for win in wins]
        outcomes = np.append(np.ones(len(wins)),np.zeros(len(losses)))
        opponent_rating = wins_rating + losses_rating
        # print(f'{opponent_rating=}')
        results[player] = (opponent_rating, outcomes)
    ratings_timestamp = {}
    # update players
    for player in list(results.keys()):
        (rating_list, outcome_list) = results[player]
        # print(f'{rating_list=}, {RD_list=}, {outcome_list=}')
        players_dict[player].update_player(rating_list,outcome_list)
        ratings_timestamp[(player,cutoff_date)] = players_dict[player].getRating()
    return players_dict,ratings_timestamp
    # opponent_indices = matches[matches['loser_id'].str==player '']

In [6]:
# an implementation of a class that holds a player's rating,
# updates the rating, instantiate's the player.
# It ammounts to a simplified modeled of Ryan Kirkman's 
# glicko2 implementation.
import numpy as np # numpy will be used for vectorized calculations.
class PlayerElo:
    # initialize the update rate
    _k = 16 
    def getRating(self):
        return self.__rating
    def setRating(self, rating):
        self.__rating = rating
    def __init__(self, rating = 1500):
        self.__rating = rating
    def update_player(self, rating_list, outcome_list):
        _k = self._k
        rating_list = np.array(rating_list).astype(float) # avoid exponentiation of int 
        # by negative value error.
        n = len(rating_list)
        # calculate expected wins
        expected_wins = np.divide(1,(1 + np.power(10,(rating_list-self.__rating)/400)))
        self.__rating = self.__rating + _k*np.sum(outcome_list - expected_wins)

In [None]:
def epochsElo(match_history, interval_length = 365):
    '''Calculate the ending rating for each player with the lengh of each epoch being
    a funtion of the interval_length
    
    Next iterations: generate a rating history indicating the rating of each player each time that
    the rating updates.
    '''
    # check if the match_history is empty, as if it is the function call will not complete.
    if match_history.empty:
        return "Try again with a non-empty match history!"
    max_date = max(match_history['tourney_date']) # maximum date in the records
    min_date = min(match_history['tourney_date']) # minimum date in the records
    date_range = range(0,(max_date-min_date).days + 1,interval_length) # days from date of first
    # match in increments of the interval (default 365).  This is the length of each epoch. 
    epoch_cutoffs = [min_date + timedelta(days = x) for x in date_range] # The times that
    # divide the matches into each epoch.
    epoch_ranges = zip(epoch_cutoffs[0:-2],epoch_cutoffs[1:-1]) # each epoch will include matches
    # greater than or equal to the first element, less than the second element for the zip
    # generator's respective item.
    players_dict = {} # instantiate the dictionary that will hold a PlayerElo() class for each player.
    ratings_history = {}
    # iteratively re-update for each epoch
    for period in epoch_ranges:
        # rating_period is the df of matches that fall within an epoch
        rating_period = match_history[(match_history['tourney_date']>=period[0])&(match_history['tourney_date']<period[1])]
        # make sure the rating period isn't empty and then update the players.
        if not rating_period.empty:
            players_dict,ratings_timestamp = epochElo(rating_period,players_dict,period[1])
            ratings_history.update(ratings_timestamp)
    # get the final rating period updates (for matches in the final 365 to 729 days).
    rating_period = match_history[match_history['tourney_date']>=epoch_cutoffs[-1]]
    players_dict, ratings_timestamp = epochElo(rating_period,players_dict,epoch_cutoffs[-1])
    ratings_history.update(ratings_timestamp)
    return players_dict,ratings_history

In [17]:
playerClasses, ratingHistory = epochsElo(matches)

In [19]:
ratingHistory

{(113953, Timestamp('1878-07-09 00:00:00')): 1540.0,
 (113954, Timestamp('1878-07-09 00:00:00')): 1492.0,
 (113955, Timestamp('1878-07-09 00:00:00')): 1508.0,
 (113956, Timestamp('1878-07-09 00:00:00')): 1492.0,
 (113957, Timestamp('1878-07-09 00:00:00')): 1500.0,
 (113958, Timestamp('1878-07-09 00:00:00')): 1492.0,
 (113959, Timestamp('1878-07-09 00:00:00')): 1500.0,
 (113960, Timestamp('1878-07-09 00:00:00')): 1492.0,
 (113961, Timestamp('1878-07-09 00:00:00')): 1516.0,
 (113962, Timestamp('1878-07-09 00:00:00')): 1492.0,
 (113963, Timestamp('1878-07-09 00:00:00')): 1508.0,
 (113964, Timestamp('1878-07-09 00:00:00')): 1492.0,
 (113965, Timestamp('1878-07-09 00:00:00')): 1508.0,
 (113966, Timestamp('1878-07-09 00:00:00')): 1492.0,
 (113967, Timestamp('1878-07-09 00:00:00')): 1516.0,
 (113968, Timestamp('1878-07-09 00:00:00')): 1492.0,
 (113969, Timestamp('1878-07-09 00:00:00')): 1492.0,
 (113970, Timestamp('1878-07-09 00:00:00')): 1492.0,
 (113971, Timestamp('1878-07-09 00:00:00')): 1

In [20]:
mi = pd.MultiIndex.from_tuples(ratingHistory)



MultiIndex([(113953, '1878-07-09'),
            (113954, '1878-07-09'),
            (113955, '1878-07-09'),
            (113956, '1878-07-09'),
            (113957, '1878-07-09'),
            (113958, '1878-07-09'),
            (113959, '1878-07-09'),
            (113960, '1878-07-09'),
            (113961, '1878-07-09'),
            (113962, '1878-07-09'),
            ...
            (106408, '2022-06-04'),
            (106410, '2022-06-04'),
            (106415, '2022-06-04'),
            (106421, '2022-06-04'),
            (106422, '2022-06-04'),
            (106423, '2022-06-04'),
            (106425, '2022-06-04'),
            (106426, '2022-06-04'),
            (106432, '2022-06-04'),
            (204737, '2022-06-04')],
           length=123946)

In [18]:
pd.DataFrame.from_dict(ratingHistory)

ValueError: If using all scalar values, you must pass an index