In [2]:
import pandas as pd
from functions import assembleDf
from datetime import datetime, timedelta
import numpy as np
import sys
sys.path.append('..')
from pyglicko2.glicko2_tests import exampleCase
from pyglicko2.glicko2 import Player
import glicko2
import time


In [106]:
matches = pd.read_csv('../Data/matches_glicko2.csv',parse_dates = ['tourney_date'], 
                      infer_datetime_format = True)

In [1]:
def epochG(matches, players_dict,cutoff_date):
    '''Take in a dataframe with matches, a list of players, a dictionary of players with the elements
    of player list as keys and an instance of the glicko2 class Player() as the value.
    This function wll return the updated players_list and players_dict.'''
    
    players_list = list(players_dict.keys())
    players = [p for p in np.append(matches['winner_id'].unique(),\
                                                matches['loser_id'].unique())]
    # instantiate player class for players not yet instantiated
    players_to_instantiate = list(set(players) - set(players_list))
    
    new_players = {player: Player() for player in players_to_instantiate}
    # print(type(new_players))
    players_list = list(set(players).union(set(players_list)))

    # update list of instantiated players
    # update dictionary of instantiated players
    players_dict.update(new_players)
    # print(type(players_dict))
    # determine who competed and who didn't
    players_dnc = list(set(players_list) - set(players))
    players_compete = players
    # print(f'{players_compete=}')
    # update rating deviation for players who didn't compete
    for player in players_dnc:
        players_dict[player].did_not_compete()
    # fill dictionary with (wins,losses), a tuple of list of opponents in wins and losses for each match
    results = {}
    # update players who did compete
    for player in players_compete:
        # get id of players they beat
        wins = [winner for winner in matches[matches['winner_id']==player]['loser_id']]
        # get id of players they lost to
        losses = [loser for loser in matches[matches['loser_id']== player]['winner_id']]
        # get opponents' rating and rd
        losses_rating = [players_dict[loss].getRating() for loss in losses]
        losses_rd = [players_dict[loss].getRd() for loss in losses]
        wins_rating = [players_dict[win].getRating() for win in wins]
        wins_rd = [players_dict[win].getRd() for win in wins]
        outcomes = np.append(np.ones(len(wins)),np.zeros(len(losses)))
        opponent_rating = wins_rating + losses_rating
        # print(f'{opponent_rating=}')
        opponent_rd = wins_rd + losses_rd
        results[player] = (opponent_rating, opponent_rd, outcomes)
    
    ratings_timestamp = {}
    # update players
    for player in list(results.keys()):
        (rating_list, RD_list, outcome_list) = results[player]
        players_dict[player].update_player(rating_list, RD_list, outcome_list)
        ratings_timestamp[(player,cutoff_date)] = players_dict[player].getRating()
        
    return players_dict,ratings_timestamp


In [67]:
epochG(matches[1:10],{},matches.iloc[10]['tourney_date'])

({113953: <pyglicko2.glicko2.Player at 0x7fc8a00089d0>,
  114146: <pyglicko2.glicko2.Player at 0x7fc8a0008970>,
  113987: <pyglicko2.glicko2.Player at 0x7fc8a0008850>,
  114147: <pyglicko2.glicko2.Player at 0x7fc8a0008880>,
  114149: <pyglicko2.glicko2.Player at 0x7fc8a00084c0>,
  114154: <pyglicko2.glicko2.Player at 0x7fc8a00085e0>,
  113963: <pyglicko2.glicko2.Player at 0x7fc8a0008430>,
  114158: <pyglicko2.glicko2.Player at 0x7fc8a0008100>,
  113999: <pyglicko2.glicko2.Player at 0x7fc8a0008250>,
  113976: <pyglicko2.glicko2.Player at 0x7fc8a0008730>,
  113982: <pyglicko2.glicko2.Player at 0x7fc8a00087f0>},
 {(113987, Timestamp('1877-07-09 00:00:00')): 1799.6258356900184,
  (113963, Timestamp('1877-07-09 00:00:00')): 1599.8752635134226,
  (113999, Timestamp('1877-07-09 00:00:00')): 1599.8752635134226,
  (114158, Timestamp('1877-07-09 00:00:00')): 1500.0,
  (114149, Timestamp('1877-07-09 00:00:00')): 1662.3108949741131,
  (113953, Timestamp('1877-07-09 00:00:00')): 1337.6891050258869,

In [70]:
def epochsG(match_history, interval_length = 365):
    '''Calculate the ending rating for each player with the lengh of each epoch being
    a funtion of the interval_length
    
    Next iterations: generate a rating history indicating the rating of each player each time that
    the rating updates.
    '''
    # check if the match_history is empty, as if it is the function call will not complete.
    if match_history.empty:
        return "Try again with a non-empty match history!"
    max_date = max(match_history['tourney_date']) # maximum date in the records
    min_date = min(match_history['tourney_date']) # minimum date in the records
    date_range = range(0,(max_date-min_date).days + 1,interval_length) # days from date of first
    # match in increments of the interval (default 365).  This is the length of each epoch. 
    epoch_cutoffs = [min_date + timedelta(days = x) for x in date_range] # The times that
    # divide the matches into each epoch.
    epoch_ranges = zip(epoch_cutoffs[0:-2],epoch_cutoffs[1:-1]) # each epoch will include matches
    # greater than or equal to the first element, less than the second element for the zip
    # generator's respective item.
    # print([r for r in epoch_ranges])
    players_dict = {} # instantiate the dictionary that will hold a Player() class for each player.
    ratings_history = {}

    # iteratively re-update for each epoch
    for period in epoch_ranges:
        # rating_ period is the df of matches that fall within a given epoch
        rating_period = match_history[(match_history['tourney_date']>=period[0])&
                                      (match_history['tourney_date']<period[1])]
        # If the rating period is empty, then adjust the rating deviation of the players.
        if not rating_period.empty:
            players_dict,ratings_timestamp = epochG(rating_period,players_dict,period[1])
            ratings_history.update(ratings_timestamp)
        else:
            for player in players_dict:
                players_dict[player].did_not_compete()
    # get the final rating period updates (for matches in the final 365 to 729 days).
    rating_period = match_history[match_history['tourney_date']>=epoch_cutoffs[-1]]
    
    players_dict,ratings_timestamp = epochG(rating_period,players_dict,max_date)
    ratings_history.update(ratings_timestamp)
    return players_dict, ratings_history

In [78]:
%%time
start = time.time()
pd,rh = epochsG(matches[256_000:360_000],180)
compute_time = time.time()-start
print(compute_time)


9.954494953155518
CPU times: user 9.92 s, sys: 41.5 ms, total: 9.96 s
Wall time: 9.95 s


In [84]:
rh

{(101736, Timestamp('1998-07-04 00:00:00')): 1764.687220531697,
 (101601, Timestamp('1998-07-04 00:00:00')): 1485.2473842857473,
 (101889, Timestamp('1998-07-04 00:00:00')): 1465.6096834787659,
 (103720, Timestamp('1998-07-04 00:00:00')): 1397.5054693308086,
 (101727, Timestamp('1998-07-04 00:00:00')): 1597.7161583742354,
 (102257, Timestamp('1998-07-04 00:00:00')): 1645.7194628013067,
 (102223, Timestamp('1998-07-04 00:00:00')): 1563.0207045249715,
 (101441, Timestamp('1998-07-04 00:00:00')): 1534.390316521234,
 (102148, Timestamp('1998-07-04 00:00:00')): 1564.5084538723263,
 (101964, Timestamp('1998-07-04 00:00:00')): 1586.0704702389721,
 (101820, Timestamp('1998-07-04 00:00:00')): 1560.7359526702744,
 (101463, Timestamp('1998-07-04 00:00:00')): 1409.4239212591574,
 (101723, Timestamp('1998-07-04 00:00:00')): 1524.4952292299379,
 (108812, Timestamp('1998-07-04 00:00:00')): 1283.6116256109933,
 (102817, Timestamp('1998-07-04 00:00:00')): 1804.6864646791116,
 (102687, Timestamp('1998-0

In [82]:
# function to get all unique timestamps:
def getUniqueTimestamps(ratingHistory):
    '''take in rating histories and return the unique timestamps.'''
    times = set()
    for key in ratingHistory.keys():
        times.add(key[1])
    return times

In [86]:
import numpy as np
from scipy.sparse import csr_matrix

In [None]:
for key

In [87]:
[(k,v) for k,v in rh()]

AttributeError: 'dict' object has no attribute 'iteritems'

In [88]:
pd.fromkeys()

AttributeError: 'dict' object has no attribute 'DataFrame'

In [104]:
assembleDf(rh)

Unnamed: 0,101736,101601,101889,103720,101727,102257,102223,101441,102148,101964,...,103637,109078,104125,104191,104772,104287,109426,104275,104494,104601
1998-07-04,1764.687221,1485.247384,1465.609683,1397.505469,1597.716158,1645.719463,1563.020705,1534.390317,1564.508454,1586.07047,...,1252.681917,,,,,,,,,
1998-12-31,1809.448613,1454.187099,1477.551703,1600.274201,1595.860473,1741.844615,1556.692408,1428.627796,1496.20584,1637.432679,...,1083.174795,,,,,,,,,
1999-06-29,1817.877312,1462.504323,1448.05993,1694.062114,1612.633857,1700.173348,1577.443065,,1576.651647,1600.948349,...,1029.86743,,,,,,,,,
1999-12-26,1854.466054,1539.687488,1456.366354,1712.328816,1596.305794,1733.118385,1656.597162,,1611.936205,1602.475168,...,1072.936201,,,,,,,,,
2000-06-23,1850.474525,,1485.300663,1784.847743,1643.718197,1722.148443,1646.023661,1454.357823,1645.625131,1594.213008,...,1293.655475,,,,,,,,,
2000-12-20,1842.383212,,1515.792451,1785.148658,1639.483331,1702.999605,1635.811338,1452.629504,1643.334271,1583.81596,...,1353.708759,,,,,,,,,
2001-06-18,1858.599138,,1540.086536,1817.256076,1638.734396,1711.714862,1619.280736,,1667.965093,1596.408505,...,1411.489731,,1430.279366,,,,,,,
2001-12-15,1850.376006,,1560.054327,1843.081174,1640.098815,1716.558304,1594.074642,,1666.414464,1641.209932,...,1402.936791,,1202.56938,1325.437261,,,,,,
2002-06-13,1875.826208,,,1856.525316,,1720.431058,1583.958975,,1668.035841,1638.105303,...,1426.17189,,1161.759131,1328.198412,,,,,,
2003-03-10,1887.113945,,1554.445275,1870.246874,,,1581.408639,,1664.668074,1633.229991,...,1422.644215,1215.521759,1159.341951,1299.475755,1289.103725,1302.352629,1279.656266,1303.050459,1231.429757,1300.672216
