In [1]:
# Imports
from chessdotcom import *
import pprint
import requests

import urllib.request
import json

import numpy as np
import pandas as pd
import pickle
import json

In [2]:
## Configure request headers. The project talks to the API through the chessdotcom library (built on requests);
## headers and proxies can be set through the Client object.
from chessdotcom import Client

# Adjacent string literals are concatenated with no separator, so the first fragment must end with
# its own punctuation and space to keep the User-Agent readable.
Client.request_config["headers"]["User-Agent"] = (
 "Machine learning for chess match outcome Prediction, BSc in Computer Science Dissertation, University of Lincoln. "
 "Contact me at conorjackvincent@live.co.uk"
)

# Rate limiting applied to any method used within the chessdotcom library.
# NOTE(review): presumably `tries` is the number of attempts and `tts` the seconds to wait between them — confirm against the library docs.
Client.rate_limit_handler.tries = 2
Client.rate_limit_handler.tts = 4

In [3]:
## Load the player_profiles_original.pkl file so the usernames in this pandas DataFrame can be used to collect further statistical data.
# A forward slash is used as the separator: '\p' inside a normal string literal is an invalid escape
# sequence, and a backslash separator only works on Windows. This matches the paths used when saving.
# NOTE: pickle.load can execute arbitrary code; only load files produced by this project.
with open('Collected_Chess_Data/player_profiles_original.pkl', 'rb') as file:
    player_profiles = pickle.load(file)

In [4]:
## View the structure of the loaded pandas DataFrame (columns, non-null counts and dtypes).
player_profiles.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1540 entries, 0 to 1539
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   name          1299 non-null   object
 1   username      1540 non-null   object
 2   title         1540 non-null   object
 3   followers     1540 non-null   int64 
 4   country_code  1540 non-null   object
 5   country       1540 non-null   object
 6   status        1540 non-null   object
 7   is_streamer   1540 non-null   bool  
 8   verified      1540 non-null   bool  
 9   league        1230 non-null   object
dtypes: bool(2), int64(1), object(7)
memory usage: 99.4+ KB


In [5]:
# Create a list of all the usernames in the player_profiles DataFrame; these drive the statistics requests below.
usernames = player_profiles['username'].tolist()

In [6]:
# Request the statistics of the first username to explore the structure of the API response.
player_profile_stat_response = get_player_stats(usernames[0])
# `.json` is read as an attribute (not called); the decoded payload is a dictionary.
json_structure = player_profile_stat_response.json
print(json_structure.keys())

dict_keys(['stats'])


In [7]:
# List the statistic categories returned for this player under the 'stats' key.
print(json_structure['stats'].keys())

dict_keys(['chess_rapid', 'chess_bullet', 'chess_blitz', 'tactics', 'puzzle_rush'])


In [8]:
def player_stat_dict(username_list, max_retries=15):
    """
    Retrieve player statistics from the Chess.com API for a list of usernames.

    Parameters:
        username_list (list): Usernames whose statistics are to be retrieved.
        max_retries (int): Number of additional attempts made for a single username after its
            first failed request. Defaults to 15.

    Returns:
        dict: Maps each successfully retrieved username to a flat dictionary of its statistics.
        list: Usernames for which retrieval still failed after the maximum number of retries.

    Each username is requested with `get_player_stats`. The nested JSON response is flattened with
    `pd.json_normalize` (nested keys joined by '_') and stored under the username. When a
    `ChessDotComError` is raised the request is retried; once `max_retries` retries have been used,
    the username is appended to the failure list and processing moves on to the next username.
    The retry budget is per username: it starts at zero for every username, so retries spent on an
    earlier username never reduce the attempts available to a later one. Any other exception type
    propagates to the caller.
    """
    stat_dict = {}  # Dictionary to store player statistics.
    fail_list = []  # List to store usernames for which retrieval failed.

    counter = 0  # Number of usernames retrieved so far, used for the progress display.

    for username in username_list:
        retry = 0  # Retry counter, reset for every username.

        while True:
            try:
                # Retrieve player statistics for the current username.
                player_profile_stat_response = get_player_stats(username)
                json_structure = player_profile_stat_response.json

                # Flatten the JSON structure and convert it to a single dictionary.
                flatten_json_structure = pd.json_normalize(json_structure, sep='_').to_dict(orient='records')[0]

            except ChessDotComError as e:
                print(f'\rError for {username}: {e}')

                if retry < max_retries:
                    retry += 1
                    print(f'\rRetrying for {username} attempt {retry}...')
                    continue

                # Retry budget exhausted: record the failure and move on to the next username.
                print(f'\rMax retries reached for {username}. Moving on.')
                fail_list.append(username)
                break

            # Print the progress counter, overwriting the previous one on the same line.
            counter += 1
            print(f'\r{counter} {username}', end='', flush=True)

            # Store the player statistics in the stat_dict.
            stat_dict[username] = flatten_json_structure
            break

    return stat_dict, fail_list

In [9]:
# Collect statistics for every username: returns the per-player statistics dictionary and the list of usernames whose retrieval failed.
player_statistics, fail_list = player_stat_dict(usernames)

1540 zvonokchess1996eyovaalla

In [10]:
# Show the usernames whose statistics could not be retrieved (an empty list means every request succeeded).
print(fail_list)

[]


In [11]:
# Visually inspect the statistics of one player (the fourth username) and check how many players were collected.
print(player_statistics[usernames[3]])
print(len(player_statistics))

{'stats_chess_daily_last_rating': 1930, 'stats_chess_daily_last_date': 1704274099, 'stats_chess_daily_last_rd': 125, 'stats_chess_daily_best_rating': 1930, 'stats_chess_daily_best_date': 1562019289, 'stats_chess_daily_best_game': 'https://www.chess.com/game/daily/209805608', 'stats_chess_daily_record_win': 27, 'stats_chess_daily_record_loss': 22, 'stats_chess_daily_record_draw': 1, 'stats_chess_daily_record_time_per_move': 42285, 'stats_chess_daily_record_timeout_percent': 0, 'stats_chess_rapid_last_rating': 2565, 'stats_chess_rapid_last_date': 1706901648, 'stats_chess_rapid_last_rd': 43, 'stats_chess_rapid_best_rating': 2617, 'stats_chess_rapid_best_date': 1690310908, 'stats_chess_rapid_best_game': 'https://www.chess.com/game/live/83954946123', 'stats_chess_rapid_record_win': 31, 'stats_chess_rapid_record_loss': 23, 'stats_chess_rapid_record_draw': 12, 'stats_chess_bullet_last_rating': 2876, 'stats_chess_bullet_last_date': 1710103169, 'stats_chess_bullet_last_rd': 48, 'stats_chess_bul

In [12]:
def player_stat_dict_to_df(dict):
    """
    Split flattened player statistics into one list of dictionaries per game category.

    Parameters:
        dict (dict): Maps each username to a flat dictionary of statistics whose keys look like
            'stats_<category>_...'. (The parameter name shadows the built-in `dict`.)

    Returns:
        tuple: Eight lists, in this order: daily chess, Chess960 daily, rapid, bullet, blitz,
        FIDE, tactics and Puzzle Rush. Each list holds one dictionary per username, in input order.

    Every per-user dictionary starts with a 'username' entry. A statistic is assigned to the first
    category (in the order above) whose marker text occurs anywhere in the statistic's key, and is
    stored under that key with every 'stats_' occurrence removed. Statistics matching no category
    are dropped, and a user with no statistics in a category still gets a dictionary containing
    only the username.
    """
    # Marker order is significant: the first marker found in a key wins.
    category_markers = (
        'chess_daily',
        'chess960_daily',
        'chess_rapid',
        'chess_bullet',
        'chess_blitz',
        'fide',
        'tactics',
        'puzzle_rush',
    )

    # One growing list of per-user dictionaries for each category.
    rows_by_category = {marker: [] for marker in category_markers}

    for player, flat_stats in dict.items():
        # Fresh per-category dictionaries for this user, each seeded with the username.
        per_category = {marker: {'username': player} for marker in category_markers}

        for stat_name, stat_value in flat_stats.items():
            matched = next((marker for marker in category_markers if marker in stat_name), None)
            if matched is None:
                continue
            per_category[matched][stat_name.replace('stats_', '')] = stat_value

        # Append this user's dictionaries to the corresponding lists.
        for marker in category_markers:
            rows_by_category[marker].append(per_category[marker])

    # Return the lists in the fixed category order.
    return tuple(rows_by_category[marker] for marker in category_markers)
    
    

    

In [13]:
# Call the player_stat_dict_to_df and store the lists of dictionaries into variables.
chess_daily_data, chess960_daily_data, chess_rapid_data, chess_bullet_data, chess_blitz_data, fide_data, tactics_data, puzzle_rush_data = player_stat_dict_to_df(player_statistics)

# Display the second element of the fide_data list to check its contents.
print(fide_data[1])

{'username': '124chess'}


In [14]:
# Convert each list of dictionaries into its own pandas DataFrame (one row per username).
# Statistics missing for a player appear as NaN in that player's row.

df_chess_daily = pd.DataFrame.from_records(chess_daily_data)
df_chess960_daily_data = pd.DataFrame.from_records(chess960_daily_data)
df_chess_rapid_data = pd.DataFrame.from_records(chess_rapid_data)
df_chess_bullet_data = pd.DataFrame.from_records(chess_bullet_data)
df_chess_blitz_data = pd.DataFrame.from_records(chess_blitz_data)
df_fide_data = pd.DataFrame.from_records(fide_data)
df_tactics_data = pd.DataFrame.from_records(tactics_data)
df_puzzle_rush_data = pd.DataFrame.from_records(puzzle_rush_data)

In [15]:
# Preview the first rows of the daily chess statistics and the non-null count of each column.
display(df_chess_daily.head())
display(df_chess_daily.count())

Unnamed: 0,username,chess_daily_last_rating,chess_daily_last_date,chess_daily_last_rd,chess_daily_best_rating,chess_daily_best_date,chess_daily_best_game,chess_daily_record_win,chess_daily_record_loss,chess_daily_record_draw,chess_daily_record_time_per_move,chess_daily_record_timeout_percent,chess_daily_tournament_points,chess_daily_tournament_withdraw,chess_daily_tournament_count,chess_daily_tournament_highest_finish
0,123lt,,,,,,,,,,,,,,,
1,124chess,,,,,,,,,,,,,,,
2,1977ivan,,,,,,,,,,,,,,,
3,1stsecond,1930.0,1704274000.0,125.0,1930.0,1562019000.0,https://www.chess.com/game/daily/209805608,27.0,22.0,1.0,42285.0,0.0,,,,
4,4thd-alpeacefulmoon,,,,,,,,,,,,,,,


username                                 1540
chess_daily_last_rating                   428
chess_daily_last_date                     428
chess_daily_last_rd                       428
chess_daily_best_rating                   342
chess_daily_best_date                     342
chess_daily_best_game                     342
chess_daily_record_win                    428
chess_daily_record_loss                   428
chess_daily_record_draw                   428
chess_daily_record_time_per_move          428
chess_daily_record_timeout_percent        428
chess_daily_tournament_points               8
chess_daily_tournament_withdraw             8
chess_daily_tournament_count                8
chess_daily_tournament_highest_finish       8
dtype: int64

In [16]:
# NOTE(review): this cell repeats the Client configuration done near the top of the notebook and replaces
# that User-Agent with a shorter one — confirm whether the override is intended.
from chessdotcom import Client

# Adjacent string literals are concatenated with no separator, so the first fragment must end with
# its own punctuation and space to keep the User-Agent readable.
Client.request_config["headers"]["User-Agent"] = (
 "Machine learning for chess match outcome Prediction. "
 "Contact me at conorjackvincent@live.co.uk"
)

In [17]:
# Preview the first rows of the Chess960 daily statistics and the non-null count of each column.
display(df_chess960_daily_data.head())
display(df_chess960_daily_data.count())

Unnamed: 0,username,chess960_daily_last_rating,chess960_daily_last_date,chess960_daily_last_rd,chess960_daily_best_rating,chess960_daily_best_date,chess960_daily_best_game,chess960_daily_record_win,chess960_daily_record_loss,chess960_daily_record_draw,chess960_daily_record_time_per_move,chess960_daily_record_timeout_percent,chess960_daily_tournament_points,chess960_daily_tournament_withdraw,chess960_daily_tournament_count,chess960_daily_tournament_highest_finish
0,123lt,,,,,,,,,,,,,,,
1,124chess,,,,,,,,,,,,,,,
2,1977ivan,,,,,,,,,,,,,,,
3,1stsecond,,,,,,,,,,,,,,,
4,4thd-alpeacefulmoon,,,,,,,,,,,,,,,


username                                    1540
chess960_daily_last_rating                    66
chess960_daily_last_date                      66
chess960_daily_last_rd                        66
chess960_daily_best_rating                    56
chess960_daily_best_date                      56
chess960_daily_best_game                      56
chess960_daily_record_win                     66
chess960_daily_record_loss                    66
chess960_daily_record_draw                    66
chess960_daily_record_time_per_move           66
chess960_daily_record_timeout_percent         66
chess960_daily_tournament_points               2
chess960_daily_tournament_withdraw             2
chess960_daily_tournament_count                2
chess960_daily_tournament_highest_finish       2
dtype: int64

In [18]:
# Preview the first rows of the rapid chess statistics and the non-null count of each column.
display(df_chess_rapid_data.head())
display(df_chess_rapid_data.count())

Unnamed: 0,username,chess_rapid_last_rating,chess_rapid_last_date,chess_rapid_last_rd,chess_rapid_best_rating,chess_rapid_best_date,chess_rapid_best_game,chess_rapid_record_win,chess_rapid_record_loss,chess_rapid_record_draw
0,123lt,2477.0,1659713000.0,67.0,2550.0,1659100000.0,https://www.chess.com/game/live/53103644193,34.0,14.0,17.0
1,124chess,2244.0,1638019000.0,168.0,2350.0,1595647000.0,https://www.chess.com/game/live/6543549448,6.0,1.0,2.0
2,1977ivan,2386.0,1675287000.0,103.0,2556.0,1597419000.0,https://www.chess.com/game/live/69022308373,12.0,13.0,4.0
3,1stsecond,2565.0,1706902000.0,43.0,2617.0,1690311000.0,https://www.chess.com/game/live/83954946123,31.0,23.0,12.0
4,4thd-alpeacefulmoon,,,,,,,,,


username                   1540
chess_rapid_last_rating    1094
chess_rapid_last_date      1094
chess_rapid_last_rd        1094
chess_rapid_best_rating    1039
chess_rapid_best_date      1039
chess_rapid_best_game      1039
chess_rapid_record_win     1094
chess_rapid_record_loss    1094
chess_rapid_record_draw    1094
dtype: int64

In [19]:
# Preview the first rows of the bullet chess statistics and the non-null count of each column.
display(df_chess_bullet_data.head())
display(df_chess_bullet_data.count())

Unnamed: 0,username,chess_bullet_last_rating,chess_bullet_last_date,chess_bullet_last_rd,chess_bullet_best_rating,chess_bullet_best_date,chess_bullet_best_game,chess_bullet_record_win,chess_bullet_record_loss,chess_bullet_record_draw
0,123lt,2725.0,1625236000.0,79.0,2824.0,1624890000.0,https://www.chess.com/game/live/17850165657,255.0,197.0,23.0
1,124chess,2764.0,1691867000.0,48.0,2846.0,1691862000.0,https://www.chess.com/game/live/85603805241,103.0,102.0,11.0
2,1977ivan,2650.0,1597411000.0,209.0,2075.0,1587320000.0,https://www.chess.com/game/live/4743965043,2.0,0.0,0.0
3,1stsecond,2876.0,1710103000.0,48.0,2892.0,1659773000.0,https://www.chess.com/game/live/3551342825,1058.0,858.0,154.0
4,4thd-alpeacefulmoon,,,,,,,,,


username                    1540
chess_bullet_last_rating    1245
chess_bullet_last_date      1245
chess_bullet_last_rd        1245
chess_bullet_best_rating    1212
chess_bullet_best_date      1212
chess_bullet_best_game      1212
chess_bullet_record_win     1245
chess_bullet_record_loss    1245
chess_bullet_record_draw    1245
dtype: int64

In [20]:
# Preview the first rows of the blitz chess statistics and the non-null count of each column.
display(df_chess_blitz_data.head())
display(df_chess_blitz_data.count())

Unnamed: 0,username,chess_blitz_last_rating,chess_blitz_last_date,chess_blitz_last_rd,chess_blitz_best_rating,chess_blitz_best_date,chess_blitz_best_game,chess_blitz_record_win,chess_blitz_record_loss,chess_blitz_record_draw
0,123lt,2781.0,1692722000.0,115.0,2803.0,1692719000.0,https://www.chess.com/game/live/5253547679,756.0,689.0,140.0
1,124chess,2586.0,1709057000.0,47.0,2830.0,1589185000.0,https://www.chess.com/game/live/5011066375,1255.0,1044.0,242.0
2,1977ivan,2869.0,1713479000.0,34.0,2875.0,1713368000.0,https://www.chess.com/game/live/56222072419,1128.0,644.0,145.0
3,1stsecond,2904.0,1713117000.0,38.0,2941.0,1692897000.0,https://www.chess.com/game/live/4376821835,5762.0,4442.0,843.0
4,4thd-alpeacefulmoon,2718.0,1704728000.0,107.0,2718.0,1704728000.0,https://www.chess.com/game/live/98466881019,10.0,0.0,3.0


username                   1540
chess_blitz_last_rating    1454
chess_blitz_last_date      1454
chess_blitz_last_rd        1454
chess_blitz_best_rating    1452
chess_blitz_best_date      1452
chess_blitz_best_game      1452
chess_blitz_record_win     1454
chess_blitz_record_loss    1454
chess_blitz_record_draw    1454
dtype: int64

In [21]:
# Preview the first rows of the FIDE rating data and the non-null count of each column.
display(df_fide_data.head())
display(df_fide_data.count())

Unnamed: 0,username,fide
0,123lt,
1,124chess,
2,1977ivan,2641.0
3,1stsecond,2582.0
4,4thd-alpeacefulmoon,0.0


username    1540
fide         916
dtype: int64

In [22]:
# Preview the first rows of the tactics statistics and the non-null count of each column.
display(df_tactics_data.head())
display(df_tactics_data.count())

Unnamed: 0,username,tactics_highest_rating,tactics_highest_date,tactics_lowest_rating,tactics_lowest_date
0,123lt,3497,1712612947,1303,1452514736
1,124chess,3316,1643195146,1358,1471316437
2,1977ivan,2713,1595628420,1812,1595623512
3,1stsecond,4255,1576940318,1539,1391518318
4,4thd-alpeacefulmoon,1000,1576488055,1000,1576488055


username                  1540
tactics_highest_rating    1540
tactics_highest_date      1540
tactics_lowest_rating     1540
tactics_lowest_date       1540
dtype: int64

In [23]:
# Preview the first rows of the Puzzle Rush statistics and the non-null count of each column.
display(df_puzzle_rush_data.head())
display(df_puzzle_rush_data.count())

Unnamed: 0,username,puzzle_rush_best_total_attempts,puzzle_rush_best_score,puzzle_rush_daily_total_attempts,puzzle_rush_daily_score
0,123lt,63.0,60.0,,
1,124chess,59.0,56.0,,
2,1977ivan,,,,
3,1stsecond,56.0,53.0,,
4,4thd-alpeacefulmoon,,,,


username                            1540
puzzle_rush_best_total_attempts     1192
puzzle_rush_best_score              1192
puzzle_rush_daily_total_attempts      57
puzzle_rush_daily_score               57
dtype: int64

In [24]:
# Persist every per-category DataFrame as its own pickle file inside Collected_Chess_Data.
pickle_targets = [
    ('Collected_Chess_Data/chess_daily.pkl', df_chess_daily),
    ('Collected_Chess_Data/chess960_daily.pkl', df_chess960_daily_data),
    ('Collected_Chess_Data/chess_rapid.pkl', df_chess_rapid_data),
    ('Collected_Chess_Data/chess_bullet.pkl', df_chess_bullet_data),
    ('Collected_Chess_Data/chess_blitz.pkl', df_chess_blitz_data),
    ('Collected_Chess_Data/fide.pkl', df_fide_data),
    ('Collected_Chess_Data/tactics.pkl', df_tactics_data),
    ('Collected_Chess_Data/puzzle_rush.pkl', df_puzzle_rush_data),
]

# Files are written in the order listed; each handle is closed before the next file is opened.
for target_path, frame in pickle_targets:
    with open(target_path, 'wb') as file:
        pickle.dump(frame, file)

In [25]:
# Join the player profiles with every per-category statistics frame on 'username'.
# The frames are merged one at a time, in this order, using an outer join each time.
category_frames = (
    df_chess_daily,
    df_chess960_daily_data,
    df_chess_rapid_data,
    df_chess_bullet_data,
    df_chess_blitz_data,
    df_fide_data,
    df_tactics_data,
    df_puzzle_rush_data,
)

merged_df = player_profiles
for category_frame in category_frames:
    merged_df = pd.merge(merged_df, category_frame, on='username', how='outer')

merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1540 entries, 0 to 1539
Data columns (total 76 columns):
 #   Column                                    Non-Null Count  Dtype  
---  ------                                    --------------  -----  
 0   name                                      1299 non-null   object 
 1   username                                  1540 non-null   object 
 2   title                                     1540 non-null   object 
 3   followers                                 1540 non-null   int64  
 4   country_code                              1540 non-null   object 
 5   country                                   1540 non-null   object 
 6   status                                    1540 non-null   object 
 7   is_streamer                               1540 non-null   bool   
 8   verified                                  1540 non-null   bool   
 9   league                                    1230 non-null   object 
 10  chess_daily_last_rating             