In [15]:
## Imports
from chessdotcom import *
from preprocess import *

import numpy as np
import pandas as pd
import pickle

In [16]:
## Configure Headers, as the Project uses requests package to interact with the API. Headers and paxis can be set
## through the Client Object.
from chessdotcom import Client

Client.request_config["headers"]["User-Agent"] = (
 "Machine learning for chess match outcome Prediction, BSc in Computer Science Dissertation, University of Lincoln"
 "Contact me at conorjackvincent@live.co.uk"
)

# Implement rate limiting to be applied to any method used within the chessdotcom library.
Client.rate_limit_handler.tries = 2
Client.rate_limit_handler.tts = 4

In [19]:
## Below we gather lists of different titled players.
gm_list = titled_player_names("GM") 
wgm_list = titled_player_names("WGM") 

im_list = titled_player_names("IM")
wim_list = titled_player_names("WIM")

fm_list = titled_player_names("FM")
cm_list = titled_player_names("CM")

wfm_list = titled_player_names("WFM")
wcm_list = titled_player_names("WCM")


In [18]:
# Print Statement used to show the amounts of players in each of the title lists
print(f"Number of GMs: {len(gm_list)}   Number of WGMs: {len(wgm_list)}   Number of IMs: {len(im_list)}   Number of FMs: {len(fm_list)}   Number of CMs: {len(cm_list)} Number of WIMs: {len(wim_list)} Number of WFMs: {len(wfm_list)} Number of WCMs: {len(wcm_list)}")

Number of GMs: 1543   Number of WGMs: 222   Number of IMs: 2167   Number of FMs: 3573   Number of CMs: 1481 Number of WIMs: 383 Number of WFMs: 672 Number of WCMs: 383


In [5]:
# Here, we grab the player profile for a random user as a test, "azikom" is the players username 
# and so we print the ChessDotComResponse stored in player_profile to understand the format of the data
player_profile = get_player_profile("azikom")
print(player_profile)

ChessDotComResponse(player=Collection(avatar='https://images.chesscomfiles.com/uploads/v1/user/44058794.2e18aa97.200x200o.854e2adae7a6.jpeg', player_id=44058794, id='https://api.chess.com/pub/player/azikom', url='https://www.chess.com/member/azikom', name='Azer Mirzoev', username='azikom', title='GM', followers=155, country='https://api.chess.com/pub/country/AZ', last_online=1713795847, joined=1520509648, status='premium', is_streamer=False, verified=False, league='Silver', streaming_platforms=[]))


In [6]:
# Gather two lists, one of all WGM player profiles and another with failed WGM usernames.
list1, list1_failed = build_df_player_profiles(wgm_list)

# Gather two lists, one of all GM player profiles and another with failed GM usernames.
list2, list2_failed = build_df_player_profiles(gm_list)

# Gather two lists, one of all IM player profiles and another with failed IM usernames.
list3, list3_failed = build_df_player_profiles(im_list)

# Gather two lists, one of all FM player profiles and another with failed FM usernames.
list4, list4_failed = build_df_player_profiles(fm_list)

# Gather two lists, one of all CM player profiles and another with failed CM usernames.
list5, list5_failed = build_df_player_profiles(cm_list)

# Gather two lists, one of all WIM player profiles and another with failed WIM usernames.
list6, list6_failed = build_df_player_profiles(wim_list)

# Gather two lists, one of all WFM player profiles and another with failed WFM usernames.
list7, list7_failed = build_df_player_profiles(wfm_list)

# Gather two lists, one of all WCM player profiles and another with failed WCM usernames.
list8, list8_failed = build_df_player_profiles(wcm_list)

Error for wizarddesert: <class 'chessdotcom.types.ChessDotComError'>(status_code=404, text={"code":0,"message":"An internal error has occurred. Please contact Chess.com Developer's Forum for further help https://www.chess.com/club/chess-com-developer-community ."})
Retrying for wizarddesert attempt 1...
Error for wizarddesert: <class 'chessdotcom.types.ChessDotComError'>(status_code=404, text={"code":0,"message":"An internal error has occurred. Please contact Chess.com Developer's Forum for further help https://www.chess.com/club/chess-com-developer-community ."})
Retrying for wizarddesert attempt 2...
Error for wizarddesert: <class 'chessdotcom.types.ChessDotComError'>(status_code=404, text={"code":0,"message":"An internal error has occurred. Please contact Chess.com Developer's Forum for further help https://www.chess.com/club/chess-com-developer-community ."})
Retrying for wizarddesert attempt 3...
Error for wizarddesert: <class 'chessdotcom.types.ChessDotComError'>(status_code=404,

In [9]:
# # Create a list with the relevant column names, which can be seen from the original ChessDotCom Response.
column_names = ["name", "username", "title", "followers", "country_code", "country", "status", "is_streamer", "verified", "league"]

# # Put all the player profile listed data into pandas dataframes with the column names.
plist1 = pd.DataFrame(list1, columns=column_names)
plist2 = pd.DataFrame(list2, columns=column_names)
plist3 = pd.DataFrame(list3, columns=column_names)
plist4 = pd.DataFrame(list4, columns=column_names)
plist5 = pd.DataFrame(list5, columns=column_names)
plist6 = pd.DataFrame(list6, columns=column_names)
plist7 = pd.DataFrame(list7, columns=column_names)
plist8 = pd.DataFrame(list8, columns=column_names)

# List all the different pandas dataframes, and then concat this list into a single pandas df.
data_frames = [plist1, plist2, plist3, plist4, plist5, plist6, plist7, plist8]
first_frame = pd.concat(data_frames)

# Sample the dataframe, and show the head to get a visual representation of the data currently.
df_elements = first_frame.sample(n=10)
df_elements.head(10)

Unnamed: 0,name,username,title,followers,country_code,country,status,is_streamer,verified,league
258,Shamil Arslanov,boundless_strike,IM,544,DJ,Djibouti,premium,False,False,Legend
904,CM McLean Handjaba,mclean12,CM,203,,Namibia,premium,False,False,Legend
129,Alexey Krupko,alexey_krupko,FM,6,XX,International,premium,False,False,Elite
1338,Ido Ben Artzi,motivated_bishop,IM,45,IL,Israel,premium,False,False,Crystal
697,,gothamchess4,IM,52,CA,Canada,premium,True,False,
56,Aleksander Czerwoński,alekroth65,IM,21,PL,Poland,premium,False,False,Bronze
197,Annie Wang,thehappyfarmer,WGM,317,US,United States,premium,False,False,
136,,miupiu,WGM,21,GR,Greece,premium,True,False,Silver
2032,,mascotte44,FM,2,FR,France,premium,False,False,Wood
1975,Sabir Umarov,umarovsabir,IM,4,UZ,Uzbekistan,premium,False,False,Bronze


In [10]:
first_frame.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10392 entries, 0 to 380
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   name          8500 non-null   object
 1   username      10392 non-null  object
 2   title         10392 non-null  object
 3   followers     10392 non-null  int64 
 4   country_code  10392 non-null  object
 5   country       10392 non-null  object
 6   status        10392 non-null  object
 7   is_streamer   10392 non-null  bool  
 8   verified      10392 non-null  bool  
 9   league        8702 non-null   object
dtypes: bool(2), int64(1), object(7)
memory usage: 751.0+ KB


In [11]:
# Store the first frame in a pkl file named 'player_profiles_original.pkl'
with open('Collected_Chess_Data/all_player_profiles.pkl', 'wb') as file:
    pickle.dump(first_frame, file)

In [12]:
# Here, both the created pkl files are opened to ensure that the save worked as expected
with open('Collected_Chess_Data/all_player_profiles.pkl', 'rb') as file:
    loaded_first_frame = pickle.load(file)

In [13]:
# Display the open first frame pkl file to check its contents.
loaded_first_frame.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10392 entries, 0 to 380
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   name          8500 non-null   object
 1   username      10392 non-null  object
 2   title         10392 non-null  object
 3   followers     10392 non-null  int64 
 4   country_code  10392 non-null  object
 5   country       10392 non-null  object
 6   status        10392 non-null  object
 7   is_streamer   10392 non-null  bool  
 8   verified      10392 non-null  bool  
 9   league        8702 non-null   object
dtypes: bool(2), int64(1), object(7)
memory usage: 751.0+ KB
