In [20]:
## Imports
from chessdotcom import *
from preprocess import *

import numpy as np
import pandas as pd
import pickle

In [21]:
## Configure Headers, as the Project uses requests package to interact with the API. Headers and paxis can be set
## through the Client Object.
from chessdotcom import Client

Client.request_config["headers"]["User-Agent"] = (
 "Machine learning for chess match outcome Prediction, BSc in Computer Science Dissertation, University of Lincoln"
 "Contact me at conorjackvincent@live.co.uk"
)

# Implement rate limiting to be applied to any method used within the chessdotcom library.
Client.rate_limit_handler.tries = 2
Client.rate_limit_handler.tts = 4

In [23]:
gm_list = titled_player_names("GM")
wgm_list = titled_player_names("WGM") 

In [24]:
print(f"Number of GMs: {len(gm_list)}, Number of WGMs: {len(wgm_list)}")

Number of GMs: 1540, Number of WGMs: 222


In [26]:
# Gather two lists, one of all GM player profiles and another with failed GM usernames.
list1, list1_failed = build_df_player_profiles(gm_list)

In [27]:
# Gather two lists, one of all WGM player profiles and another with failed WGM usernames.
list2, list2_failed = build_df_player_profiles(wgm_list)

In [28]:
# # Create a list with the relevant column names, which can be seen from the original ChessDotCom Response.
column_names = ["name", "username", "title", "followers", "country_code", "country", "status", "is_streamer", "verified", "league"]

# # Put all the player profile listed data into pandas dataframes with the column names.
plist1 = pd.DataFrame(list1, columns=column_names)
plist2 = pd.DataFrame(list2, columns=column_names)

data_frames = [plist1, plist2]

gm_wgm_player_profiles = pd.concat(data_frames)

# Sample the dataframe, and show the head to get a visual representation of the data currently.
df_elements = gm_wgm_player_profiles.sample(n=10)
df_elements.head(10)

Unnamed: 0,name,username,title,followers,country_code,country,status,is_streamer,verified,league
589,todor todorov,gmtodorov,GM,36,FR,France,premium,False,False,Wood
1535,Alexander Zubov,zubov_on_youtube,GM,5,UA,Ukraine,basic,False,False,
1457,Vladimir Belous,vladimirbelous,GM,271,US,United States,premium,False,False,Crystal
452,Bharath Subramaniyam.H Harishankkar,fghsmn,GM,154,IN,India,premium,False,False,Champion
1073,Deivy Vera Siguenas,nuidisvulko,GM,41,PE,Peru,premium,False,False,Stone
196,Tereza Rodshtein,terezarodshtein,WGM,4,CZ,Czech Republic,premium,False,False,
966,Zong-Yuan Zhao,megazz,GM,733,AU,Australia,premium,False,False,Legend
791,Gregory Kaidanov,kaidanov,GM,238,US,United States,premium,False,False,Stone
981,Arman Mikaelyan,mikaelyanarman,GM,153,US,United States,premium,False,False,Champion
1139,Damian Lemos,playpositional,GM,174,XE,England,premium,False,False,


In [29]:
gm_wgm_player_profiles.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1762 entries, 0 to 221
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   name          1485 non-null   object
 1   username      1762 non-null   object
 2   title         1762 non-null   object
 3   followers     1762 non-null   int64 
 4   country_code  1762 non-null   object
 5   country       1762 non-null   object
 6   status        1762 non-null   object
 7   is_streamer   1762 non-null   bool  
 8   verified      1762 non-null   bool  
 9   league        1412 non-null   object
dtypes: bool(2), int64(1), object(7)
memory usage: 127.3+ KB


In [30]:
print(len(list1_failed))

0


In [31]:
print(len(list2_failed))

0


In [32]:
with open('Collected_Chess_Data/gm_wgm_player_profiles.pkl', 'wb') as file:
    pickle.dump(gm_wgm_player_profiles, file)

In [33]:
with open('Collected_Chess_Data/gm_wgm_player_profiles.pkl', 'rb') as file:
    loaded_first_frame = pickle.load(file)

In [34]:
# Display the open first frame pkl file to check its contents.
loaded_first_frame.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1762 entries, 0 to 221
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   name          1485 non-null   object
 1   username      1762 non-null   object
 2   title         1762 non-null   object
 3   followers     1762 non-null   int64 
 4   country_code  1762 non-null   object
 5   country       1762 non-null   object
 6   status        1762 non-null   object
 7   is_streamer   1762 non-null   bool  
 8   verified      1762 non-null   bool  
 9   league        1412 non-null   object
dtypes: bool(2), int64(1), object(7)
memory usage: 127.3+ KB
