In [1]:
## Imports
from chessdotcom import *
from preprocess import *

import numpy as np
import pandas as pd
import pickle

In [2]:
## Configure Headers, as the Project uses requests package to interact with the API. Headers and paxis can be set
## through the Client Object.
from chessdotcom import Client

Client.request_config["headers"]["User-Agent"] = (
 "Machine learning for chess match outcome Prediction, BSc in Computer Science Dissertation, University of Lincoln"
 "Contact me at conorjackvincent@live.co.uk"
)

# Implement rate limiting to be applied to any method used within the chessdotcom library.
Client.rate_limit_handler.tries = 2
Client.rate_limit_handler.tts = 4

In [3]:
wfm_list = titled_player_names("WFM")
wcm_list = titled_player_names("WCM")

In [4]:
print(f"Number of WFMs: {len(wfm_list)}, Number of WCMs: {len(wcm_list)}")

Number of WFMs: 671, Number of WCMs: 381


In [5]:
# Gather two lists, one of all WFM player profiles and another with failed WFM usernames.
list1, list1_failed = build_df_player_profiles(wfm_list)

In [6]:
# Gather two lists, one of all WCM player profiles and another with failed WCM usernames.
list2, list2_failed = build_df_player_profiles(wcm_list)

In [7]:
# # Create a list with the relevant column names, which can be seen from the original ChessDotCom Response.
column_names = ["name", "username", "title", "followers", "country_code", "country", "status", "is_streamer", "verified", "league"]

# # Put all the player profile listed data into pandas dataframes with the column names.
plist1 = pd.DataFrame(list1, columns=column_names)
plist2 = pd.DataFrame(list2, columns=column_names)

data_frames = [plist1, plist2]

wfm_wcm_player_profiles = pd.concat(data_frames)

# Sample the dataframe, and show the head to get a visual representation of the data currently.
df_elements = wfm_wcm_player_profiles.sample(n=10)
df_elements.head(10)

Unnamed: 0,name,username,title,followers,country_code,country,status,is_streamer,verified,league
300,Alserkal Rouda Essa,rxxa_00,WCM,15,AE,United Arab Emirates,premium,False,False,Silver
120,Naledi Marape,chessgm3000_bw,WFM,5,BW,Botswana,premium,False,False,Wood
477,Oksana Kosteniuk,porosen,WFM,27,RU,Russia,premium,False,False,Stone
398,Anni Guo,moldytofu,WFM,42,CA,Canada,premium,False,False,Champion
393,Miryam Calle,miryamcalle,WFM,3,LA,Laos,premium,False,False,
470,Polina Kobak,pikku_myy73,WFM,3,AM,Armenia,premium,False,False,Bronze
277,,pradeepthi,WCM,19,IN,India,premium,False,False,
429,Anastasiia Hnatyshyn,nastyahn,WFM,24,UA,Ukraine,premium,False,False,Bronze
452,Eva Maria Zickelbein,olympedg,WFM,3,DE,Germany,premium,False,False,
194,,estudiar_001,WFM,15,CO,Colombia,premium,False,False,Bronze


In [8]:
wfm_wcm_player_profiles.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1052 entries, 0 to 380
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   name          831 non-null    object
 1   username      1052 non-null   object
 2   title         1052 non-null   object
 3   followers     1052 non-null   int64 
 4   country_code  1052 non-null   object
 5   country       1052 non-null   object
 6   status        1052 non-null   object
 7   is_streamer   1052 non-null   bool  
 8   verified      1052 non-null   bool  
 9   league        854 non-null    object
dtypes: bool(2), int64(1), object(7)
memory usage: 76.0+ KB


In [9]:
print(len(list1_failed))

0


In [10]:
print(len(list2_failed))

0


In [13]:
with open('Collected_Chess_Data/wfm_wcm_player_profiles.pkl', 'wb') as file:
    pickle.dump(wfm_wcm_player_profiles, file)

In [11]:
with open('Collected_Chess_Data/wfm_wcm_player_profiles.pkl', 'rb') as file:
    loaded_first_frame = pickle.load(file)

In [12]:
# Display the open first frame pkl file to check its contents.
loaded_first_frame.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1052 entries, 0 to 380
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   name          831 non-null    object
 1   username      1052 non-null   object
 2   title         1052 non-null   object
 3   followers     1052 non-null   int64 
 4   country_code  1052 non-null   object
 5   country       1052 non-null   object
 6   status        1052 non-null   object
 7   is_streamer   1052 non-null   bool  
 8   verified      1052 non-null   bool  
 9   league        854 non-null    object
dtypes: bool(2), int64(1), object(7)
memory usage: 76.0+ KB
