<a href="https://colab.research.google.com/github/wawill14/Final-Project/blob/main/PFDAFinalProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [43]:
# Import necessary libraries
from matplotlib import pyplot as plt
import pandas as pd 
import numpy as np
import statistics

In [None]:
# Load the games CSV from its raw GitHub URL, using game_id as the index
DATA_URL = "https://raw.githubusercontent.com/wawill14/Final-Project/main/Chess%20Game%20Stats.csv"
df = pd.read_csv(DATA_URL, index_col=["game_id"])
# Preview the first 25 rows
df.head(25)

In [72]:
# Drop the opening response, turns and time increment columns.
# `df` is rebound (no inplace=True) and errors="ignore" is set so the cell can be
# re-run safely: on a second run the columns are already gone and the call is a
# no-op instead of raising KeyError.
df = df.drop(columns=["opening_response", "turns", "time_increment"], errors="ignore")

In [None]:
# Structural overview of the dataframe: column names, non-null counts and dtypes
df.info()

In [None]:
# Report the dataframe's dimensions (columns first, then rows)
n_rows, n_cols = df.shape
print('Number of columns: {}'.format(n_cols))
print('Number of rows: {}'.format(n_rows))

In [None]:
# Summary statistics for the quantitative columns, cast to integers for display
numeric_summary = df.describe()
stats_numeric = numeric_summary.astype(int)
print(stats_numeric)

In [None]:
# Summary statistics for the selected categorical columns
categorical_cols = ['opening_shortname', 'victory_status', 'rated']
stats_categorical = df[categorical_cols].describe()
print(stats_categorical)

In [None]:
# Pie chart showing the proportion of game results by winner
fig, ax = plt.subplots(figsize=(12, 8))
winner_counts = df['winner'].value_counts()
# value_counts() sorts by frequency, so the legend labels are read from its index
# instead of being hard-coded in an assumed order; otherwise a change in the
# ranking of outcomes would silently mislabel the slices.
slice_labels = list(winner_counts.index)
# Pin a colour to each outcome by label; any outcome other than White/Black
# (the draw category) gets the third colour.
colour_for = {'White': 'lightgrey', 'Black': 'darkblue'}
slice_colours = [colour_for.get(label, 'cyan') for label in slice_labels]
# Add % labels and semi-transparent slices
ax.pie(winner_counts, autopct='%1.1f%%', colors=slice_colours, wedgeprops={'alpha': 0.5})
ax.set_title('Win Percentage by Color', fontsize=25)
ax.legend(slice_labels, fontsize=15)
# Change chart background
fig.set_facecolor('whitesmoke')
plt.show()


In [None]:
# Top 5 first moves for each colour in the games that colour won.
#
# The move list is split on whitespace and indexed by position. Fixed-width
# slicing is not reliable here: when White's first move is three characters long
# (e.g. "Nf3"), characters 3-6 yield fragments such as " d5" or " Nf" instead of
# Black's reply, and two-character moves keep a trailing space.
# Assumes `moves` is a whitespace-separated move string — TODO confirm against the CSV.
move_lists = df["moves"].str.split()
df["opening_move_white"] = move_lists.str[0]
df["opening_move_black"] = move_lists.str[1]  # NaN when the game has no second move

white = df.loc[df["winner"] == "White", "opening_move_white"].value_counts().nlargest(5)
black = df.loc[df["winner"] == "Black", "opening_move_black"].value_counts().nlargest(5)

# One horizontal bar chart per colour, each on its own explicit figure so the
# second chart can never be drawn over the first.
for side, top_moves in (("White", white), ("Black", black)):
    fig, ax = plt.subplots()
    top_moves.plot(kind="barh", ax=ax)
    # Rotate the x tick labels 45 degrees, text centred on the tick
    plt.xticks(rotation=45, horizontalalignment="center")
    ax.set_title(f"Top Opening Moves for {side} in Winning Matches")
    ax.set_xlabel("Wins")
    ax.set_ylabel("Opening Moves")
    plt.show()


In [None]:
# Top 5 opening lines (by short name) for each colour in the games that colour won.
#
# Each colour reads its own alias column. Both aliases are plain copies of
# `opening_shortname`; `opening_sn_white` is kept for any later cell that uses it,
# and the Black analysis no longer borrows the White-named column.
df["opening_sn_white"] = df["opening_shortname"]
df["opening_sn_black"] = df["opening_shortname"]

white_lines = df.loc[df["winner"] == "White", "opening_sn_white"].value_counts().nlargest(5)
black_lines = df.loc[df["winner"] == "Black", "opening_sn_black"].value_counts().nlargest(5)

# One horizontal bar chart per colour, each on its own explicit figure so the
# second chart can never be drawn over the first.
for side, top_lines in (("White", white_lines), ("Black", black_lines)):
    fig, ax = plt.subplots()
    top_lines.plot(kind="barh", ax=ax)
    # Rotate the x tick labels 45 degrees, text centred on the tick
    plt.xticks(rotation=45, horizontalalignment="center")
    ax.set_title(f"Top Opening Lines for {side} in Winning Matches")
    ax.set_xlabel("Wins")
    ax.set_ylabel("Opening Name")
    plt.show()

In [None]:
# Histograms of player ratings: white pieces first, then black pieces
histogram_specs = (
    ('white_rating', "Ratings Distribution of Players with White Pieces"),
    ('black_rating', "Ratings Distribution of Players with Black Pieces"),
)
for rating_column, chart_title in histogram_specs:
    # df.hist opens its own figure; the pyplot calls below then relabel it
    df.hist(rating_column, bins=10)
    plt.title(chart_title)
    plt.xlabel("Rating")
    plt.ylabel("Number of Players")
    plt.show()

# Scatter plot of the two rating columns: white rating on x, black rating on y
fig, ax = plt.subplots(figsize=(8, 8))
ax.scatter(df['white_rating'], df['black_rating'], marker=".")
ax.set_xlabel("White Rating")
ax.set_ylabel("Black Rating")
ax.set_title("Black and White Rating")
plt.show()

# Correlation between the white and black rating columns.
# np.corrcoef returns the full correlation matrix, which is what gets printed.
correlation = np.corrcoef(df['white_rating'], df['black_rating'])
print("\nThe correlation between white and black ratings is: \n", correlation)

In [None]:
# Flag games where one player out-rates the other by more than a fixed number of points.
# NOTE(review): the earlier comment described the cut-off as "more than 2 standard
# deviations (291 ...)" — confirm whether 291 is one or two standard deviations.
RATING_GAP_THRESHOLD = 291
white_advantage = df['white_rating'] - df['black_rating']
black_advantage = df['black_rating'] - df['white_rating']

# Marker columns: True (stored as 1.0 in the resulting float column) where the
# gap exceeds the threshold, NaN everywhere else
df['rating_gap_white'] = np.where(white_advantage > RATING_GAP_THRESHOLD, True, np.nan)
df['rating_gap_black'] = np.where(black_advantage > RATING_GAP_THRESHOLD, True, np.nan)

# Games where the rating gap favours white
white_gap_rows = df['rating_gap_white'].eq(True)
rating_gap_white_df = df.loc[white_gap_rows]
print(rating_gap_white_df[['white_id', 'white_rating', 'black_rating']])

# Games where the rating gap favours black
black_gap_rows = df['rating_gap_black'].eq(True)
rating_gap_black_df = df.loc[black_gap_rows]
print(rating_gap_black_df[['black_id', 'black_rating', 'white_rating']])

In [None]:
def plot_result_pie(games, title, facecolor):
    """Draw a pie chart of how often each value of `winner` occurs in `games`.

    Legend labels and slice colours are derived from the value counts themselves,
    so each slice is labelled with the outcome it actually represents regardless
    of which outcome is most frequent.

    Parameters
    ----------
    games : DataFrame with a `winner` column.
    title : str, chart title.
    facecolor : str, background colour of the figure.
    """
    fig, ax = plt.subplots(figsize=(12, 8))
    counts = games['winner'].value_counts()
    labels = list(counts.index)
    # Same colour per outcome in every chart; any outcome other than White/Black
    # (the draw category) gets the third colour.
    colour_for = {'White': 'lightgrey', 'Black': 'darkblue'}
    colours = [colour_for.get(label, 'cyan') for label in labels]
    ax.pie(counts, autopct='%1.1f%%', colors=colours, wedgeprops={'alpha': 0.5})
    ax.set_title(title, fontsize=25)
    ax.legend(labels, fontsize=15)
    fig.set_facecolor(facecolor)
    plt.show()


# Proportion of results when the rating gap favours white
plot_result_pie(rating_gap_white_df, 'Win Percentage w/ Rating Gap Favoring White', 'whitesmoke')

# Proportion of results when the rating gap favours black
plot_result_pie(rating_gap_black_df, 'Win Percentage w/ Rating Gap Favoring Black', 'lightsteelblue')
