# Creating a Chess Engine Using Dask
### Importing dependencies

In [None]:
import numpy as np
import pandas as pd
import chess
import chess.pgn
from ChessFunctions import get_encoded_board, flatten_board
from tqdm import tqdm
import dask
from dask_ml.model_selection import train_test_split as d_train_test_split
from dask_ml.model_selection import GridSearchCV as d_GridSearchCV
from sklearn.neural_network import MLPClassifier
import dask.dataframe as dd
import dask.array as da
from dask import delayed
import joblib

### Setting the number of games we will be using to train our chess engine

In [None]:
# Upper bound on how many games are read from the PGN file; the read loop
# stops earlier if the file runs out of games.
GAME_COUNT = 4000

### Instantiating a dask client

In [None]:
from dask.distributed import Client, progress
# Client() is called with no arguments, so the cluster configuration is
# whatever dask.distributed chooses by default.
client = Client()
# Bare expression so the notebook renders the client summary as cell output.
client

## Reading in the games from the .pgn file

In [None]:
f = open('lichess_elite_2020-06.pgn')

In [None]:
%%time 
my_list = []
Y_labels = []
for i in tqdm(range(GAME_COUNT)):
# while True:
    game = chess.pgn.read_game(f)
    if game is None:
        break  # end of file
    my_list.append(game)

## Flattening Game States

In [None]:
def game_to_df(game):
    """
    Convert one parsed PGN game into a dataframe of flattened board states.

    The starting position and the position reached after every mainline
    move are each passed through `flatten_board`, and the results are
    stacked one row per position. Two label columns are then added:
    'gameid' (the game's "LichessURL" header) and 'result' (1 if the
    "Result" header is '1-0', -1 if it is '0-1', 0 otherwise). Finally the
    first 10 rows are dropped.

    Parameters
    ----------
    game
        A game object as returned by `chess.pgn.read_game`.

    Returns
    -------
    dict
        'df': the dataframe described above.
        'term_type': 1 if the "Termination" header equals 'Normal', else 0.
    """
    board = game.board()
    # Collect every position first and stack once at the end. Stacking inside
    # the loop re-copies the whole array on each move, which is quadratic in
    # the length of the game.
    states = [flatten_board(board)]
    for move in game.mainline_moves():
        board.push(move)
        states.append(flatten_board(board))
    # np.vstack is the non-deprecated spelling of np.row_stack.
    df = pd.DataFrame(np.vstack(states))
    df['gameid'] = game.headers["LichessURL"]
    outcome = game.headers["Result"]
    df['result'] = 1 if outcome == '1-0' else (-1 if outcome == '0-1' else 0)
    # Drop the first 10 recorded positions of the game.
    df = df.iloc[10:]
    return {'df': df, 'term_type': int(game.headers['Termination'] == 'Normal')}

### Looping over all games and applying the `game_to_df` function. Each call is wrapped in `dask.delayed` so that evaluation is lazy.

In [None]:
%%time
allgame_df = []
game_terminations = []
for game in tqdm(my_list):
    df = delayed(game_to_df)(game)
    allgame_df.append(df['df'])
    game_terminations.append(df['term_type'])
    

### Combining every game dataframe into a single dataframe.

In [None]:
%%time
# Build a single lazy task that concatenates every per-game dataframe, then
# compute it here only to preview the first rows.
allgame_df_concat = delayed(pd.concat)(allgame_df)
display(allgame_df_concat.compute().head())

### Use the custom function from the included ChessFunctions.py file to apply one-hot encoding to the board states. It also adds two features that represent the value of the pieces on the chess board for both white and black.

Uses a dask dataframe so we can apply the one hot encoding to each partition in parallel.

In [None]:
# Build the dask dataframe from the per-game delayed dataframes.
ddf = dd.from_delayed(allgame_df)
# `meta` must be a (name, dtype) tuple. The previous `('I')` was just the
# string 'I' (no trailing comma), which dask reads as a dtype code rather than
# a series description. 'object' is used because each encoded board is stored
# as one array-like value per row (it is later converted with np.array).
ddf['features'] = ddf[0].apply(get_encoded_board, meta=('features', 'object'))

In [None]:
def func(df):
    """Apply `np.array` to every entry of the partition's 'features' column."""
    # NOTE(review): the positional `1` lands in pandas' second `Series.apply`
    # parameter (`convert_dtype` in pandas 1.x/2.x), not an axis — confirm intent.
    return df.features.apply(np.array, 1)

# Run `func` on every partition, gather the per-row arrays, join them into one
# flat array and reshape to one row of 770 values per board state.
# Presumably 770 = 12 * 64 one-hot squares + 2 piece-value features; verify
# against get_encoded_board.
feature_array = np.concatenate(np.array(ddf.map_partitions(func).compute())).reshape(-1,770)
display(feature_array)
print('\n Shape of feature array: ' + str(feature_array.shape))

In [None]:
# Model inputs: one encoded board per row.
X = feature_array
# Select the label column before computing so dask only materialises
# 'result' instead of rebuilding the whole frame (including the encoded
# features) just to read one column.
Y_labels = ddf['result'].compute()
print("X shape: " + str(X.shape))
print("Y shape: " + str(Y_labels.shape))

### Train-Test splitting our data and targets

In [None]:
%%time
X_train, X_test, y_train, y_test = d_train_test_split(
    X, Y_labels, train_size = 0.9
    ,random_state=1
)

print("Size of training data: " + str(X_train.shape[0]))
print("Size of testing data: " + str(X_test.shape[0]))
print('\n')


## Training the neural network
The idea here is that we can predict whether a board state is winning, losing, or drawn from White's perspective.

In [None]:
%%time
param_grid = {'hidden_layer_sizes':[(50,40,30), (100,50), (10,10,10,10)]
              ,'tol':[0.00001]
              ,'activation':['relu','logistic']
              ,'max_iter':[2000]
              ,'learning_rate_init':[0.0001, 0.001]
             }
mlp = MLPClassifier()
with joblib.parallel_backend('dask'):
    clf = d_GridSearchCV(mlp, param_grid, n_jobs=-1)
clf.fit(X_train, y_train)

### Evaluating the model on the test dataset

In [None]:
# Score the fitted search object on the held-out test split.
print(clf.score(X_test, y_test))
# Bare expression so the notebook displays the best estimator found.
clf.best_estimator_

### Exporting the trained model 

In [None]:
from joblib import dump, load
# Persists the whole fitted search object `clf` (not only its
# best_estimator_) to 'ChessModel.joblib' in the working directory.
dump(clf, 'ChessModel.joblib')  

### Closing the dask client

In [None]:
# Shut down the dask client created at the top of the notebook.
client.close()