In [None]:
# !pip install torch

In [18]:
import numpy as np
import pandas as pd
import chess
import chess.pgn
from ChessFunctions import get_encoded_board, flatten_board
from tqdm import tqdm
import dask
from dask_ml.model_selection import train_test_split as d_train_test_split
from dask_ml.model_selection import GridSearchCV as d_GridSearchCV
import dask.dataframe as dd
import dask.array as da
from dask import delayed
import joblib


In [19]:
from dask.distributed import Client, progress
client = Client()
client

Perhaps you already have a cluster running?
Hosting the HTTP server on port 55978 instead


0,1
Client  Scheduler: tcp://127.0.0.1:55979  Dashboard: http://127.0.0.1:55978/status,Cluster  Workers: 4  Cores: 8  Memory: 17.18 GB


In [20]:
# client.close()

In [21]:
f = open('lichess_elite_2020-06.pgn')

In [22]:
%%time 
my_list = []
Y_labels = []
for i in tqdm(range(4000)):
# while True:
    game = chess.pgn.read_game(f)
    if game is None:
        break  # end of file
    my_list.append(game)

100%|██████████| 4000/4000 [00:09<00:00, 406.91it/s]

CPU times: user 9.67 s, sys: 246 ms, total: 9.91 s
Wall time: 9.94 s





In [23]:
%%time
# @dask.delayed
def make_df(my_list):
    allgame_df = pd.DataFrame()
    for game in tqdm(my_list):
        if game.headers['Termination']=='Normal':
            board = game.board() 
            board_state = [flatten_board(board)]
            for move in game.mainline_moves():
                board.push(move)
                board_state = np.row_stack((board_state,flatten_board(board)))
            df = pd.DataFrame(board_state)
            df['gameid'] = game.headers["LichessURL"]
            df['result'] = np.where(game.headers["Result"]=='1-0',1,
                                        np.where(game.headers["Result"]=='0-1',-1,0))
            df = df[10:]
            allgame_df = pd.concat([allgame_df,df])
        else:
            next
    
    return allgame_df

CPU times: user 9 µs, sys: 8 µs, total: 17 µs
Wall time: 22.2 µs


In [24]:
allgame_df = make_df(my_list)
allgame_df

100%|██████████| 4000/4000 [00:26<00:00, 150.09it/s]


Unnamed: 0,0,gameid,result
10,rnbqk..rppp.nppp....p......pP......P....P.b......,https://lichess.org/kuUOsOML,0
11,rnbqk..rppp.nppp....p......pP......P....P.P......,https://lichess.org/kuUOsOML,0
12,rnbqk..rpp..nppp....p.....ppP......P....P.P......,https://lichess.org/kuUOsOML,0
13,rnbqk..rpp..nppp....p.....ppP......P..Q.P.P......,https://lichess.org/kuUOsOML,0
14,rnb.k..rppq.nppp....p.....ppP......P..Q.P.P......,https://lichess.org/kuUOsOML,0
...,...,...,...
111,.........k..........K................p...........,https://lichess.org/MI25DUcQ,0
112,.........k..........K........................p...,https://lichess.org/MI25DUcQ,0
113,.........k..........K........................N...,https://lichess.org/MI25DUcQ,0
114,.........k..........K........................N...,https://lichess.org/MI25DUcQ,0


In [25]:
ddf = dd.from_pandas(allgame_df,npartitions=1000000)
ddf['features'] = ddf[0].apply(get_encoded_board,meta=('I'))
# ddf.compute()

In [26]:
def func(df):
    return df.features.apply(pd.Series, 1).stack().reset_index(level=1, drop=True)

In [27]:
feature_array = np.array(ddf.map_partitions(func).compute()).reshape(-1,770)

In [28]:
feature_array.shape

(204554, 770)

In [29]:
X = feature_array
Y_labels = allgame_df['result']
print("X: " + str(X.shape))
print("Y: " + str(Y_labels.shape))

X: (204554, 770)
Y: (204554,)


In [30]:
%%time
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split,GridSearchCV
X_train, X_test, y_train, y_test = d_train_test_split(
    X, Y_labels, train_size = 0.9
    ,random_state=1
)

CPU times: user 235 ms, sys: 251 ms, total: 486 ms
Wall time: 462 ms


In [31]:
X_train.shape, y_train.shape

((184098, 770), (184098,))

In [32]:
%%time
param_grid = {'hidden_layer_sizes':[(50,40,30,20,10,10,5,5,5)]
              ,'tol':[0.00001]
              ,'activation':['relu','logistic']
              ,'max_iter':[2000]
#               ,'learning_rate':['constant','invscaling','adaptive']
              ,'learning_rate_init':[0.0001]
#               ,'early_stopping':[False, True]
             }
mlp = MLPClassifier()
with joblib.parallel_backend('dask'):
    clf = GridSearchCV(mlp, param_grid, n_jobs=-1)
clf.fit(X_train, y_train)

CPU times: user 10min 17s, sys: 1min 49s, total: 12min 6s
Wall time: 31min 54s


GridSearchCV(estimator=MLPClassifier(), n_jobs=-1,
             param_grid={'activation': ['relu', 'logistic'],
                         'hidden_layer_sizes': [(50, 40, 30, 20, 10, 10, 5, 5,
                                                 5)],
                         'learning_rate_init': [0.0001], 'max_iter': [2000],
                         'tol': [1e-05]})

In [34]:
print(clf.score(X_train, y_train))
clf.best_estimator_

0.4389021064867625


MLPClassifier(activation='logistic',
              hidden_layer_sizes=(50, 40, 30, 20, 10, 10, 5, 5, 5),
              learning_rate_init=0.0001, max_iter=2000, tol=1e-05)

In [None]:
# clf.predict(X_test)

In [17]:
from joblib import dump, load
dump(clf, 'FinalModel.joblib')  

['FinalModel.joblib']

In [None]:
clf = load('filename.joblib') 
clf.classes_

In [None]:
import torch

In [None]:
# class Feedforward(torch.nn.Module):
#         def __init__(self, input_size, hidden_size):
#             super(Feedforward, self).__init__()
#             self.input_size = input_size
#             self.hidden_size  = hidden_size
#             self.fc1 = torch.nn.Linear(self.input_size, self.hidden_size)
#             self.relu = torch.nn.ReLU()
#             self.fc2 = torch.nn.Linear(self.hidden_size, 1)
#             self.sigmoid = torch.nn.Sigmoid()
#         def forward(self, x):
#             hidden = self.fc1(x)
#             relu = self.relu(hidden)
#             output = self.fc2(relu)
#             output = self.sigmoid(output)
#             return output

In [None]:
# model = Feedforward(770, 100)
# criterion = torch.nn.BCELoss()
# optimizer = torch.optim.SGD(model.parameters(), lr = 0.01)

In [None]:
# import tensorflow as tf
# X_test_tensor = tf.convert_to_tensor(X_test)

In [None]:
# model.eval()
# y_pred = model(X_test)
# before_train = criterion(y_pred.squeeze(), y_test)
# print('Test loss before training' , before_train.item())
