In [None]:
# !pip install torch

In [27]:
import numpy as np
import pandas as pd
import chess
import chess.pgn
from ChessFunctions import get_encoded_board, flatten_board
from tqdm import tqdm
import dask
from dask_ml.model_selection import train_test_split as d_train_test_split
from dask_ml.model_selection import GridSearchCV as d_GridSearchCV
import dask.dataframe as dd
import dask.array as da
from dask import delayed
import joblib


In [2]:
from dask.distributed import Client, progress
client = Client()
client

0,1
Client  Scheduler: tcp://127.0.0.1:52653  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 4  Cores: 8  Memory: 17.18 GB


In [3]:
# client.close()

In [4]:
f = open('lichess_elite_2020-06.pgn')

In [11]:
%%time 
my_list = []
Y_labels = []
for i in tqdm(range(3000)):
# while True:
    game = chess.pgn.read_game(f)
    if game is None:
        break  # end of file
    my_list.append(game)

100%|██████████| 3000/3000 [00:07<00:00, 415.34it/s]

CPU times: user 7.16 s, sys: 67.7 ms, total: 7.23 s
Wall time: 7.23 s





In [12]:
%%time
# @dask.delayed
def make_df(my_list):
    allgame_df = pd.DataFrame()
    for game in tqdm(my_list):
        if game.headers['Termination']=='Normal':
            board = game.board() 
            board_state = [flatten_board(board)]
            for move in game.mainline_moves():
                board.push(move)
                board_state = np.row_stack((board_state,flatten_board(board)))
            df = pd.DataFrame(board_state)
            df['gameid'] = game.headers["LichessURL"]
            df['result'] = np.where(game.headers["Result"]=='1-0',1,
                                        np.where(game.headers["Result"]=='0-1',-1,0))
            df = df[3:]
            allgame_df = pd.concat([allgame_df,df])
        else:
            next
    
    return allgame_df

CPU times: user 12 µs, sys: 1 µs, total: 13 µs
Wall time: 17.2 µs


In [13]:
allgame_df = make_df(my_list)
allgame_df

100%|██████████| 3000/3000 [00:19<00:00, 152.28it/s]


Unnamed: 0,0,gameid,result
3,rnbqkb.rpppppppp.....n............PP.............,https://lichess.org/qCEi5rTW,-1
4,rnbqkb.rpppp.ppp....pn............PP.............,https://lichess.org/qCEi5rTW,-1
5,rnbqkb.rpppp.ppp....pn............PP.............,https://lichess.org/qCEi5rTW,-1
6,rnbqkb.rpp.p.ppp....pn....p.......PP.............,https://lichess.org/qCEi5rTW,-1
7,rnbqkb.rpp.p.ppp....pn....pP......P..............,https://lichess.org/qCEi5rTW,-1
...,...,...,...
99,kb...R..........PK...p...P.....p.................,https://lichess.org/7QTQ7Yau,1
100,kb...R..........PK.......P...p.p.................,https://lichess.org/7QTQ7Yau,1
101,kb...R..P........K.......P...p.p.................,https://lichess.org/7QTQ7Yau,1
102,kb...R..P........K.......P.....p.....p...........,https://lichess.org/7QTQ7Yau,1


In [15]:
ddf = dd.from_pandas(allgame_df,npartitions=1000000)
ddf['features'] = ddf[0].apply(get_encoded_board,meta=('I'))
# ddf.compute()

In [16]:
def func(df):
    return df.features.apply(pd.Series, 1).stack().reset_index(level=1, drop=True)

In [17]:
feature_array = np.array(ddf.map_partitions(func).compute()).reshape(-1,770)

In [18]:
feature_array.shape

(176506, 770)

In [19]:
X = feature_array
Y_labels = allgame_df['result']
print("X: " + str(X.shape))
print("Y: " + str(Y_labels.shape))

X: (176506, 770)
Y: (176506,)


In [20]:
%%time
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split,GridSearchCV
X_train, X_test, y_train, y_test = d_train_test_split(
    X, Y_labels, train_size = 0.9
    ,random_state=1
)

CPU times: user 214 ms, sys: 277 ms, total: 491 ms
Wall time: 491 ms


In [21]:
X_train.shape, y_train.shape

((158855, 770), (158855,))

In [32]:
%%time
param_grid = {'hidden_layer_sizes':[(100,80,50,20,20,10,10,5,5)]
              ,'tol':[0.00001]
              ,'activation':['relu']
              ,'max_iter':[1000]
#               ,'learning_rate':['constant','invscaling','adaptive']
              ,'learning_rate_init':[0.0001]
#               ,'early_stopping':[False, True]
             }
mlp = MLPClassifier()
with joblib.parallel_backend('dask'):
    clf = GridSearchCV(mlp, param_grid, n_jobs=8)
clf.fit(X_train, y_train)

CPU times: user 3h 9min 41s, sys: 42min 2s, total: 3h 51min 43s
Wall time: 1h 20min 36s


GridSearchCV(estimator=MLPClassifier(), n_jobs=8,
             param_grid={'activation': ['relu'],
                         'hidden_layer_sizes': [(100, 80, 50, 20, 20, 10, 10, 5,
                                                 5)],
                         'learning_rate_init': [0.0001], 'max_iter': [1000],
                         'tol': [1e-05]})

In [35]:
print(clf.score(X_test, y_test))
clf.best_estimator_

0.41680358053368083


MLPClassifier(hidden_layer_sizes=(100, 80, 50, 20, 20, 10, 10, 5, 5),
              learning_rate_init=0.0001, max_iter=1000, tol=1e-05)

In [36]:
# clf.predict(X_test)

In [1]:
from joblib import dump, load
dump(clf, 'filename.joblib')  

NameError: name 'clf' is not defined

In [5]:
clf = load('filename.joblib') 
clf.classes_

array([-1,  0,  1])

In [None]:
import torch

In [None]:
# class Feedforward(torch.nn.Module):
#         def __init__(self, input_size, hidden_size):
#             super(Feedforward, self).__init__()
#             self.input_size = input_size
#             self.hidden_size  = hidden_size
#             self.fc1 = torch.nn.Linear(self.input_size, self.hidden_size)
#             self.relu = torch.nn.ReLU()
#             self.fc2 = torch.nn.Linear(self.hidden_size, 1)
#             self.sigmoid = torch.nn.Sigmoid()
#         def forward(self, x):
#             hidden = self.fc1(x)
#             relu = self.relu(hidden)
#             output = self.fc2(relu)
#             output = self.sigmoid(output)
#             return output

In [None]:
# model = Feedforward(770, 100)
# criterion = torch.nn.BCELoss()
# optimizer = torch.optim.SGD(model.parameters(), lr = 0.01)

In [None]:
# import tensorflow as tf
# X_test_tensor = tf.convert_to_tensor(X_test)

In [None]:
# model.eval()
# y_pred = model(X_test)
# before_train = criterion(y_pred.squeeze(), y_test)
# print('Test loss before training' , before_train.item())
