In [26]:
import numpy as np
import pandas as pd
import tensorflow as tf
import pickle
import joblib
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

In [2]:
data = pd.read_csv('games.csv')

In [3]:
data = data[(data["victory_status"] != "draw")]
data = data.drop(["id","turns","rated","opening_name","opening_ply","increment_code","created_at","last_move_at","black_rating","black_id","white_rating","white_id","victory_status","opening_eco"], axis=1)

In [4]:
#GETTING ALL UNIQUE MOVES
unique_moves = set()
len_unique_moves = len(unique_moves)

for move_list in data["moves"]:
    for move in move_list.split(' '):
        unique_moves.add(move)

max_vocab = len(unique_moves)

In [5]:
moves = np.array(data['moves'])
labels = np.array(data["winner"].map(lambda x: 1 if x=="white" else 0))

In [6]:
#GETTING MAXIMIUM LENGTH OF ITEM IN UNIQUE_MOVES
max_len = 0
for move in moves:
    total = 0
    for item in move.split(' '):
        total +=1
    if total > max_len:
        max_len = total

print(max_len)

349


In [7]:
#TOKENIZATION WITH TENSORFLOW
tokenizer = Tokenizer(num_words=max_vocab)
tokenizer.fit_on_texts(moves)
sequences = tokenizer.texts_to_sequences(moves)
word_index = tokenizer.word_index
model_inputs = pad_sequences(sequences, maxlen=max_len)

In [8]:
model_inputs.shape

(19152, 349)

In [9]:
#TRAINING AND TESTING DATA
train_inputs, test_inputs, train_labels, test_labels = train_test_split(model_inputs, labels, train_size=0.7, random_state=25)

In [10]:
test_inputs.shape

(5746, 349)

In [11]:
#MODEL 

embedding_dim = 300
inputs = tf.keras.Input(shape=max_len)

embedding = tf.keras.layers.Embedding(input_dim=max_vocab,output_dim=embedding_dim,input_length=max_len)(inputs)
gru = tf.keras.layers.GRU(units=embedding_dim)(embedding)
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(gru) 
model = tf.keras.Model(inputs=inputs, outputs=outputs)


In [12]:
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy',tf.keras.metrics.AUC(name='auc')]
)

In [13]:
batch_size = 32
epochs = 3

model.fit(
    train_inputs,
    train_labels,
    validation_split=0.1,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[tf.keras.callbacks.ReduceLROnPlateau()],
    verbose=1
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x2d66e546550>

In [19]:
model.evaluate(test_inputs, test_labels, verbose = 1)



[0.3137526512145996, 0.8644274473190308, 0.940314531326294]

In [20]:
#CONVERT INPUT TESTING DATA INTO PADDED LIST

def prediction(move):
  move_list = move.split()

  for i in move_list:
    b = i.lower()
    a = move_list.index(i)
    move_list[a] = word_index[b]

  for i in range(len(move_list)):
    while len(move_list)!= max_len:
      move_list.insert(i,0) 
  
  return(np.array([move_list]))


In [21]:
testing_input = prediction("b4 e5 a4 d5 Nc3 Bxb4 Ba3 d4 Ne4 Bxa3 Rxa3 Qe7 a5 Qxa3 e3 Qxa5 c3 dxe3 fxe3 Qd5 Nf2 Nf6 e4 Qd6 d4 exd4 Qxd4 Qxd4 cxd4 Bc4 Re8 Ngh3 Nxe4 Nxe4 Rxe4 Kf2 Bxh3 gxh3 Rxd4 Kf3 Rxc4 Rb1 Nc6 h4 Nd4 Ke3 Re8")
#model.predict_classes(func)

In [41]:
test_pred = model.predict(testing_input)
if test_pred[0]>0.5:
    print("White is going to win.")
else:
    print("Black is going to win.")

White is going to win.


In [33]:
model.save("my_model")
model.save_weights("weights.h5")

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: my_model\assets
