In [2]:
### Get all scripts
import os
txt_files = []
def traverse_directory(path):
    files = os.listdir(path)
    for sf in files:
        f = os.path.join(path, sf) 
        if os.path.isdir(f):
            traverse_directory(f)
        else:
            if sf.endswith('.txt'):
                txt_files.append(f)
traverse_directory("scripts/")

In [3]:
# Preprocess 
# filter sequences of moves (X,Y)
import re
import numpy as np
import itertools
regex = r"\([^()]*?,[^)]*\)"

SEQUENCE_LEN = 3 #### number of prior moves
input_dim = 2 #### 2 is (X,Y) if 3 is (X,Y,Z)

moves = []
for txt in txt_files:
    with open(txt, encoding='utf-8',errors='ignore') as f:
        lines = f.readlines()
    coords = []
    for line in lines:
        if "Print" in line or "begin" == line.strip():
            continue
        coord = re.findall(regex, line)
        if len(coord):
            if len(coord[0].split(","))<input_dim+1:
                coord = []
        if len(coord):
            if(re.search(r"[a-zA-Z]", coord[0])==None):
                coords.append(coord[0].replace('(','').replace(')','').split(',')[:input_dim])
        else: coords.append(coord)
    out,seg = [],[]
    for coord in coords:
        if len(coord): seg.append(coord)
        else:
            if len(seg)>=SEQUENCE_LEN:
                out.append([seg[0],seg[0]]+seg)
            seg=[]
    if len(out)>=SEQUENCE_LEN: moves+= out

In [4]:
# Format moves
movesets = []
next_movesets = []
for move_idx, move in enumerate(moves):
    move = move + [move[-1]]
    for i in range(0, len(move) - SEQUENCE_LEN):
        movesets.append(move[i: i + SEQUENCE_LEN])
        next_movesets.append(move[i + SEQUENCE_LEN])

# replicate more moves to improve accuracy
movesets=movesets+movesets+movesets
next_movesets=next_movesets+next_movesets+next_movesets

In [5]:
from keras.callbacks import LambdaCallback, ModelCheckpoint, EarlyStopping
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, LSTM, Bidirectional, Embedding

In [12]:
def generator(moveset_list, next_moveset_list, batch_size):
    index = 0
    while True:
        x = np.zeros((batch_size, SEQUENCE_LEN, input_dim), dtype=np.float64)
        y = np.zeros((batch_size, input_dim), dtype=np.float64)
        for i in range(batch_size):
            for t, tpc in enumerate(moveset_list[index % len(moveset_list)]):
                x[i, t, :] = tpc
            y[i, :] = next_moveset_list[index % len(moveset_list)]
            index = index + 1
        yield x, y


def get_model(dropout=0.5):
    "Constructs an LSTM model and adds different layers to it"
    print('Build model...')
    model = Sequential()
    # model.add(Embedding(input_dim=input_dim, output_dim=128))
    # model.add(Bidirectional(LSTM(150)))
    model.add(Bidirectional(LSTM(100, activation="relu"), input_shape=(SEQUENCE_LEN, input_dim)))  # , activation="relu"
    if dropout > 0:
        model.add(Dropout(dropout))
    model.add(Dense(input_dim))
    # model.add(Activation('softmax'))
    return model

def on_epoch_end(epoch, logs):
    # Function invoked at end of each epoch. Prints generated text.
    examples_file.write('\n----- Generating move after Epoch: %d\n' % epoch)

    # Randomly pick a seed sequence
    seed_index = np.random.randint(len(movesets_train + movesets_test))
    seed = (movesets_train + movesets_test)[seed_index]

    moveset = seed
    # examples_file = moveset.tolist()
    examples_file.write('----- Generating with seed:\n"' + ' ' + str(moveset) + '"\n')
    for i in range(5):
        x_pred = np.zeros((1, SEQUENCE_LEN, input_dim))
        for t, top in enumerate(moveset):
            x_pred[0, t, :] = top

        preds = model.predict(x_pred, verbose=0)[0]

        moveset = moveset[1:]
        moveset = np.vstack([moveset, preds])

        # examples_file.append(preds.tolist())
        # return examples_file
        examples_file.write(" " + str(preds))
        examples_file.write('\n')
    examples_file.write('=' * 80 + '\n')
    examples_file.flush()

In [13]:
if not os.path.isdir('./checkpoints/'):
    os.makedirs('./checkpoints/')

model = get_model()
print(model.summary())
model.compile(loss='mean_squared_error', optimizer="adam",
              metrics=['accuracy'])  # categorical_crossentropy , mean_squared_error, logcosh
''' categorical_cross_entropy'''
file_path = "./checkpoints/LSTM_LDA-epoch{epoch:03d}-movess%d-sequence%d-" \
            "loss{loss:.4f}-acc{accuracy:.4f}-val_loss{val_loss:.4f}-val_acc{val_accuracy:.4f}.keras" % \
            (SEQUENCE_LEN, SEQUENCE_LEN)

checkpoint = ModelCheckpoint(file_path, monitor='val_accuracy', save_best_only=True)
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)
early_stopping = EarlyStopping(monitor='val_accuracy', patience=20)
callbacks_list = [checkpoint, print_callback, early_stopping]

Build model...


None


In [14]:
print(len(movesets))
print('---')
print(next_movesets[0])
cut_index = int((len(movesets)-10) * (1.-(1/100.)))
print(cut_index)
movesets_train, movesets_test = movesets[:cut_index], movesets[cut_index:]
next_movesets_train, next_movesets_test = next_movesets[:cut_index], next_movesets[cut_index:]

# Print the train set
print(movesets_train[0])

34635
---
['82861', ' 148330']
34278
[['83384', ' 147942'], ['83384', ' 147942'], ['83384', ' 147942']]


In [15]:
BATCH_SIZE=32
examples = 'examples_move_vector.txt'
examples_file = open(examples, "w")
# examples_file = []
history = model.fit(generator(movesets_train, next_movesets_train, BATCH_SIZE),
                              steps_per_epoch=int(len(movesets_train) / BATCH_SIZE) + 1,
                              epochs=100,
                              callbacks=callbacks_list,
                              validation_data=generator(movesets_test, next_movesets_test, BATCH_SIZE),
                              validation_steps=int(len(movesets_test) / BATCH_SIZE) + 1)

Epoch 1/100
[1m1072/1072[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.8895 - loss: 3134076928.0000 - val_accuracy: 1.0000 - val_loss: 92002752.0000
Epoch 2/100
[1m1072/1072[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9976 - loss: 683861248.0000 - val_accuracy: 1.0000 - val_loss: 76691288.0000
Epoch 3/100
[1m1072/1072[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9959 - loss: 605782976.0000 - val_accuracy: 1.0000 - val_loss: 18759622.0000
Epoch 4/100
[1m1072/1072[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9971 - loss: 615194560.0000 - val_accuracy: 1.0000 - val_loss: 236801040.0000
Epoch 5/100
[1m1072/1072[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.9986 - loss: 623276160.0000 - val_accuracy: 1.0000 - val_loss: 52008508.0000
Epoch 6/100
[1m1072/1072[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accurac

In [24]:
#### EVALUATE
#### Predict moves and compare to actual moves
print('   ','   ',"PREDICT",'                 ',"ACTUAL")
for idx,moveset in enumerate(movesets[:20]):
    x_pred = np.zeros((1, SEQUENCE_LEN, input_dim))
    for t, top in enumerate(moveset):
        x_pred[0, t, :] = top
    "predict the next move"
    preds = model.predict(x_pred, verbose=0)[0]    
    print('move',idx+1,preds,next_movesets[idx])

        PREDICT                   ACTUAL
move 1 [ 85941.016 132009.2  ] ['82861', ' 148330']
move 2 [ 85845.72 132180.05] ['82801', ' 149368']
move 3 [ 85836.29 132635.95] ['81572', ' 149466']
move 4 [ 85384.33 133029.98] ['81331', ' 149685']
move 5 [ 84900.375 133140.39 ] ['81331', ' 149685']
move 6 [ 26151.623 164524.36 ] ['20467', ' 187061']
move 7 [ 25944.8 163758.3] ['19703', ' 185483']
move 8 [ 25547.104 162507.28 ] ['20896', ' 184693']
move 9 [ 25667.293 161352.3  ] ['20896', ' 184693']
move 10 [117203.83 193819.92] ['111364', ' 219123']
move 11 [117162.38 193727.06] ['110271', ' 219030']
move 12 [116812.2 193655.7] ['110075', ' 218989']
move 13 [116416.46 193444.73] ['110080', ' 217990']
move 14 [115993.15 192958.42] ['110032', ' 217279']
move 15 [115801.56 192388.67] ['108880', ' 217539']
move 16 [115522.29 192184.67] ['107862', ' 217546']
move 17 [114909.22 192039.25] ['107123', ' 218150']
move 18 [114223.25 192170.61] ['107122', ' 217833']
move 19 [113764.64 192081.23] ['107