In [64]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras as K
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from dataset import parse_tree_generator, print_stronghold_tree
from anytree import Node, RenderTree, Walker
from anytree.search import find_by_attr
import random

In [44]:
df = pd.read_csv('100k_dataset.csv', delimiter=' ')

In [45]:
df.head()

Unnamed: 0,depth,prev_room,prev_exit,room,exit_0,exit_1,exit_2,exit_3,exit_4,exit_portal
0,0,Start,0,FiveWayCrossing,Corridor,LeftTurn,,Corridor,Stairs,0
1,0,Start,0,FiveWayCrossing,Corridor,RightTurn,,Corridor,RightTurn,0
2,2,RightTurn,0,Corridor,SquareRoom,,Corridor,,,0
3,0,Start,0,FiveWayCrossing,Stairs,,,,Corridor,4
4,1,FiveWayCrossing,4,Corridor,Corridor,SpiralStaircase,Stairs,,,0


In [46]:
# one-hot encode
cols = [
        'prev_room',
        'prev_exit',
        'room',
        'exit_0',
        'exit_1',
        'exit_2',
        'exit_3',
        'exit_4']
onehot = ColumnTransformer([("one-hot", OneHotEncoder(), cols)], remainder='passthrough')
onehot.fit(df)
df_onehot = pd.DataFrame(onehot.transform(df).toarray(), index=df.index, columns=pd.Index(onehot.get_feature_names()))

In [47]:
df_onehot.head()

Unnamed: 0,one-hot__x0_ChestCorridor,one-hot__x0_Corridor,one-hot__x0_FiveWayCrossing,one-hot__x0_LeftTurn,one-hot__x0_PrisonHall,one-hot__x0_RightTurn,one-hot__x0_SpiralStaircase,one-hot__x0_SquareRoom,one-hot__x0_Stairs,one-hot__x0_Start,...,one-hot__x7_Library,one-hot__x7_None,one-hot__x7_PrisonHall,one-hot__x7_RightTurn,one-hot__x7_SmallCorridor,one-hot__x7_SpiralStaircase,one-hot__x7_SquareRoom,one-hot__x7_Stairs,depth,exit_portal
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
4,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [48]:
X_train, X_test, y_train, y_test = train_test_split(
        df_onehot.drop('exit_portal', axis=1),
        df_onehot['exit_portal'],
        test_size=0.1,
        random_state=1337)

In [49]:
print("X_train:", X_train.shape)
print("y_train:", y_train.shape)
print("X_test:", X_test.shape)
print("y_test:", y_test.shape)

X_train: (219037, 79)
y_train: (219037,)
X_test: (24338, 79)
y_test: (24338,)


In [8]:
model = K.Sequential()
model.add(K.layers.InputLayer(input_shape=(X_train.shape[1],)))
model.add(K.layers.Dense(128, activation='relu'))
model.add(K.layers.Dense(64, activation='relu'))
model.add(K.layers.Dense(5, activation='softmax'))
model.compile(
        optimizer='adam',
        loss=K.losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy'])

In [9]:
model.fit(X_train, y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f2f6cc198e0>

In [10]:
model.save("shnet_1.keras")

In [11]:
model.evaluate(X_test, y_test)



[0.4047541916370392, 0.7950940728187561]

In [66]:
def evaluate_nav(node: Node, model: K.Model, onehot: OneHotEncoder, depth=0, prev_exit=0):
    # skip forced paths
    if len([ v for v in node.children if v.name == 'PortalRoom' ]) > 0:
        return True
    if len(node.children) == 0:
        return False
    if node.name not in ['FiveWayCrossing', 'Corridor', 'SquareRoom']:
        return evaluate_nav(node.children[0], model, onehot, depth + 1, 0)
    branches = [ (i, j) for i, j in enumerate(node.children) if j.name != 'None' ]
    if len(branches) == 0:
        return False
    if len(branches) == 1:
        return evaluate_nav(branches[0][1], model, onehot, depth + 1, branches[0][0])
    
    # actual prediction
    x = pd.DataFrame([(
            depth, node.parent.name, node.exit, node.name, *([child.name for child in node.children] + ['None'] * (5 - len(node.children))), -1)],
            columns=df.columns)
    x_onehot = pd.DataFrame(onehot.transform(x).toarray(), columns=pd.Index(onehot.get_feature_names()))
    x_onehot.drop('exit_portal', axis=1, inplace=True)
    y_hat = model.predict(x_onehot)
    exit_hat = y_hat.argmax(axis=-1)[0]
    
    # random prediction
    #exit_hat = random.choice(branches)[0]
    return evaluate_nav(node.children[exit_hat], model, onehot, depth + 1, exit_hat)

total = 0
hits = 0
for root in parse_tree_generator('100k_strongholds_test.txt'):
    #print_stronghold_tree(root)
    if evaluate_nav(root, model, onehot):
        hits += 1
    total += 1
    if total % 100 == 0:
        print("total:", total)
        print("hits:", hits)
        print("ratio:", hits / total)
        print()

print("---")
print("Total:", total)
print("Hits:", hits)
print("Ratio:", hits / total)

total: 100
hits: 13
ratio: 0.13

total: 200
hits: 36
ratio: 0.18

total: 300
hits: 51
ratio: 0.17

total: 400
hits: 66
ratio: 0.165

total: 500
hits: 81
ratio: 0.162

total: 600
hits: 100
ratio: 0.16666666666666666

total: 700
hits: 114
ratio: 0.16285714285714287

total: 800
hits: 129
ratio: 0.16125

total: 900
hits: 143
ratio: 0.15888888888888889

total: 1000
hits: 157
ratio: 0.157

total: 1100
hits: 173
ratio: 0.1572727272727273

total: 1200
hits: 180
ratio: 0.15

total: 1300
hits: 196
ratio: 0.15076923076923077

total: 1400
hits: 209
ratio: 0.1492857142857143

total: 1500
hits: 222
ratio: 0.148

total: 1600
hits: 242
ratio: 0.15125

total: 1700
hits: 253
ratio: 0.14882352941176472

total: 1800
hits: 273
ratio: 0.15166666666666667

total: 1900
hits: 290
ratio: 0.15263157894736842

total: 2000
hits: 308
ratio: 0.154

total: 2100
hits: 320
ratio: 0.1523809523809524

total: 2200
hits: 335
ratio: 0.15227272727272728

total: 2300
hits: 348
ratio: 0.15130434782608695

total: 2400
hits: 369

KeyboardInterrupt: 