# StrongholdNet: Train an RNN (LSTM) to navigate through a Stronghold

The idea is that we interpret the (shortest) path from any room in the stronghold to the portal room as *sequential data* that we feed to an RNN.

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras as K
#from sklearn.preprocessing import OneHotEncoder
#from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from dataset_rnn import parse_tree_generator, print_stronghold_tree
from anytree import Node, RenderTree, Walker
from anytree.search import find_by_attr, findall_by_attr
import random
import os

In [2]:
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [3]:
df = pd.read_csv('100k_dataset_rnn.csv', delimiter=' ')

In [4]:
df.head(24)

Unnamed: 0,stronghold,room,entry,orientation,parent_room,parent_exit,child_room_1,child_room_2,child_room_3,child_room_4,child_room_5,exit
0,0,RightTurn,1,S,Stairs,1,Stairs,,,,,0
1,0,Stairs,1,S,Corridor,2,RightTurn,,,,,0
2,0,Corridor,2,W,FiveWayCrossing,1,Corridor,Stairs,RightTurn,,,1
3,0,Corridor,0,W,Corridor,1,LeftTurn,Corridor,,,,2
4,0,Corridor,0,S,Corridor,2,Corridor,,,,,1
5,0,Corridor,0,S,Corridor,1,RightTurn,,,,,1
6,0,RightTurn,0,S,Corridor,1,PortalRoom,,,,,1
7,1,RightTurn,1,W,SquareRoom,2,Stairs,,,,,0
8,1,SquareRoom,2,N,ChestCorridor,1,PrisonHall,RightTurn,ChestCorridor,,,0
9,1,ChestCorridor,1,N,RightTurn,1,SquareRoom,,,,,0


Let's apply a custom one-hot encoding…

In [5]:
def onehot(df: pd.DataFrame):
    room_to_vector = {
            'Corridor': 0,
            'PrisonHall': 1,
            'LeftTurn': 2,
            'RightTurn': 3,
            'SquareRoom': 4,
            'Stairs': 5,
            'SpiralStaircase': 6,
            'FiveWayCrossing': 7,
            'ChestCorridor': 8,
            'Library': 9,
            'PortalRoom': 10,
            'SmallCorridor': 11,
            'Start': 12,
            'None': 13}
    orientation_to_vector = {
            'N': 0,
            'S': 1,
            'E': 2,
            'W': 3}
    room_columns = [
            'room',
            'parent_room',
            'child_room_1',
            'child_room_2',
            'child_room_3',
            'child_room_4',
            'child_room_5']
    exit_columns = [
            'entry',
            'parent_exit']
    orientation_columns = [
            'orientation']
    n_exits = 6
    
    df_ = df.copy()

    # one-hot rooms
    for c in room_columns:
        for v in room_to_vector:
            df_[c] = df_[c].replace(v, room_to_vector[v])
        onehot = pd.DataFrame(
                K.utils.to_categorical(df_[c], num_classes=len(room_to_vector)),
                columns=[ c + '_is_' + j for j in room_to_vector ])
        df_ = pd.concat([ onehot, df_ ], axis=1)

    # one-hot exits
    for c in exit_columns:
        onehot = pd.DataFrame(
                K.utils.to_categorical(df_[c], num_classes=n_exits),
                columns=[ c + '_is_' + str(j) for j in range(n_exits) ])
        df_ = pd.concat([ onehot, df_ ], axis=1)

    # one-hot orientations
    for c in orientation_columns:
        for v in orientation_to_vector:
            df_[c] = df_[c].replace(v, orientation_to_vector[v])
        onehot = pd.DataFrame(
                K.utils.to_categorical(df_[c], num_classes=len(orientation_to_vector)),
                columns=[ c + '_is_' + j for j in orientation_to_vector ])
        df_ = pd.concat([ onehot, df_ ], axis=1)
        
    df_.drop([ k for k in room_columns + exit_columns + orientation_columns ], axis=1, inplace=True)
    return df_

In [6]:
df_onehot = onehot(df)

In [7]:
df_onehot.head()

Unnamed: 0,orientation_is_N,orientation_is_S,orientation_is_E,orientation_is_W,parent_exit_is_0,parent_exit_is_1,parent_exit_is_2,parent_exit_is_3,parent_exit_is_4,parent_exit_is_5,...,room_is_SpiralStaircase,room_is_FiveWayCrossing,room_is_ChestCorridor,room_is_Library,room_is_PortalRoom,room_is_SmallCorridor,room_is_Start,room_is_None,stronghold,exit
0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
1,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
2,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1
3,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,2
4,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1


In [8]:
for x in df_onehot:
    print(x)

orientation_is_N
orientation_is_S
orientation_is_E
orientation_is_W
parent_exit_is_0
parent_exit_is_1
parent_exit_is_2
parent_exit_is_3
parent_exit_is_4
parent_exit_is_5
entry_is_0
entry_is_1
entry_is_2
entry_is_3
entry_is_4
entry_is_5
child_room_5_is_Corridor
child_room_5_is_PrisonHall
child_room_5_is_LeftTurn
child_room_5_is_RightTurn
child_room_5_is_SquareRoom
child_room_5_is_Stairs
child_room_5_is_SpiralStaircase
child_room_5_is_FiveWayCrossing
child_room_5_is_ChestCorridor
child_room_5_is_Library
child_room_5_is_PortalRoom
child_room_5_is_SmallCorridor
child_room_5_is_Start
child_room_5_is_None
child_room_4_is_Corridor
child_room_4_is_PrisonHall
child_room_4_is_LeftTurn
child_room_4_is_RightTurn
child_room_4_is_SquareRoom
child_room_4_is_Stairs
child_room_4_is_SpiralStaircase
child_room_4_is_FiveWayCrossing
child_room_4_is_ChestCorridor
child_room_4_is_Library
child_room_4_is_PortalRoom
child_room_4_is_SmallCorridor
child_room_4_is_Start
child_room_4_is_None
child_room_3_is_Corrid

We need to group our sequences by stronghold.

In [9]:
def sequencelify(df, min_sequence=3, max_sequence=12):
    for _, s in df.groupby('stronghold'):
        s = s.drop('stronghold', axis=1).to_numpy()
        for window in range(min_sequence, max_sequence + 1):
            X_batch = []
            y_batch = []
            if s.shape[0] < window:
                break
            for t in range(s.shape[0] - window + 1):
                X = s[t:t+window, :-1]
                X = X.reshape(window, X.shape[1])
                y = np.array(s[t+window-1, -1])
                X_batch.append(X)
                y_batch.append(y)
            X_batch = np.array(X_batch)
            y_batch = np.array(y_batch)
            X_batch = X_batch.reshape(s.shape[0] - window + 1, window, s.shape[1] - 1)
            y_batch = y_batch.reshape(s.shape[0] - window + 1, 1)
            y_batch = K.utils.to_categorical(y_batch, num_classes=6)
            yield X_batch, y_batch

Train/test split our data:

In [10]:
df_train, df_test = train_test_split(
        df_onehot,
        test_size=0.1,
        random_state=1337,
        shuffle=False)

In [11]:
df_train.head()

Unnamed: 0,orientation_is_N,orientation_is_S,orientation_is_E,orientation_is_W,parent_exit_is_0,parent_exit_is_1,parent_exit_is_2,parent_exit_is_3,parent_exit_is_4,parent_exit_is_5,...,room_is_SpiralStaircase,room_is_FiveWayCrossing,room_is_ChestCorridor,room_is_Library,room_is_PortalRoom,room_is_SmallCorridor,room_is_Start,room_is_None,stronghold,exit
0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
1,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
2,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1
3,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,2
4,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1


In [12]:
print("df_train:", df_train.shape)
print("df_test:", df_test.shape)

df_train: (633893, 116)
df_test: (70433, 116)


In [13]:
n_features = df_train.shape[1] - 2

Build and compile the actual model: an LSTM followed by a dense layer with softmax activation for our 6 exit classes.

In [14]:
model = K.Sequential()
#model.add(K.layers.LSTM(
#        128,
#        batch_input_shape=(None, None, n_features),
#        return_sequences=False,
#        stateful=False))
model.add(K.layers.LSTM(
        64,
        batch_input_shape=(None, None, n_features),
        return_sequences=True,
        stateful=False,
        dropout=.1))
model.add(K.layers.LSTM(
        64,
        return_sequences=False,
        stateful=False,
        dropout=.1))
model.add(K.layers.Dense(
        6,
        activation='softmax'))
model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, None, 64)          45824     
_________________________________________________________________
lstm_1 (LSTM)                (None, 64)                33024     
_________________________________________________________________
dense (Dense)                (None, 6)                 390       
Total params: 79,238
Trainable params: 79,238
Non-trainable params: 0
_________________________________________________________________


Fit the model…

In [15]:
model.fit(
        sequencelify(df_train),
        epochs=1,
        steps_per_epoch=100000)



<tensorflow.python.keras.callbacks.History at 0x7fa0a0414c40>

Evaluate the model on test data…

In [16]:
model.evaluate(sequencelify(df_test))



[0.2359531968832016, 0.8852338790893555]

Save/load model weights.

In [17]:
model.save("rnn_9.keras")
#model = K.models.load_model("rnn_7.keras")

Let the model navigate new strongholds and learn from its mistakes:

In [18]:
def navigate_and_train(model, root, stronghold, max_visited=50):
    X = []
    y = []
    n_visited = 0
    
    # locate portal room
    portal = find_by_attr(root, 'PortalRoom')
    
    # skip starter
    next_exit = 1
    entry = 0
    next_room = root.children[0]
    
    while True:
        X.append((
                next_room.name,
                entry,
                next_room.orientation,
                next_room.parent.name,
                next_room.exit,
                *([ c.name for c in next_room.children ] + [ 'None' ] * (5 - len(next_room.children)))))
        
        # find correct portal label
        (upwards, common, downwards) = Walker().walk(next_room, portal)
        label = 0 if len(upwards) > 0 else downwards[0].exit
        y.append(label)
        
        # stop if portal room found
        if len([r for r in next_room.children if r.name == 'PortalRoom']) > 0:
            break
            
        # stop if we reached max visited rooms
        n_visited += 1
        if n_visited == max_visited:
            break
        
        # predict next exit
        next_exit = predict(model, X)
        
        # update stuff
        entry = 0 if next_exit > 0 else next_room.exit
        
        # stop if predicting a bad exit
        if next_exit > len(next_room.children):
            break
        
        next_room = ([ next_room.parent, *next_room.children ])[next_exit]
        
        # stop if predicting a bad room
        if next_room.name in [ 'Start', 'SmallCorridor', 'Library', 'None' ]:
            break
            
    df = pd.DataFrame(X, columns=(
            'room',
            'entry',
            'orientation',
            'parent_room',
            'parent_exit',
            'child_room_1',
            'child_room_2',
            'child_room_3',
            'child_room_4',
            'child_room_5'))
    df['stronghold'] = stronghold
    df['exit'] = pd.Series(y)
    
    # append output to csv file
    df.to_csv('100k_dataset_nav.csv', mode='a', header=False, index=False)
    
    df = onehot(df)
    model.fit(
            sequencelify(df, min_sequence=1, max_sequence=50),
            epochs=1,
            steps_per_epoch=df.shape[0])

In [19]:
def predict(model, X):
    df = pd.DataFrame(X, columns=(
            'room',
            'entry',
            'orientation',
            'parent_room',
            'parent_exit',
            'child_room_1',
            'child_room_2',
            'child_room_3',
            'child_room_4',
            'child_room_5'))
    df = onehot(df)
    y_hat = model.predict(df.to_numpy().reshape(1, len(X), n_features))
    return np.argmax(y_hat)

In [20]:
gen = parse_tree_generator('100k_strongholds_test.txt')
stronghold = 0
for root in gen:
    navigate_and_train(model, root, stronghold)
    stronghold += 1



KeyboardInterrupt: 

Let's evaluate it again…

In [22]:
model.evaluate(sequencelify(df_test))



[1.4340380430221558, 0.27957871556282043]

In [21]:
model.save("rnn_10.keras")