## Imports

In [5]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.optimizers.schedules import ExponentialDecay  # from https://arxiv.org/pdf/1506.02078.pdf
from tensorflow.keras.callbacks import EarlyStopping

from tqdm.notebook import tqdm

In [6]:
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  0


## Hyper-parameters

In [26]:
tunable_hparams = {
    'stateful_generation' : True,
    'mapping_type' : 'seq2seq',
    'early_stopping' : False,
    'seq_length' : 200,
    'game' : 'mario'
}
fixed_hparams = {
    'hidden_size' : 128,
    'learning_rate' : 2e-3,
    'learning_rate_decay' : 0.95,
    'dropout' : 0.5,
    'batch_size' : 100,
    'num_layers' : 3,
    'max_epochs' : 50
}

In [27]:
for key, val in tunable_hparams.items():
    exec(key + '=val')
for key, val in fixed_hparams.items():
    exec(key + '=val')

## Data

In [28]:
data = open('mario_corpus.txt', 'r').read()
level_strs = data.rstrip().split(')')[:-1]
print(len(level_strs))

37


In [20]:
chars = []
for level_str in level_strs:
    chars.extend(list(level_str))
chars = list(set(chars))
vocab_size = len(chars)
print(chars, vocab_size)

['\n', 'Q', 'B', '[', '>', 'o', 'b', 'S', '<', '?', 'x', '-', ']', 'X', 'E'] 15


In [21]:
char_to_ix = { ch:i for i, ch in enumerate(chars) }
ix_to_char = { i:ch for i, ch in enumerate(chars) }

In [22]:
level_arrays = []
for level_str in level_strs:
    level_arrays.append(np.array([char_to_ix[char] for char in list(level_str)]))

In [23]:
def get_inputs_and_targets_from_level_array(level_array):
    
    inputs, targets = [], []
    
    for i in range(len(level_array) - seq_length):
        inputs.append(level_array[i:i+seq_length])
        targets.append(level_array[i+1:i+seq_length+1])
    
    inputs, targets = map(np.array, [inputs, targets])
    inputs = np.eye(vocab_size)[inputs]
    
    return inputs, targets

In [24]:
inputs, targets = [], []
for level_array in tqdm(level_arrays, leave=False):
    inputs_temp, targets_temp = get_inputs_and_targets_from_level_array(level_array)
    inputs.extend(inputs_temp); targets.extend(targets_temp)
inputs, targets = map(np.array, [inputs, targets])

HBox(children=(IntProgress(value=0, max=37), HTML(value='')))



In [25]:
inputs.shape, targets.shape

((119150, 200, 15), (119150, 200))

## Model callbacks

In [29]:
lr_scheduler = ExponentialDecay(
    initial_learning_rate=learning_rate,
    decay_steps=len(inputs) // batch_size, 
    decay_rate=learning_rate_decay, 
)
optimizer = RMSprop(learning_rate=lr_scheduler)

In [34]:
es_callback = EarlyStopping(
    monitor='val_out_acc_custom_acc', mode='max', patience=5, restore_best_weights=early_stopping
)

In [35]:
def custom_loss(y_true, y_pred):
    scce = tf.keras.losses.SparseCategoricalCrossentropy()
    return scce(
        tf.reshape(y_true, shape=(tf.shape(y_true)[0] * seq_length, )), 
        tf.reshape(y_pred, shape=(tf.shape(y_pred)[0] * seq_length, vocab_size))
    )

In [36]:
def custom_acc(y_true, y_pred):
    return tf.math.reduce_mean(
        tf.cast(
            tf.math.equal(
                tf.math.argmax(tf.reshape(y_pred, shape=(tf.shape(y_pred)[0] * seq_length, vocab_size)), axis=-1), 
                tf.cast(tf.reshape(y_true, shape=(tf.shape(y_true)[0] * seq_length, )), dtype=tf.int64)
            ), 
            dtype=tf.float32
        )
    )

## Model definition

In [15]:
lstm_1_state_h_in = keras.layers.Input(shape=[hidden_size])
lstm_1_state_c_in = keras.layers.Input(shape=[hidden_size])

lstm_2_state_h_in = keras.layers.Input(shape=[hidden_size])
lstm_2_state_c_in = keras.layers.Input(shape=[hidden_size])

lstm_3_state_h_in = keras.layers.Input(shape=[hidden_size])
lstm_3_state_c_in = keras.layers.Input(shape=[hidden_size])

input = keras.layers.Input(shape=[seq_length, vocab_size])

out, lstm_1_state_h_out, lstm_1_state_c_out = keras.layers.LSTM(hidden_size, return_sequences=True, return_state=True)(
    input, initial_state=[lstm_1_state_h_in, lstm_1_state_c_in]
)
out = layers.Dropout(dropout)(out)

out, lstm_2_state_h_out, lstm_2_state_c_out = keras.layers.LSTM(hidden_size, return_sequences=True, return_state=True)(
    out, initial_state=[lstm_2_state_h_in, lstm_2_state_c_in]
)
out = layers.Dropout(dropout)(out)

out, lstm_3_state_h_out, lstm_3_state_c_out = keras.layers.LSTM(hidden_size, return_sequences=True, return_state=True)(
    out, initial_state=[lstm_3_state_h_in, lstm_3_state_c_in]
)
out = layers.Dropout(dropout)(out)

out = layers.Dense(vocab_size)(out)
out = layers.Activation('softmax')(out)

out_acc = layers.Lambda(lambda x:x, name = "out_acc")(out)

model = keras.models.Model(
    inputs=[
        input, 
        lstm_1_state_h_in, lstm_1_state_c_in,
        lstm_2_state_h_in, lstm_2_state_c_in,
        lstm_3_state_h_in, lstm_3_state_c_in
    ], 
    outputs=[
        out_acc,
        lstm_1_state_h_out, lstm_1_state_c_out,
        lstm_2_state_h_out, lstm_2_state_c_out,
        lstm_3_state_h_out, lstm_3_state_c_out
    ]
)

model.compile(
    loss=[custom_loss, None, None, None, None, None, None], 
    loss_weights=[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    metrics={'out_acc':custom_acc},
    optimizer=optimizer
)

## Model training

In [16]:
dummy = np.zeros((len(inputs), hidden_size))

In [17]:
history = model.fit(
    [inputs, dummy, dummy, dummy, dummy, dummy, dummy], 
    [targets, dummy, dummy, dummy, dummy, dummy, dummy], 
    batch_size=batch_size,
    validation_split=0.1,
    shuffle=True,
    epochs=epochs, 
    callbacks=[es_callback]
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
 198/1073 [====>.........................] - ETA: 1:56 - loss: 0.1095 - out_acc_loss: 0.1095 - out_acc_custom_acc: 0.9662

KeyboardInterrupt: 

In [19]:
model.save('lstm_on_mario_corpus.h5')

## Load trained model

In [23]:
model = keras.models.load_model(
    'lstm_on_mario_corpus.h5', 
    custom_objects={'custom_loss':custom_loss, 'custom_acc':custom_acc}
)

In [24]:
model.evaluate(
    [inputs, dummy, dummy, dummy, dummy, dummy, dummy], 
    [targets, dummy, dummy, dummy, dummy, dummy, dummy],
    batch_size=batch_size * 2, verbose=1
)  # sanity check



[0.12770111858844757, 0.12770111858844757, 0.9649109244346619]

## Generate level

In [113]:
def onehot_to_string(onehot):
    ints = np.argmax(onehot, axis=-1)
    chars = [ix_to_char[ix] for ix in ints]
    string = "".join(chars)
    char_array = []
    for line in string.rstrip().split('\n')[:-1]:
        char_array.append(list(line))
    char_array = np.array(char_array).T
    string = ""
    for row in char_array:
        string += "".join(row) + "\n"
    return string

In [114]:
seed = inputs[0][:49].copy()
print(seed.shape)
seed[17+14] = 0
seed[17+14][2] = 1
seed[17*2+14] = 0
seed[17*2+14][2] = 1
print(onehot_to_string(seed))

(49, 15)
--
--
--
--
--
--
--
--
--
--
--
--
--
--
-x
XX



In [122]:
for j in tqdm(range(1, 20+1)):

    seed = inputs[0][:51].copy()
    seed[17+14] = 0
    seed[17+14][2] = 1
    seed[17*2+14] = 0
    seed[17*2+14][2] = 1
    gen = seed.copy()

    # initialize all hidden and cell states to zeros
    lstm1_h = np.zeros((1, hidden_size))
    lstm1_c = np.zeros((1, hidden_size))
    lstm2_h = np.zeros((1, hidden_size))
    lstm2_c = np.zeros((1, hidden_size))
    lstm3_h = np.zeros((1, hidden_size))
    lstm3_c = np.zeros((1, hidden_size))

    for i in tqdm(range(2669), leave=False):

        seed = np.expand_dims(seed, axis=0)

        # predict probas and update hidden and cell states
        probas, lstm1_h, lstm1_c, lstm2_h, lstm2_c, lstm3_h, lstm3_c = model.predict([
            seed, lstm1_h, lstm1_c, lstm2_h, lstm2_c, lstm3_h, lstm3_c
        ])

        probas = probas[0][-1]  # first batch, last timestep

        idx = np.random.choice(np.arange(len(probas)), p=probas)
        seed = np.zeros((1, vocab_size))
        seed[:, idx] = 1.

        gen = np.vstack([gen, seed])
        
    with open(f'./lstm_generations/lstm_gen_{j}.txt', 'w+') as txt_f:
        txt_f.write(onehot_to_string(gen))

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2669.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2669.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2669.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2669.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2669.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2669.0), HTML(value='')))

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



HBox(children=(FloatProgress(value=0.0, max=2669.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2669.0), HTML(value='')))

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



HBox(children=(FloatProgress(value=0.0, max=2669.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2669.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2669.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2669.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2669.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2669.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2669.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2669.0), HTML(value='')))


