In [16]:
import tensorflow as tf
import numpy as np
from libs.activations import lrelu

class AEModel(object):
    def __init__(self, mean_img = np.zeros([33600])) :
        self.HIDDEN_STATE_SIZE = 128
        self.SCREEN_HEIGHT = 210
        self.SCREEN_WIDTH = 160
        self.n_filters = [1, 16, 32, 64]
        self.filter_sizes = [3, 3, 3, 3]
        self.input_shape = [None, 33600]
        self.keep_prob = 0.8

        mean_img = np.reshape(mean_img, newshape = [33600])
        self.build(mean_img)


    def build(self, mean_img = np.zeros([33600])):
        tf.reset_default_graph()
        self.x = tf.placeholder(tf.float32, self.input_shape, name='x')
        self.mean = tf.Variable(mean_img, trainable=False, dtype=tf.float32)
        current_input = tf.subtract(self.x, self.mean)
        current_input = tf.reshape(current_input, [-1, self.SCREEN_HEIGHT, self.SCREEN_WIDTH, 1])

        
        encoder = []
        shapes = []
        with tf.variable_scope("encoder"):
          with tf.variable_scope("cnn_layers"):
            for layer_i, n_output in enumerate(self.n_filters[1:]):
                n_input = current_input.get_shape().as_list()[3]
                shapes.append(current_input.get_shape().as_list())
                w = tf.Variable(tf.random_normal([self.filter_sizes[layer_i], self.filter_sizes[layer_i], n_input, n_output], stddev=0.1))
                b = tf.Variable(tf.random_normal([n_output], stddev=0.1))
                encoder.append(w)
                conv = tf.nn.relu(tf.add(tf.nn.conv2d(current_input, w, strides=[1, 2, 2, 1], padding='SAME'), b))
                drop = tf.nn.dropout(conv, keep_prob = self.keep_prob)
                current_input = drop
            cnn_output_shape = current_input.get_shape().as_list()
            cnn_output_size = cnn_output_shape[1] * cnn_output_shape[2] * cnn_output_shape[3]
          with tf.variable_scope("dense_layers"):
            flatten = tf.contrib.layers.flatten(inputs = current_input)
            dense1 = tf.contrib.layers.fully_connected(flatten, num_outputs=512, activation_fn=tf.nn.relu)
            dense_drop1 = tf.contrib.layers.dropout(inputs = dense1, keep_prob = self.keep_prob)

            dense2 = tf.contrib.layers.fully_connected(dense_drop1, num_outputs=256, activation_fn=tf.nn.relu)
            dense_drop2 = tf.contrib.layers.dropout(inputs = dense2, keep_prob = self.keep_prob)
                
            dense3 = tf.contrib.layers.fully_connected(inputs = dense_drop2, num_outputs=self.HIDDEN_STATE_SIZE, activation_fn=tf.nn.relu)
            dense_drop3 = tf.contrib.layers.dropout(inputs = dense3, keep_prob = self.keep_prob)
        
        with tf.variable_scope("hidden"):
            self.hidden = tf.cast(dense_drop3, tf.int32)

        encoder.reverse()
        shapes.reverse()
        with tf.variable_scope("decoder"):
          with tf.variable_scope("dense_layers"):
            dense4 = tf.contrib.layers.fully_connected(inputs = dense_drop3, num_outputs = 256, activation_fn=tf.nn.relu)
            dense_drop4 = tf.contrib.layers.dropout(inputs = dense4, keep_prob = self.keep_prob)

            dense5 = tf.contrib.layers.fully_connected(inputs = dense_drop4, num_outputs = 512, activation_fn=tf.nn.relu)
            dense_drop5 = tf.contrib.layers.dropout(inputs = dense5, keep_prob = self.keep_prob)
                
            dense6 = tf.contrib.layers.fully_connected(inputs = dense_drop5, num_outputs = cnn_output_size, activation_fn=tf.nn.relu)
            dense_drop6 = tf.contrib.layers.dropout(inputs = dense6, keep_prob = self.keep_prob)

            reshape = tf.reshape(dense_drop6, [-1, cnn_output_shape[1], cnn_output_shape[2], cnn_output_shape[3]])
          with tf.variable_scope("cnn_transpose_layers"):
            for layer_i, shape in enumerate(shapes):
                w = encoder[layer_i]
                b = tf.Variable(tf.random_normal([w.get_shape().as_list()[2]], stddev=0.1))
                conv_transpose = tf.nn.relu(tf.add(tf.nn.conv2d_transpose(current_input, w,
                        tf.stack([tf.shape(self.x)[0], shape[1], shape[2], shape[3]]),
                        strides=[1, 2, 2, 1],
                        padding='SAME'), b))
                drop = tf.nn.dropout(conv_transpose, self.keep_prob)
                current_input = drop
            current_input = tf.reshape(current_input, [-1, self.input_shape[1]])
            self.predict = tf.add(current_input, self.mean, name = "predict")

        with tf.variable_scope("cost"):
          self.cost = tf.reduce_sum(tf.square(self.predict - self.x), name="cost")
          tf.summary.scalar("cost", self.cost)

        with tf.variable_scope("optimize"):
          learning_rate = 0.01
          self.optimizer = tf.train.AdamOptimizer(learning_rate, name="optimizer").minimize(self.cost)

        self.merged = tf.summary.merge_all()

In [14]:
screen_height = 210
screen_width = 160
batch_size = 128

n_train_screens = 2432
train_screens = np.zeros((n_train_screens, screen_height*screen_width))
n_dev_screens = batch_size
dev_screens = np.zeros((n_dev_screens, screen_height*screen_width))

screen_dir = "./screens/alien/"
def loadData(dir):
    for i in range(0, n_train_screens):
        path = dir + str(i + 1) + ".matrix"
        with open(path, "r") as f:
            pixels = f.read().split(' ')[:-1]
            pixels = list(map(int, pixels))
            train_screens[i] = np.array(pixels)

    for i in range(0, n_dev_screens):
        path = dir + str(n_train_screens + i + 1) + ".matrix"
        with open(path, "r") as f:
            pixels = f.read().split(' ')[:-1]
            pixels = list(map(int, pixels))
            dev_screens[i] = np.array(pixels)


n_batch = n_train_screens//batch_size
def train(mean_img):
    ae = AEModel(mean_img)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    writer = tf.summary.FileWriter("./AE_nn_log", sess.graph)

    n_epochs = 50
    for epoch_i in range(n_epochs):
        for batch_i in range(n_batch):
            #print(batch_i)
            batch_xs = train_screens[batch_i*batch_size : batch_i*batch_size + batch_size]
            sess.run(ae.optimizer, feed_dict={ae.x: batch_xs})
        print(epoch_i, sess.run(ae.cost, feed_dict={ae.x: dev_screens}))
        summary, cost = sess.run([ae.merged, ae.cost], feed_dict={ae.x: dev_screens})
        writer.add_summary(summary, epoch_i)
    saver.save(sess, './ckpt/model')

    writer.close()
    sess.close()


if __name__ == '__main__':
    loadData(screen_dir)
    mean_img = np.mean(train_screens, 0)
    train(mean_img)


0 453810620.0
1 409917000.0
2 383183230.0
3 383664800.0
4 376113630.0
5 361779400.0
6 354250700.0
7 360330720.0
8 351370900.0
9 350098000.0
10 341069730.0
11 344518850.0
12 341224500.0
13 339192450.0
14 355283070.0
15 342828450.0
16 347324860.0
17 347621920.0
18 401701000.0
19 357315070.0
20 361412700.0
21 339075140.0
22 336587900.0
23 339608400.0
24 329934270.0
25 329711170.0
26 328942140.0
27 349653060.0
28 335359170.0
29 326758660.0
30 326712320.0
31 326795330.0
32 323041630.0
33 324490620.0
34 333449000.0
35 324328580.0
36 322578180.0
37 323876450.0
38 327270850.0
39 329186750.0
40 352376320.0
41 330173300.0
42 327194340.0
43 326802720.0
44 326937180.0
45 333257200.0
46 329769200.0
47 328613120.0
48 339233660.0
49 324896480.0


In [22]:
import matplotlib.pyplot as plt

ae = AEModel()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(sess, "./ckpt/model")

def predict(screen_values):
    screen = np.array(screen_values)
    screen = np.reshape(screen, [1, 33600])
    hidden_state = sess.run(ae.z, feed_dict={ae.x: screen})
    hidden_state = np.reshape(hidden_state, [34560])
    return [v for v in hidden_state]


n_train_screens = 2480
n_dev_screens = 4
screen_height = 210
screen_width = 160
dev_screens = np.zeros((n_dev_screens, screen_height*screen_width))
screen_dir = "./screens/alien/"

def loadData(dir):
    for i in range(n_dev_screens):
        path = dir + str(n_train_screens+i+1) + ".matrix"
        with open(path, "r") as f:
            pixels = f.read().split(' ')[:-1]
            pixels = list(map(int, pixels))
            dev_screens[i] = np.array(pixels)
loadData(screen_dir)

n_examples = 4
recon = sess.run(ae.predict, feed_dict={ae.x: dev_screens})


fig, axs = plt.subplots(n_examples, 2, figsize=(210, 160), squeeze=False)
for example_i in range(n_examples):
    axs[example_i][0].imshow(
        np.reshape(dev_screens[example_i, :], (210, 160)))
    axs[example_i][1].imshow(
        np.reshape(recon[example_i, :], (210, 160)))
fig.show()
plt.draw()

sess.close()

INFO:tensorflow:Restoring parameters from ./ckpt/model
392
0
