In [None]:
import sys
sys.path.append('../src')

In [None]:
from importlib import reload
import numpy as np
import matplotlib.pyplot as plt
import time
%matplotlib inline

## Record gameplay.
Call `recording.stop()` to end the recording.

In [None]:
import gta.recording.unified
# Build a recorder with keyboard capture switched off.
# The two periods are presumably sampling intervals in seconds — TODO confirm.
recording = gta.recording.unified.UnifiedRecorder(
    includeKeyboard=False,
    gamepadPeriod=.001, visionPeriod=.05
)
recording.start()
# Hold this cell for ten seconds after starting the recorder.
time.sleep(10)

In [None]:
# Report the size of the recorded image array every four seconds.
# The loop has no exit condition: interrupt the kernel to leave it.
print('Images are ... ', end='')
while True:
    X = recording.xrecorder.results
    n_bytes = X.size * X.dtype.itemsize
    print('%.2fGB ' % (n_bytes / 1024 ** 3,), end='')
    time.sleep(4)

In [None]:
# End the recording session.
recording.stop()

In [None]:
# Print the size of the recorded image array, then drop the local reference to it.
X = recording.xrecorder.results
n_bytes = X.size * X.dtype.itemsize
print('%.2fGB ' % (n_bytes / 1024 ** 3,), end='')
del X

In [None]:
# Save the recording without compression; the return value is kept in `saved`.
saved = recording.save(compressed=False)

In [None]:
# Scratch arithmetic: 1690 / 60 (presumably a duration in seconds expressed in minutes — TODO confirm).
1.69e3/60

In [None]:
# Element count of the saved Y array, divided by 1024.
saved['Y'].size / 1024

Saving takes about 4 times as long when compression is enabled (`compressed=True`).

## Load and use data.

In [None]:
import sys
sys.path.append('../src')

In [None]:
from importlib import reload
import numpy as np
import matplotlib.pyplot as plt
import time
import tqdm
%matplotlib inline

In [None]:
import os
import gta.utils, gta.eventIDs

In [None]:
# Column indices of saved['Y'] kept as regression targets; the same values
# are used as keys into gta.eventIDs.eids2names when labelling plots.
keepEids = [0, 2, 5]

In [None]:
%%time
# Location of the saved recording archive under gta.utils.home.
fpath = os.path.join(gta.utils.home, 'data', 'gta', 'UnifiedRecorder-1509210169.3261402.npz')

# Reuse `saved` when an earlier cell already defined it; only a missing name
# (NameError) triggers loading the archive from fpath.
try:
    saved.keys()
except NameError:
    # Data not loaded.
    saved = np.load(fpath)
# Differences between consecutive entries of T (presumably timestamps — TODO confirm).
DT = np.diff(saved['T'])
# Drop the first row of X and Y; presumably so they line up with DT, which
# has one entry fewer than T.
X = saved['X'][1:]
# Keep only the output columns listed in keepEids.
Y = saved['Y'][1:][:, keepEids]

Normalize the data.

In [None]:
# data is too big for this
# X = (X.astype('float32') - 128) / 256

In [None]:
def normalize(mat):
    """Cast `mat` to float32 and map the value range [0, 255] onto [-0.5, 0.5].

    A new array is returned; `mat` itself is not modified.
    """
    as_float = mat.astype('float32')
    centered = as_float - 127.5
    return centered / 255

What does the distribution of outputs look like?

In [None]:
# Overlay one normalized histogram per retained output column.
fig, ax = plt.subplots()
for col, eid in enumerate(keepEids):
    label = gta.eventIDs.eids2names[eid]
    # `density` is the supported spelling of the old `normed` keyword,
    # which current Matplotlib releases no longer accept.
    ax.hist(Y[:, col], label=label, alpha=.2, density=True, bins=32)
ax.set_xlabel('output value')
ax.set_ylabel('density')
ax.legend();

What does a randomly-chosen image look like?

In [None]:
def imsh(im):
    """Draw `im` (cast to uint8) on a fresh axes without ticks.

    Returns the (figure, axes) pair so callers can decorate the plot further.
    """
    figure, axes = plt.subplots()
    axes.imshow(im.astype('uint8'))
    for clear_ticks in (axes.set_xticks, axes.set_yticks):
        clear_ticks([])
    return figure, axes

In [None]:
# Show one frame; index 402 is hard-coded rather than drawn at random.
imsh(X[402]);

What does the average image look like??

In [None]:
# Element-wise mean over the leading axis of X.
avImg = X.sum(axis=0) / X.shape[0]

In [None]:
# Display the mean image computed over all frames.
imsh(avImg);

A dark road in the middle, hillside on the right, a constant HUD, a faint overlay of a parking lot at the very end, and ... Task Manager at bottom right. Should have taken that out. :/

### Split data

In [None]:
# Contiguous split in recording order (no shuffling):
# first 80% train, next 10% test, final 10% validation.
splits = int(len(DT) * .8), int(len(DT) * .9)

def s(a, b):
    """Return the rows [a, b) of X, Y and DT as a tuple; b=None means 'to the end'."""
    return X[a:b], Y[a:b], DT[a:b]

X_train, Y_train, DT_train = s(0, splits[0])
X_test, Y_test, DT_test = s(splits[0], splits[1])
# Slice to the end with None: an end index of -1 would silently drop the last sample.
X_valid, Y_valid, DT_valid = s(splits[1], None)
X_train.shape, X_test.shape, X_valid.shape

In [None]:
# Number of samples in each split, displayed as the cell output.
n_train, n_test, n_valid = (len(part) for part in (DT_train, DT_test, DT_valid))
n_train, n_test, n_valid

In [None]:
# Shape of a single example: every dimension of X_train after the leading sample axis.
image_shape = X_train.shape[1:]

In [None]:
import tensorflow as tf
import gta.nn
reload(gta.nn);

In [None]:
# Network input: a batch of images with the per-example shape of the dataset.
x = tf.placeholder(tf.float32, (None, *image_shape), name='images')
# Regression targets: one column per retained output, taken from Y so it
# cannot drift from keepEids or from the width of the network's final layer.
y = tf.placeholder(tf.float32, (None, Y.shape[1]), name='gamepad_axes')

In [None]:
class Arch(gta.nn.ConvNet):
    """Convolutional regressor built from gta.nn.ConvNet layer helpers.

    The graph is four pairs of convolutions (an 8x8 pair with 12 filters, then
    3x3 pairs with 16, 32 and 64 filters), a flatten, a 32-unit fully
    connected layer and a final layer with one output per column of Y.
    """

    def _addConv2d(self, *args, **kwargs):
        """Add a convolution via the parent class and print its output shape."""
        # Zero-argument super() is bound to Arch itself. The previous
        # super(self.__class__, self) recursed forever as soon as Arch was
        # subclassed, because self.__class__ is then the subclass.
        out = super()._addConv2d(*args, **kwargs)
        print(out.shape)
        return out

    def __call__(self, x, name='predictions'):
        """Build the network on top of `x` and return the output tensor.

        Args:
            x: input tensor; its last dimension must equal self.c.
            name: name passed to the final fully connected layer.

        Returns:
            The tensor produced by the last fully connected layer.
        """
        td = self._addConv2d
        fc = self._addFc
        # Dropout is currently disabled; enabling it means restoring both
        # commented lines below.
        #self.keep_prob = tf.placeholder_with_default(.5, shape=())

        # Each filter spec is (height, width, in_channels, out_channels);
        # in_channels is read from the previous layer's output.
        x = td(x, (8, 8, self.c, 12), padding='SAME', pooling=False)
        x = td(x, (8, 8, int(x.shape[-1]), 12), padding='VALID')
        #x = tf.nn.dropout(x, self.keep_prob)

        x = td(x, (3, 3, int(x.shape[-1]), 16), padding='SAME')#, pooling=False)
        x = td(x, (3, 3, int(x.shape[-1]), 16), padding='VALID')

        x = td(x, (3, 3, int(x.shape[-1]), 32), padding='SAME')#, pooling=False)
        x = td(x, (3, 3, int(x.shape[-1]), 32), padding='VALID')

        x = td(x, (3, 3, int(x.shape[-1]), 64), padding='SAME')#, pooling=False)
        x = td(x, (3, 3, int(x.shape[-1]), 64), padding='VALID')

        x = tf.contrib.layers.flatten(x)

        x = fc(x, (int(x.shape[-1]), 32))
        # Output width follows the module-level target matrix Y.
        x = fc(x, (int(x.shape[-1]), Y.shape[1]), name=name)

        return x

# Instantiate the architecture with c set to the last image dimension
# (presumably the channel count — TODO confirm) and build it on the image placeholder.
net = Arch(c=image_shape[-1])
z = net(x)
# Display the target placeholder and the prediction tensor.
y, z

### Train

In [None]:
def breakBatches(X_data, Y_data, desc=None):
    """Yield (normalized inputs, targets) slices of at most BATCH_SIZE rows.

    Progress is reported through a tqdm notebook bar labelled `desc`.
    The two inputs must have the same length; the final batch may be shorter.
    """
    n_rows = len(X_data)
    assert n_rows == len(Y_data)
    starts = list(range(0, n_rows, BATCH_SIZE))
    for start in tqdm.tqdm_notebook(starts, desc=desc, unit='batch'):
        stop = start + BATCH_SIZE
        inputs = X_data[start:stop]
        targets = Y_data[start:stop]
        yield normalize(inputs), targets

In [None]:
def evaluate(X_data, y_data, sess=None, extraFeedDict=None, desc='validation', givePredictions=False):
    """Compute the mean squared error of the network output `z` on a dataset.

    Args:
        X_data: input examples, batched and normalized by breakBatches.
        y_data: targets aligned with X_data.
        sess: session to run in; defaults to tf.get_default_session().
        extraFeedDict: optional extra feed entries. It is copied, so the
            caller's dict is never modified.
        desc: label for the progress bar.
        givePredictions: when true, also return the per-batch predictions.

    Returns:
        The squared error averaged over every element of y_data, or a
        (mse, list_of_batch_predictions) tuple when givePredictions is true.

    Raises:
        ZeroDivisionError: if the dataset is empty.
    """
    # Work on a private copy: a mutable default (or the caller's dict) must
    # not accumulate entries across calls.
    feed_extras = dict(extraFeedDict) if extraFeedDict else {}
    if hasattr(net, 'keep_prob'):
        # Disable dropout during evaluation unless the caller overrode it.
        feed_extras.setdefault(net.keep_prob, 1.0)
    if sess is None:
        sess = tf.get_default_session()

    num = 0
    den = 0
    predictions = []

    # Iterate over the data that was passed in, not the global training set.
    for batch_x, batch_y in breakBatches(X_data, y_data, desc=desc):
        fd = {x: batch_x}
        fd.update(feed_extras)

        zeval = sess.run(z, feed_dict=fd)
        if givePredictions:
            predictions.append(zeval)
        # Sum in NumPy (over rows and columns alike) instead of creating a
        # new TensorFlow op on every batch, which grows the graph without bound.
        num += np.sum((batch_y - zeval) ** 2)
        den += batch_y.size

    if givePredictions:
        return num / den, predictions
    return num / den

[Queue instructions.](http://ischlag.github.io/2016/11/07/tensorflow-input-pipeline-for-large-datasets/)

Evaluate the untrained network.

Build a FIFOQueue

In [None]:
import threading
# Module-level aliases read by the enqueue() thread function.
raw_data = X_train
raw_target = Y_train

In [None]:
# Number of passes of the training loop.
EPOCHS = 10
# Rows per training batch; also the slice size used by breakBatches.
BATCH_SIZE = 16
# Rows handed to the queue per enqueue call.
CHUNK_SIZE = BATCH_SIZE

In [None]:
# Placeholders holding one chunk (CHUNK_SIZE rows) of inputs and targets.
queue_input_data = tf.placeholder(tf.float32, shape=(CHUNK_SIZE, *image_shape))
queue_input_target = tf.placeholder(tf.float32, shape=[CHUNK_SIZE, Y_train.shape[1]])

# Queue elements are single (image, target) pairs; at most 50 are buffered.
queue = tf.FIFOQueue(
    capacity=50, dtypes=[tf.float32, tf.float32], 
    shapes=[image_shape, Y_train.shape[1:]]
)

# enqueue_many takes a whole chunk at once; dequeue hands back one element.
enqueue_op = queue.enqueue_many([queue_input_data, queue_input_target])
dequeue_op = queue.dequeue()

In [None]:
# tensorflow recommendation:
# capacity = min_after_dequeue +
#      (num_threads + a small safety margin) * batch_size
# NOTE(review): that formula appears to come from the shuffle_batch guidance;
# no min_after_dequeue is configured here — confirm that capacity=40 is intended.
# Regroup single dequeued elements into batches of BATCH_SIZE.
data_batch, target_batch = tf.train.batch(
    dequeue_op, batch_size=BATCH_SIZE, capacity=40
)

Start the Threads

In [None]:
def enqueue(sess):
    """Feed raw_data/raw_target into the queue in CHUNK_SIZE-row chunks.

    Intended to run in a background thread. Data is read cyclically: when a
    chunk would run past the end, it is completed with rows from the start.
    The loop ends when the enqueue is cancelled, which is what closing the
    queue with cancel_pending_enqueues=True does.

    Args:
        sess: the tf.Session in which enqueue_op is run.
    """
    under = 0
    n_rows = len(raw_data)  # avoid shadowing the builtin `max`
    print("starting to write into queue")
    while True:
        upper = under + CHUNK_SIZE
        if upper <= n_rows:
            curr_data = raw_data[under:upper]
            curr_target = raw_target[under:upper]
            under = upper
        else:
            # Wrap around: take the tail, then top up from the beginning.
            rest = upper - n_rows
            curr_data = np.concatenate((
                raw_data[under:n_rows], raw_data[0:rest]
            ))
            curr_target = np.concatenate((
                raw_target[under:n_rows], raw_target[0:rest]
            ))
            under = rest

        try:
            sess.run(enqueue_op, feed_dict={queue_input_data: curr_data,
                                            queue_input_target: curr_target})
        except tf.errors.CancelledError:
            # The queue was closed: leave the loop instead of letting the
            # thread die with an unhandled-exception traceback.
            break
    print("finished enqueueing")

In [None]:
# Mean squared error between the target placeholder y and the network output z.
loss = tf.losses.mean_squared_error(y, z)
learning_rate = .0001
# Op that applies one Adam update minimizing the loss.
training_operation = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

In [None]:
# Saver used later to write the checkpoint and to restore it.
saver = tf.train.Saver()

In [None]:
with tf.Session() as sess:
    # Start the thread that feeds our FIFOQueue. Setting `daemon` (rather
    # than merely querying isDaemon()) keeps a stuck feeder thread from
    # holding the interpreter open.
    enqueue_thread = threading.Thread(target=enqueue, args=[sess])
    enqueue_thread.daemon = True
    enqueue_thread.start()

    # Start the queue runners that back tf.train.batch.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    try:
        sess.run(tf.global_variables_initializer())
        num_examples = len(X_train)
        nchunks = int(np.ceil(num_examples / CHUNK_SIZE))
        # Fail a fetch after 4 s instead of blocking forever on an empty queue.
        run_options = tf.RunOptions(timeout_in_ms=4000)

        MSEs = []
        for iepoch in tqdm.tqdm_notebook(range(EPOCHS), unit='epoch', total=EPOCHS):
            for b in tqdm.tqdm_notebook(
                range(nchunks),
                unit='chunk',
                total=nchunks,
                desc='epoch %d' % (iepoch+1,)
            ):
                # Fetch the data from the pipeline.
                batch_x, batch_y = sess.run(
                    [data_batch, target_batch], options=run_options
                )
                batch_x = normalize(batch_x)

                # Do the training.
                sess.run(training_operation, feed_dict={x: batch_x, y: batch_y})
            # Track the validation error once per epoch.
            mse = evaluate(X_valid, Y_valid, sess)
            MSEs.append(mse)
            print('MSE=%s' % mse)
        saver.save(sess, './gtaArch')
    finally:
        # Shutdown everything to avoid zombies, even when training fails or
        # is interrupted: stop all pipeline threads before the session closes.
        sess.run(queue.close(cancel_pending_enqueues=True))
        coord.request_stop()
        coord.join(threads)

In [None]:
# Validation error recorded after each epoch.
fig, ax = plt.subplots()
ax.plot(MSEs)
ax.set(xlabel='epoch', ylabel='validation MSE');

Evaluate on all the data.

In [None]:
with tf.Session() as sess:
    # Restore the most recent checkpoint found in the working directory.
    saver.restore(sess, tf.train.latest_checkpoint('.'))
    extraFeedDict = {}
    if hasattr(net, 'keep_prob'):
        # Evaluate with dropout disabled.
        extraFeedDict.setdefault(net.keep_prob, 1.0)

    # Z maps split name -> (mse, list of per-batch prediction arrays).
    Z = {}
    for k, Xd, Yd in zip(
        ['train', 'test', 'valid'],
        [X_train, X_test, X_valid],
        [Y_train, Y_test, Y_valid],
    ):
        # Pass the session and feed overrides explicitly rather than relying
        # on the default session and on a dict that was built but never used.
        Z[k] = evaluate(
            Xd, Yd, sess=sess, extraFeedDict=extraFeedDict,
            desc=k, givePredictions=True,
        )
        print('MSE:', Z[k][0])

In [None]:
# Collapse each split's list of per-batch predictions into a single array.
for split_name in list(Z):
    entry = Z[split_name]
    Z[split_name] = (entry[0], np.vstack(entry[1]))

Did the network predict only zeros??

In [None]:
# Print, per split, whether every predicted value is exactly zero.
for split_name in Z:
    split_predictions = Z[split_name][1]
    print((split_predictions == 0).all())

In [None]:
# Training targets (solid) against network predictions (dashed), one colour
# per output column. Only the first prediction line gets a legend entry.
fig, ax = plt.subplots()
palette = ['red', 'blue', 'green']
for i, eid in enumerate(keepEids):
    axis_name = gta.eventIDs.eids2names[eid]
    color = palette[i]
    ax.plot(Y_train[:, i], label=axis_name, color=color)
    prediction_label = '%s prediction' % axis_name if i == 0 else None
    ax.plot(Z['train'][1][:, i], color=color, linestyle='--', label=prediction_label)
ax.legend(fontsize=10)