In [1]:
%matplotlib inline 
from __future__ import print_function, division
import numpy as np
import matplotlib.pyplot as plt 
import tensorflow as tf
import IPython.display as ipyd 

from IPython.core.display import HTML
# Inject a CSS rule that styles inline `code` spans in rendered HTML/markdown
# (padding, text colour, background, rounded corners). Being the last expression
# of the cell, the HTML object is displayed, which is what applies the style.
HTML("""<style> .rendered_html code {
            padding: 2px 4px;
            color: #c7254e;
            background-color: #f9f2f4;
            border-radius: 4px;
        } </style>""")

In [15]:
# number of epochs; a fresh random series is generated at the start of each epoch
num_epochs = 10

# dimension of the RNN hidden state vector
state_size = 4

# one-hot output (dimension of the output vector): the echoed bit is either 0 or 1
num_classes = 2

# total length of the generated series, before it is reshaped into batch_size rows
total_series_length = 50000

# how far the input is shifted to the right to produce the target output
echo_step = 3

# number of rows the series is split into (sequences processed side by side)
batch_size = 5
# number of time steps per training window (truncated backpropagation length)
truncated_backprop_length = 15
# number of whole windows per row in one epoch: 50000 // 5 // 15 = 666, so the
# last 10 of each row's 10000 columns are never consumed by the training loop
num_batches = total_series_length//batch_size//truncated_backprop_length

In [3]:
def generateData(series_length=None, shift=None, rows=None):
    """Generate a random binary series and its delayed echo as 2-D arrays.

    The input is a sequence of independent 0/1 draws (probability 0.5 each).
    The target is the same sequence shifted right by `shift` positions, with
    the first `shift` entries set to 0. Both flat sequences are then reshaped
    row-major into `rows` rows, so consecutive rows hold consecutive chunks
    of the original series.

    Args:
        series_length: number of samples to draw; defaults to the notebook
            constant `total_series_length`.
        shift: echo delay in steps; defaults to the notebook constant
            `echo_step`.
        rows: number of rows of the returned matrices; defaults to the
            notebook constant `batch_size`.

    Returns:
        Tuple `(x, y)` of integer arrays, each of shape
        `(rows, series_length // rows)`.

    Raises:
        ValueError: if `shift` is negative, `rows` is not positive, or
            `series_length` is not a multiple of `rows`.
    """
    # Fall back to the notebook-level configuration for anything not overridden,
    # so a bare generateData() call behaves exactly as before.
    if series_length is None:
        series_length = total_series_length
    if shift is None:
        shift = echo_step
    if rows is None:
        rows = batch_size

    if shift < 0:
        # a negative shift would roll left and then zero almost every target
        raise ValueError("shift must be non-negative, got %r" % (shift,))
    if rows <= 0 or series_length % rows != 0:
        raise ValueError(
            "series_length (%r) must be a multiple of a positive rows (%r)"
            % (series_length, rows))

    # input data (np.random.choice already returns an ndarray)
    x = np.random.choice(2, series_length, p=[0.5, 0.5])
    # shift the data to the right and fill the vacated leading entries with 0
    y = np.roll(x, shift)
    y[0:shift] = 0

    # reshape both flat series to (rows, series_length // rows)
    x = x.reshape((rows, -1))
    y = y.reshape((rows, -1))

    return (x, y)

In [4]:
# inputs for one training window: one row per sequence, one column per time step
batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
# integer class labels (the echoed bits) for the same window
batchY_placeholder = tf.placeholder(tf.int32, [batch_size, truncated_backprop_length])
# hidden state each row starts the window with; the training loop feeds zeros at the
# start of an epoch and the previous window's final state afterwards
init_state = tf.placeholder(tf.float32, [batch_size, state_size])

In [5]:
# This is W (recurrent weights), initialised with uniform random values from np.random.rand.
# The forward pass multiplies the concatenation [x_t, s_{t-1}], of shape
# batch_size x (1 + state_size), by W. Since x_t is one dimensional, the input weights U
# are folded into W as one extra ROW: row 0 acts on the input, the remaining state_size
# rows act on the previous state.
W = tf.Variable(np.random.rand(state_size+1, state_size), dtype=tf.float32)
# state bias, starts at zero; its single row is broadcast over the batch
b = tf.Variable(np.zeros((1,state_size)), dtype=tf.float32)

# this is V (output weights): maps a state vector to num_classes logits
W2 = tf.Variable(np.random.rand(state_size, num_classes), dtype=tf.float32)
# output bias, starts at zero
b2 = tf.Variable(np.zeros((1,num_classes)), dtype=tf.float32)

In [6]:
# All rows of the batch are processed simultaneously, one time step at a time.
# Splitting along axis 1 (the time axis) yields one tensor per column of the window,
# i.e. truncated_backprop_length tensors, each holding one value per batch row.
inputs_series = tf.unstack(batchX_placeholder, axis=1)
labels_series = tf.unstack(batchY_placeholder, axis=1)

In [8]:
# Forward pass: unroll the RNN over the window, one graph step per input column.
# After the loop `current_state` refers to the state of the last time step, which the
# training loop fetches and feeds back in as `init_state` for the next window.
current_state = init_state
states_series = []
for current_input in inputs_series:
    # make column vector (one entry per batch row)
    current_input = tf.reshape(current_input, [batch_size, 1])
    # put input and state of the whole batch side by side in one matrix so that a
    # single matmul with W covers both the input and the recurrent weights
    input_and_state_concatenated = tf.concat([current_input, current_state], 1)

    # next state for every row of the batch, computed all at once
    next_state = tf.tanh(tf.matmul(input_and_state_concatenated, W) + b)
    # record the state of this time step
    states_series.append(next_state)
    # carry the state forward to the next time step
    current_state = next_state

In [9]:
# Errors are backpropagated only through the window that is currently being processed.

# For every time step of the window, compute the class logits for all rows of the
# batch and the matching softmax probabilities (the predicted outputs).
logits_series = []
predictions_series = []
for state in states_series:
    logits = tf.matmul(state, W2) + b2
    logits_series.append(logits)
    predictions_series.append(tf.nn.softmax(logits))

In [12]:
# Compute the losses: one cross-entropy vector (one entry per batch row) per time step,
# comparing that step's logits with that step's integer labels.
losses = []
for logits, labels in zip(logits_series, labels_series):
    step_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    losses.append(step_loss)

# Average over all time steps and batch rows, then minimise with Adagrad (learning rate 0.3).
total_loss = tf.reduce_mean(losses)
optimizer = tf.train.AdagradOptimizer(0.3)
train_step = optimizer.minimize(total_loss)

In [17]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # loss of every training step, accumulated across all epochs
    loss_list = []
    # graph nodes evaluated on every step: loss, update op, final state, predictions
    fetches = [total_loss, train_step, current_state, predictions_series]

    for epoch_idx in range(num_epochs):
        # fresh random data and a zeroed hidden state at the start of each epoch
        x, y = generateData()
        _current_state = np.zeros((batch_size, state_size))

        print("New data, epoch", epoch_idx)

        for batch_idx in range(num_batches):
            # column range of the current window
            start_idx = truncated_backprop_length * batch_idx
            end_idx = start_idx + truncated_backprop_length

            batchX = x[:, start_idx:end_idx]
            batchY = y[:, start_idx:end_idx]

            # the state returned by the previous window seeds this one
            feed = {
                batchX_placeholder: batchX,
                batchY_placeholder: batchY,
                init_state: _current_state,
            }
            _total_loss, _train_step, _current_state, _predictions_series = sess.run(
                fetches, feed_dict=feed)

            loss_list.append(_total_loss)

            # progress report every 100 windows
            if not batch_idx % 100:
                print("Step",batch_idx, "Loss", _total_loss)

New data, epoch 0
Step 0 Loss 0.951066
Step 100 Loss 0.450755
Step 200 Loss 0.243598
Step 300 Loss 0.00649469
Step 400 Loss 0.00387137
Step 500 Loss 0.00302281
Step 600 Loss 0.00265746
New data, epoch 1
Step 0 Loss 0.212604
Step 100 Loss 0.00150475
Step 200 Loss 0.0013337
Step 300 Loss 0.00112596
Step 400 Loss 0.000966698
Step 500 Loss 0.000778024
Step 600 Loss 0.000761148
New data, epoch 2
Step 0 Loss 0.347741
Step 100 Loss 0.00178258
Step 200 Loss 0.000953126
Step 300 Loss 0.000862559
Step 400 Loss 0.000733374
Step 500 Loss 0.00068691
Step 600 Loss 0.000598636
New data, epoch 3
Step 0 Loss 0.261991
Step 100 Loss 0.000641246
Step 200 Loss 0.000524248
Step 300 Loss 0.000453691
Step 400 Loss 0.000433275
Step 500 Loss 0.000443277
Step 600 Loss 0.000391661
New data, epoch 4
Step 0 Loss 0.215276
Step 100 Loss 0.000375752
Step 200 Loss 0.000341665
Step 300 Loss 0.000356488
Step 400 Loss 0.00037756
Step 500 Loss 0.000273282
Step 600 Loss 0.000327478
New data, epoch 5
Step 0 Loss 0.285965
Ste