In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

In [2]:
tf.set_random_seed(1)

In [3]:
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

Extracting MNIST_data\train-images-idx3-ubyte.gz
Extracting MNIST_data\train-labels-idx1-ubyte.gz
Extracting MNIST_data\t10k-images-idx3-ubyte.gz
Extracting MNIST_data\t10k-labels-idx1-ubyte.gz


In [4]:
# hyperparameters
lr = 0.001
training_iters = 100000
batch_size = 128

n_inputs = 28   # MNIST data input (img shape: 28*28)
n_steps = 28    # time steps
n_hidden_units = 128   # neurons in hidden layer
n_classes = 10      # MNIST classes (0-9 digits)

In [5]:
# tf Graph input
x = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_classes])

In [6]:
print(x.shape)
print(y.shape)

(?, 28, 28)
(?, 10)


In [7]:
# Define weights
weights = {
    # (28, 128)
    'in': tf.Variable(tf.random_normal([n_inputs, n_hidden_units])),
    # (128, 10)
    'out': tf.Variable(tf.random_normal([n_hidden_units, n_classes]))
}
biases = {
    # (128, )
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])),
    # (10, )
    'out': tf.Variable(tf.constant(0.1, shape=[n_classes, ]))
}

In [8]:
print(weights['in'].shape)
print(weights['out'].shape)
print(biases['in'].shape)
print(biases['out'].shape)

(28, 128)
(128, 10)
(128,)
(10,)


In [9]:
def RNN(X, weights, biases):
    print(X.shape)
    X = tf.reshape(X, [-1, n_inputs])
    print(X.shape)
    X_in = tf.matmul(X, weights['in']) + biases['in']
    print(X_in.shape)
    X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])
    print(X_in.shape)
    
    cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_units)
    init_state = cell.zero_state(batch_size, dtype=tf.float32)
    outputs, final_state = tf.nn.dynamic_rnn(cell, X_in, initial_state=init_state, time_major=False)
    print(outputs.shape)
    print(outputs[-1].shape)
    outputs = tf.unstack(tf.transpose(outputs, [1,0,2]))
    #print(outputs.shape)
    print(outputs[-1].shape)
    results = tf.matmul(outputs[-1], weights['out']) + biases['out']
    print(results.shape)
    return results

In [10]:
pred = RNN(x, weights, biases)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=y))
train_op = tf.train.AdamOptimizer(lr).minimize(cost)

(?, 28, 28)
(?, 28)
(?, 128)
(?, 28, 128)
(128, 28, 128)
(28, 128)
(128, 128)
(128, 10)


In [11]:
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [12]:
X, Y = mnist.train.next_batch(batch_size)
print(X.shape)
print(Y.shape)
X = X.reshape([batch_size, n_steps, n_inputs])
print(X.shape)


(128, 784)
(128, 10)
(128, 28, 28)


In [13]:
with tf.Session() as sess:
    # tf.initialize_all_variables() no long valid from
    # 2017-03-02 if using tensorflow >= 0.12
    if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
        init = tf.initialize_all_variables()
    else:
        init = tf.global_variables_initializer()
    sess.run(init)
    step = 0
    while step * batch_size < training_iters:
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        batch_xs = batch_xs.reshape([batch_size, n_steps, n_inputs])
        sess.run([train_op], feed_dict={
            x: batch_xs,
            y: batch_ys,
        })
        if step % 20 == 0:
            print(sess.run(accuracy, feed_dict={
            x: batch_xs,
            y: batch_ys,
            }))
        step += 1

0.3046875
0.5625
0.765625
0.8203125
0.828125
0.8671875
0.859375
0.875
0.9375
0.9140625
0.9296875
0.921875
0.875
0.890625
0.96875
0.9296875
0.9609375
0.9375
0.953125
0.9453125
0.96875
0.96875
0.9765625
0.984375
0.9765625
0.9921875
0.9453125
0.953125
0.9765625
0.96875
0.96875
0.9765625
0.9921875
0.9765625
0.9921875
0.9765625
0.9765625
0.9609375
0.984375
0.953125
