# Long Short Term Memory
이번 튜토리얼에서는 LSTM을 이용해 many-to-one model을 만들어 mnist classification을 해보도록 하겠습니다. 이번 튜토리얼의 목적은 tf.nn.rnn_cell.BasicLSTMCell과 tf.nn.dynamic_rnn을 제대로 이해하는 것 입니다.

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import os
%matplotlib inline

mnist = input_data.read_data_sets("./mnist/", one_hot=False)

Extracting ./mnist/train-images-idx3-ubyte.gz
Extracting ./mnist/train-labels-idx1-ubyte.gz
Extracting ./mnist/t10k-images-idx3-ubyte.gz
Extracting ./mnist/t10k-labels-idx1-ubyte.gz


In [2]:
num_epochs = 15
batch_size = 100
seq_length = 28   
dim_input = 28
dim_hidden = 128
dim_out = 10
model_path = './model'
num_layers = 4
keep_prob = 0.7

In [3]:
x = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28], name='inputs')  
y = tf.placeholder(dtype=tf.int64, shape=[None], name='labels')

[tf.nn.rnn_cell](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn_cell.py/#BasicLSTMCell)

[tf.nn.rnn](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn.py)

In [4]:
def lstm(x, mode='train'):
    with tf.variable_scope('lstm') as scope:
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=dim_hidden)
        if mode == 'train':
            lstm_cell = tf.nn.rnn_cell.DropoutWrapper(cell=lstm_cell, output_keep_prob=keep_prob)
        lstm_cell = tf.nn.rnn_cell.MultiRNNCell(cells=[lstm_cell]*num_layers, state_is_tuple=True)
        outputs, states = tf.nn.dynamic_rnn(cell=lstm_cell, inputs=x, dtype=tf.float32, scope=scope)
  
    with tf.variable_scope('logits'):
        w = tf.get_variable('w', shape=[dim_hidden, dim_out], initializer=tf.random_normal_initializer())
        b = tf.get_variable('b', shape=[dim_out], initializer=tf.constant_initializer(0.0))
        out = tf.matmul(tf.reshape(outputs[:, -1, :], [-1, dim_hidden]), w) + b
        return out

In [5]:
out = lstm(x, mode='train')
tf.get_variable_scope().reuse_variables()
out2 = lstm(x, mode='test')

loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(out, y))
optimizer = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(loss)

pred = tf.argmax(out2, 1) # batch_size

correct_pred = tf.equal(pred, y)
incorrect_pred = tf.not_equal(pred, y)

accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [6]:
if not os.path.exists(model_path):
    os.makedirs(model_path)

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
    # initialize tensor variables
    tf.initialize_all_variables().run()
    saver = tf.train.Saver(max_to_keep=15)
    # training cycle
    for epoch in range(15):
        avg_loss = 0.
        n_iters_per_epoch = int(mnist.train.num_examples / batch_size)
        # loop over all batches
        for i in range(n_iters_per_epoch):
            x_batch, y_batch = mnist.train.next_batch(batch_size)
            # reshape inputs to batch_sizex seq_length x dim_input 
            x_batch = x_batch.reshape(-1, seq_length, dim_input)
            # run optimization op (backprop) and loss op (to get loss value)
            _, c = sess.run([optimizer, loss], feed_dict={x: x_batch, y: y_batch})
            # compute average loss
            avg_loss += c / n_iters_per_epoch
        print "Epoch %d, Loss: %.3f"% (epoch+1, avg_loss)
        saver.save(sess, os.path.join(model_path, 'lstm_model'), global_step=epoch+1)
    print "Finished training!"

Epoch 1, Loss: 0.706
Epoch 2, Loss: 0.155
Epoch 3, Loss: 0.100
Epoch 4, Loss: 0.073
Epoch 5, Loss: 0.059
Epoch 6, Loss: 0.049
Epoch 7, Loss: 0.042
Epoch 8, Loss: 0.037
Epoch 9, Loss: 0.031
Epoch 10, Loss: 0.029
Epoch 11, Loss: 0.026
Epoch 12, Loss: 0.022
Epoch 13, Loss: 0.021
Epoch 14, Loss: 0.019
Epoch 15, Loss: 0.018
Finished training!


In [8]:
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
    saver = tf.train.Saver()
    saver.restore(sess, os.path.join(model_path, 'lstm_model-15'))
    avg_accuracy = 0.0
    n_iters_per_epoch = int(mnist.test.num_examples / batch_size)
    for i in range(n_iters_per_epoch):
        x_batch, y_batch = mnist.train.next_batch(batch_size)
        # reshape inputs to batch_sizex seq_length x dim_input 
        x_batch = x_batch.reshape(-1, seq_length, dim_input)
        # run optimization op (backprop) and loss op (to get loss value)
        acc = sess.run(accuracy, feed_dict={x: x_batch, y: y_batch})
        # compute average loss
        avg_accuracy += acc / n_iters_per_epoch
    print "Test accuracy: ", avg_accuracy

Test accuracy:  0.996099970937
