### Recurrent NN _ mnist

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [2]:
from tensorflow.examples.tutorials.mnist import input_data

In [5]:
mnist = input_data.read_data_sets("./mnist/data/", one_hot=True)

Extracting ./mnist/data/train-images-idx3-ubyte.gz
Extracting ./mnist/data/train-labels-idx1-ubyte.gz
Extracting ./mnist/data/t10k-images-idx3-ubyte.gz
Extracting ./mnist/data/t10k-labels-idx1-ubyte.gz


In [6]:
lr = 1e-3
total_epoch = 30
batch_size = 128
dropout_prob = 1
loadModel = False

'''
As RNN handles sequantial data, we need to fix the number of inputs 
(nInput) and how many the inputs are covered (nStep).
- row: nInput
- col: nStep
'''
nInput = 28
nStep = 28

In [8]:
tf.reset_default_graph()

globalStep = tf.Variable(0, trainable=False, name='global_step')

X = tf.placeholder(tf.float32, [None, nStep, nInput])
Y = tf.placeholder(tf.float32, [None, 10])
dropoutProb = tf.placeholder(tf.float32)

# with tf.name_scope('layer1', reuse=tf.AUTO_REUSE):
W = tf.Variable(tf.random_normal([128, 10]))
b = tf.Variable(tf.random_normal([10]))
cell = tf.nn.rnn_cell.BasicRNNCell(128)

'''
Originally, we need to do this:
states = tf.zeros(batch_size)
for i in range(n_step):
    outputs, states = cell(X[[:, i]], states)
    ...
    
This can be done by tf.nn.dynamic_rnn
'''
outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

'''
Y needs to have the shape of [batch_size, n_class]. Thus,
outputs : [batch_size, n_step, n_hidden]
       -> [n_step, batch_size, n_hidden]
       -> [batch_size, n_hidden]
'''
outputs = tf.transpose(outputs, [1, 0, 2])
outputs = outputs[-1]
output = tf.matmul(outputs, W) + b

#     L1 = tf.nn.relu(tf.matmul(X, W1))
#     L1 = tf.nn.dropout(L1, dropoutProb)
    
#     tf.summary.histogram("X", X)
#     tf.summary.histogram("Weights", W1)
# with tf.name_scope('layer2'):
#     W2 = tf.Variable(tf.random_normal([256, 256], stddev=0.01), name='W2')
#     L2 = tf.nn.relu(tf.matmul(L1, W2))
#     L2 = tf.nn.dropout(L2, dropoutProb)
    
#     tf.summary.histogram("Weights", W2)

# with tf.name_scope('layer3'):
#     W3 = tf.Variable(tf.random_normal([256, 10], stddev=0.01), name='W3')
#     output = tf.matmul(L2, W3)
    
#     tf.summary.histogram("Weights", W3)
#     tf.summary.histogram("Output", output)

with tf.name_scope('optimizer'):
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=output, labels=Y))
    optimizer = tf.train.AdamOptimizer(lr).minimize(loss, global_step=globalStep)
    
    # to collect the data I want to see
    tf.summary.scalar('loss', loss)
    

In [9]:
sess = tf.Session()

# saver ===============================================
saver = tf.train.Saver(tf.global_variables())
ckpt = tf.train.get_checkpoint_state('./model')
if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path) and loadModel:
    saver.restore(sess, ckpt.model_checkpoint_path)
    print(">>> Loaded the model .......")
else:
    sess.run(tf.global_variables_initializer())

# tensorboard =========================================
'''
on terminal,   tensorboard --logdir=./logs  
and then, we can see it on      http://localhost:6006
'''
merged = tf.summary.merge_all()
writer = tf.summary.FileWriter('./logs', sess.graph)

# start training ======================================
total_batch = int(mnist.train.num_examples/batch_size)

for epoch in range(total_epoch):
    total_cost = 0
    
    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        batch_xs = batch_xs.reshape((batch_size, nStep, nInput))
        
        summary, _, cost, _ = sess.run([merged, optimizer, loss, globalStep], \
                          {X: batch_xs, Y: batch_ys, dropoutProb: dropout_prob})
        total_cost += cost
        
#         summary = sess.run(merged, {X: batch_xs, Y: batch_ys, dropoutProb: dropout_prob})
        writer.add_summary(summary, global_step=sess.run(globalStep))
        
    print('Epoch: {:.1f}  |   avg. cost= {:.3f}'.format(epoch + 1, total_cost/total_batch))

saver.save(sess, './model/mnistModel.ckpt', global_step=globalStep)
print(">>> Complete and saved the model !!!")

Epoch: 1.0  |   avg. cost= 0.550
Epoch: 2.0  |   avg. cost= 0.235
Epoch: 3.0  |   avg. cost= 0.187
Epoch: 4.0  |   avg. cost= 0.151
Epoch: 5.0  |   avg. cost= 0.139
Epoch: 6.0  |   avg. cost= 0.135
Epoch: 7.0  |   avg. cost= 0.111
Epoch: 8.0  |   avg. cost= 0.111
Epoch: 9.0  |   avg. cost= 0.105
Epoch: 10.0  |   avg. cost= 0.103
Epoch: 11.0  |   avg. cost= 0.093
Epoch: 12.0  |   avg. cost= 0.092
Epoch: 13.0  |   avg. cost= 0.090
Epoch: 14.0  |   avg. cost= 0.088
Epoch: 15.0  |   avg. cost= 0.087
Epoch: 16.0  |   avg. cost= 0.079
Epoch: 17.0  |   avg. cost= 0.078
Epoch: 18.0  |   avg. cost= 0.074
Epoch: 19.0  |   avg. cost= 0.075
Epoch: 20.0  |   avg. cost= 0.077
Epoch: 21.0  |   avg. cost= 0.074
Epoch: 22.0  |   avg. cost= 0.069
Epoch: 23.0  |   avg. cost= 0.074
Epoch: 24.0  |   avg. cost= 0.067
Epoch: 25.0  |   avg. cost= 0.063
Epoch: 26.0  |   avg. cost= 0.065
Epoch: 27.0  |   avg. cost= 0.061
Epoch: 28.0  |   avg. cost= 0.064
Epoch: 29.0  |   avg. cost= 0.063
Epoch: 30.0  |   avg. c

In [10]:
is_correct = tf.equal(tf.argmax(output, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

test_batch_size = len(mnist.test.images)
test_xs = mnist.test.images.reshape(test_batch_size, nStep, nInput)
test_ys = mnist.test.labels

print('Accuracy:', sess.run(accuracy,
                        feed_dict={X: test_xs,
                                   Y: test_ys,
                                   dropoutProb: 1}))

ResourceExhaustedError: OOM when allocating tensor with shape[10000] and type int64 on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node ArgMax_1}} = ArgMax[T=DT_FLOAT, Tidx=DT_INT32, output_type=DT_INT64, _device="/job:localhost/replica:0/task:0/device:GPU:0"](_arg_Placeholder_1_0_1/_53, rnn/TensorArrayUnstack/range/delta)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[{{node Mean/_61}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_140_Mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


Caused by op 'ArgMax_1', defined at:
  File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
    self._run_once()
  File "/usr/lib/python3.6/asyncio/base_events.py", line 1432, in _run_once
    handle._run()
  File "/usr/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/tornado/ioloop.py", line 758, in _run_callback
    ret = callback()
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/tornado/gen.py", line 1233, in inner
    self.run()
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 370, in dispatch_queue
    yield self.process_one()
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/tornado/gen.py", line 346, in wrapper
    runner = Runner(result, future, yielded)
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/tornado/gen.py", line 1080, in __init__
    self.run()
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2817, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2843, in _run_cell
    return runner(coro)
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3018, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3183, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3265, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-10-36b595707af3>", line 1, in <module>
    is_correct = tf.equal(tf.argmax(output, 1), tf.argmax(Y, 1))
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 88, in argmax
    return gen_math_ops.arg_max(input, axis, name=name, output_type=output_type)
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 787, in arg_max
    name=name)
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3272, in create_op
    op_def=op_def)
  File "/home/wonchul/.virtualenvs/p3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1768, in __init__
    self._traceback = tf_stack.extract_stack()

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[10000] and type int64 on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node ArgMax_1}} = ArgMax[T=DT_FLOAT, Tidx=DT_INT32, output_type=DT_INT64, _device="/job:localhost/replica:0/task:0/device:GPU:0"](_arg_Placeholder_1_0_1/_53, rnn/TensorArrayUnstack/range/delta)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[{{node Mean/_61}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_140_Mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.



In [None]:
labels = sess.run(output,
                  feed_dict={X: test_xs,
                             Y: test_ys,
                             dropoutProb: 1})

fig = plt.figure()
for i in range(10):
    subplot = fig.add_subplot(2, 5, i + 1)
    subplot.set_xticks([])
    subplot.set_yticks([])
    subplot.set_title('%d' % np.argmax(labels[i]))
    subplot.imshow(mnist.test.images[i].reshape((28, 28)),
                   cmap=plt.cm.gray_r)

plt.savefig('./figs/mnist_results.png')
plt.show()