In [10]:
import tensorflow as tf
import numpy as np
import timeit
from sklearn.utils import shuffle
old_v = tf.logging.get_verbosity()
tf.logging.set_verbosity(tf.logging.ERROR)

<h1>Extract MNIST data</h1>

In [11]:
from tensorflow.examples.tutorials.mnist import input_data
#get mnist data, with one_hot encoding, reshape = False (that means images are not flatten)
mnist = input_data.read_data_sets("MNIST_data/",reshape=False,one_hot=True)
#suppress warnings
tf.logging.set_verbosity(old_v)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


<h1>Prepare training, validation and testing data</h1>

In [12]:
x_train, y_train           = mnist.train.images, mnist.train.labels
x_validation, y_validation = mnist.validation.images, mnist.validation.labels
x_test, y_test             = mnist.test.images, mnist.test.labels

#pad images with 0s (28x28 to 32x32)
x_train = np.pad(x_train, ((0,0),(2,2),(2,2),(0,0)), 'constant')
x_test = np.pad(x_test, ((0,0),(2,2),(2,2),(0,0)), 'constant')

<h1>Define hyperparameter</h1>

In [13]:
lr = 0.001
epochs = 10
batch_size = 32
num_classes = 10
W = 32
H = 32
channel = 1

In [14]:
tf.reset_default_graph()

<h1>Placeholder</h1>

In [15]:
X = tf.placeholder(tf.float32,[None,W, H, channel],name='X')
Y = tf.placeholder(tf.int32,[None,num_classes],name='Y')

<h1>Define LeNet-5</h1>

In [16]:
def LeNet(x):    
    # Conv Layer 1
    conv1_w = tf.Variable(tf.truncated_normal(shape=[5,5,1,6], mean=0, stddev=0.1))
    conv1_b = tf.Variable(tf.zeros(6))
    conv1 = tf.nn.conv2d(x,conv1_w, strides=[1,1,1,1], padding='VALID') + conv1_b
    conv1 = tf.nn.relu(conv1)

    # Pooling Layer 1
    pool_1 = tf.nn.max_pool(conv1, ksize=[1,2,2,1], strides=[1,2,2,1], padding='VALID')
    
    # Conv Layer 2
    conv2_w = tf.Variable(tf.truncated_normal(shape=[5,5,6,16], mean=0, stddev=0.1))
    conv2_b = tf.Variable(tf.zeros(16))
    conv2 = tf.nn.conv2d(pool_1, conv2_w, strides=[1,1,1,1], padding='VALID') + conv2_b
    conv2 = tf.nn.relu(conv2)

    # Pooling Layer 2
    pool_2 = tf.nn.max_pool(conv2, ksize=[1,2,2,1], strides=[1,2,2,1], padding='VALID') 
    
    # Conv Layer 3
    conv3 = tf.contrib.layers.flatten(pool_2)
    conv3_w = tf.Variable(tf.truncated_normal(shape=(400,120), mean=0, stddev=0.1))
    conv3_b = tf.Variable(tf.zeros(120))
    conv3 = tf.matmul(conv3,conv3_w) + conv3_b
    conv3 = tf.nn.relu(conv3)

    # Fully Connected Layer 1
    fc2_w = tf.Variable(tf.truncated_normal(shape=(120,84), mean=0, stddev=0.1))
    fc2_b = tf.Variable(tf.zeros(84))
    fc2 = tf.matmul(conv3,fc2_w) + fc2_b
    fc2 = tf.nn.relu(fc2)
    
    # Fully Connected Layer 2
    fc3_w = tf.Variable(tf.truncated_normal(shape=(84,10), mean=0, stddev=0.1))
    fc3_b = tf.Variable(tf.zeros(10))
    logits = tf.matmul(fc2, fc3_w) + fc3_b
    return logits

<h1>Cost and optimization</h1>

In [17]:
logits = LeNet(X)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate = lr)
train_op = optimizer.minimize(loss)

correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

<h1>Training, validating, testing</h1>
<h2>1. Print out validation accuracy after each training epoch</h2>
<h2>2. Print out training time on each epoch</h2>
<h2>3. Print out testing accuracy</h2>

In [18]:
def next_batch(batch_size, data, labels):
    idx = np.arange(0 , len(x_train))
    np.random.shuffle(idx)
    idx = idx[:batch_size]
    data_shuffle = [data[i] for i in idx]
    labels_shuffle = [labels[i] for i in idx]
    data_shuffle = np.asarray(data_shuffle)
    labels_shuffle = np.asarray(labels_shuffle)
    labels_shuffle = np.squeeze(labels_shuffle)
    return data_shuffle, labels_shuffle

In [19]:
init = tf.global_variables_initializer()

total_batch = int(mnist.train.num_examples / batch_size)
num_steps = (epochs + 1) * total_batch

with tf.Session() as sess:
    sess.run(init)
    num_examples = len(x_train)
    for i in range(num_steps):
        # fetch batch
        batch_x, batch_y = next_batch(batch_size, x_train, y_train)
        # run optimization
        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
        if (i > 0) and (i % total_batch) == 0:
            acc = sess.run(accuracy, feed_dict={X: batch_x, Y: batch_y})
            print("epoch " + str(int(i/total_batch)) + ", acc: {:.4f}".format(acc))
            
    acc_test = sess.run(accuracy, feed_dict={X: x_test, Y: y_test})
    print("acc_test: {:.2f}".format(acc_test))

epoch 1, acc: 1.0000
epoch 2, acc: 1.0000
epoch 3, acc: 0.9688
epoch 4, acc: 1.0000
epoch 5, acc: 0.9688
epoch 6, acc: 1.0000
epoch 7, acc: 1.0000
epoch 8, acc: 1.0000
epoch 9, acc: 1.0000
epoch 10, acc: 1.0000
acc_test: 0.99
