In [12]:
# Automatically reload changes to external code:
# load IPython's autoreload extension and select mode 2, so modules edited on
# disk are re-imported before a cell runs instead of requiring a kernel restart.
%load_ext autoreload
%autoreload 2


#A Convolutional Network implementation example using TensorFlow library.
#This example is using the MNIST database of handwritten digits
#(http://yann.lecun.com/exdb/mnist/)

#Author: Aymeric Damien
#Project: https://github.com/aymericdamien/TensorFlow-Examples/

# Based on above project, modified by James Chan

In [1]:
import tensorflow as tf
from mnist import loader #loader for mnist dataset
from matplotlib import pyplot as plt
import numpy as np
import pdb, time

# Mnist dataset

The MNIST database of handwritten digits. [[website]](http://yann.lecun.com/exdb/mnist/)<br>
There are **60,000** training images and **10,000** testing images in this dataset.<br>
Each digit is a one-channel image. Size of image = 28*28 = 784.

![](imgs/mnist_ex.png)

TensorFlow provides some built-in helper functions for loading MNIST.

Ex.<br>
from tensorflow.examples.tutorials.mnist import input_data<br>
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

Instead of using these functions, I'll load the original dataset manually in this code.<br>
This makes the data processing easier to trace.

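When we load the data, each split comes back as three arrays: the images flattened to `[N, 784]` and divided by 255, the labels as `[N]`, and the mean image as `[784]`.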


In [30]:
# Load MNIST data manually.
# ex. load_mnist_data('train')
def load_mnist_data(flag, data_path='data'):
    """Load one MNIST split and divide its pixel values by 255.

    Args:
        flag: 'train' to load the training split, 'test' for the testing split.
        data_path: directory handed to ``loader.MNIST``.

    Returns:
        A tuple ``(ims, labels, ims_mean)``:
            ims:      images divided by 255.0, [N * 784]
            labels:   labels, [N]
            ims_mean: mean of all images of this split (taken over axis 0),
                      [784]. The notebook only uses the mean of the training
                      data.

    Raises:
        ValueError: if ``flag`` is neither 'train' nor 'test'.
    """
    # Reject a bad flag before the loader is constructed, so a typo fails fast.
    if flag not in ('train', 'test'):
        raise ValueError("Error. Only training or testing data.")

    data_loader = loader.MNIST(data_path)
    if flag == 'train':
        ims, labels = data_loader.load_training()
    else:
        ims, labels = data_loader.load_testing()

    # Some MNIST loaders hand back plain Python sequences instead of ndarrays;
    # dividing a list by a float raises TypeError and a list cannot be indexed
    # with a list of positions. np.asarray leaves existing ndarrays untouched.
    ims = np.asarray(ims) / 255.0
    labels = np.asarray(labels)
    ims_mean = np.mean(ims, axis=0)
    return ims, labels, ims_mean

In [31]:
# --- Training hyper-parameters ---
learning_rate = 1e-3     # learning rate handed to the optimizer
training_epochs = 2      # number of passes over the training set
batch_size = 100         # images per mini-batch of the training set
test_batch_size = 1000   # images per mini-batch of the testing set
display_step = 10        # evaluate and print every `display_step` training steps

# --- Network / dataset dimensions ---
n_input = 28 * 28        # MNIST data input: a flattened 28*28 image (784 values)
n_classes = 10           # MNIST total classes (digits 0-9)
stddev = 0.01            # standard deviation for random weight initialization


# Functions of Convolutional Neural Network

In [32]:
# Create some wrappers for simplicity

# Convolution layer helper.
# Expected inputs:
#   x: [batch_size, height, width, channels]
#   W (weights): tf.Variable of shape [kernel_size, kernel_size, input_channel, output_channel]
#   b (biases):  tf.Variable of shape [output_channel]
def conv2d(x, W, b, strides=1):
    """Convolve `x` with `W` ('SAME' padding), add bias `b`, then apply ReLU.

    `strides` is the single step used along both spatial axes.
    """
    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, W, strides=stride_spec, padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(convolved, b))


def maxpool2d(x, k=2):
    """Max-pool `x` with a k-by-k window, a stride of k and 'SAME' padding."""
    pool_shape = [1, k, k, 1]   # the window size doubles as the stride
    return tf.nn.max_pool(x, ksize=pool_shape, strides=pool_shape, padding='SAME')


In [33]:
def lenet():
    """Build the LeNet-style graph: two conv+pool stages, one hidden dense layer, one output layer.

    Reads the module-level settings `n_input`, `n_classes` and `stddev`, and
    uses the `conv2d` / `maxpool2d` wrappers.

    Returns:
        x:         float32 placeholder for the input images, [batch_size x n_input]
        y:         int32 placeholder for the integer labels, [batch_size]
        dropout:   float32 placeholder holding the dropout ratio
                   (the kept fraction is 1 - dropout)
        loss:      scalar cross-entropy, summed (not averaged) over the batch
        pred:      output of the last layer before softmax, [batch_size x n_classes]
        one_hot_y: one-hot encoding of `y` with integer on/off values
    """
    # tf Graph input
    x = tf.placeholder(tf.float32, [None, n_input])  # mnist input images, [batch_size x 784]
    y = tf.placeholder(tf.int32, [None])             # label, [batch_size]
    dropout = tf.placeholder(tf.float32)             # dropout ratio

    # Layer weights: truncated normal with the configured standard deviation.
    weights = {
        # 5x5 conv, 1 input, 32 outputs
        'wc1': tf.Variable(tf.truncated_normal([5, 5, 1, 32], mean=0, stddev=stddev)),
        # 5x5 conv, 32 inputs, 64 outputs
        'wc2': tf.Variable(tf.truncated_normal([5, 5, 32, 64], mean=0, stddev=stddev)),
        # fully connected, 7*7*64 inputs, 1024 outputs
        # (28x28 halved by each of the two 2x2 poolings -> 7x7, 64 channels)
        'wd1': tf.Variable(tf.truncated_normal([7*7*64, 1024], mean=0, stddev=stddev)),
        # 1024 inputs, 10 outputs (class prediction)
        'out': tf.Variable(tf.truncated_normal([1024, n_classes], mean=0, stddev=stddev))
    }

    # Layer biases.
    biases = {
        'bc1': tf.Variable(tf.random_normal([32])),  # default stddev=1.0
        'bc2': tf.Variable(tf.random_normal([64])),
        'bd1': tf.Variable(tf.random_normal([1024])),
        'out': tf.Variable(tf.random_normal([n_classes]))
    }

    # Construct model
    x_reshape = tf.reshape(x, shape=[-1, 28, 28, 1])  # flat vector -> image, ready for convolution

    # Convolution layer followed by max pooling (down-sampling)
    conv1 = conv2d(x_reshape, weights['wc1'], biases['bc1'])
    conv1 = maxpool2d(conv1, k=2)

    # Convolution layer followed by max pooling (down-sampling)
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    conv2 = maxpool2d(conv2, k=2)

    # Fully connected layer
    # Reshape conv2 output to fit the fully connected layer input
    fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    # Apply dropout
    fc1 = tf.nn.dropout(fc1, keep_prob=1 - dropout)   # dropout ratio --> keep ratio

    # Output, class prediction
    pred = tf.add(tf.matmul(fc1, weights['out']), biases['out'])

    # Cross-entropy written out by hand: -sum(one_hot(y) * log softmax(pred)).
    # log_softmax computes the log-probabilities directly, so no epsilon is
    # needed inside the log to guard against log(0).
    log_probs = tf.nn.log_softmax(pred)

    one_hot_y = tf.one_hot(y, n_classes, on_value=1, off_value=0, axis=-1)
    # The `*` operator is used instead of tf.mul, which newer TensorFlow
    # releases no longer provide.
    cross_entropy_loss = -(tf.cast(one_hot_y, tf.float32) * log_probs)

    loss = tf.reduce_sum(cross_entropy_loss)

    return x, y, dropout, loss, pred, one_hot_y


In [34]:
def eval_model(sess, x, y, dropout, ims, labels, ims_test, labels_test, ims_mean, iter_per_epoch, test_iter):
    """Accumulate loss and accuracy over the training set and the testing set.

    Every batch is evaluated with a dropout ratio of 0.0, and `ims_mean` is
    subtracted from every image batch.

    Besides its arguments, this function reads these module-level names, which
    must exist before it is called: `cost` and `accuracy` (what is fetched from
    the session), `order_list` (order of the training samples), `batch_size`
    and `test_batch_size`.

    Args:
        sess: session used to run the evaluation.
        x, y, dropout: feed keys for the images, the labels and the dropout ratio.
        ims, labels: training images and labels.
        ims_test, labels_test: testing images and labels.
        ims_mean: mean image subtracted from every batch.
        iter_per_epoch: number of training mini-batches to evaluate.
        test_iter: number of testing mini-batches to evaluate.

    Returns:
        (Train_Loss, Train_Acc, Test_Loss, Test_Acc), each a sum over the
        evaluated batches: the loss terms add up every batch's cost divided by
        its batch size, the accuracy terms add up every batch's accuracy.
        Divide by the number of batches to get averages.
    """
    # Eval training dataset, visiting samples in the order given by order_list.
    Train_Loss = 0
    Train_Acc = 0
    for idx in range(iter_per_epoch):
        picked = order_list[idx*batch_size:(idx+1)*batch_size]
        batch_xs = ims[picked] - ims_mean
        batch_ys = labels[picked]
        C, A = sess.run([cost, accuracy], feed_dict={x: batch_xs, y: batch_ys, dropout: 0.0})
        Train_Loss += C/batch_size   # calculate the loss in average (per image).
        Train_Acc += A

    # Eval testing dataset.
    # order_list describes the *training* set, so it must not be used to index
    # the testing arrays: once it is shuffled its entries can point past the
    # end of the (smaller) testing set. The testing set is sliced directly.
    Test_Loss = 0
    Test_Acc = 0
    for idx in range(test_iter):
        first = idx*test_batch_size
        last = first + test_batch_size
        batch_xs = ims_test[first:last] - ims_mean
        batch_ys = labels_test[first:last]
        C, A = sess.run([cost, accuracy], feed_dict={x: batch_xs, y: batch_ys, dropout: 0.0})
        Test_Loss += C/test_batch_size
        Test_Acc += A
    return Train_Loss, Train_Acc, Test_Loss, Test_Acc

# Main function



In [35]:
# Load the training and testing data. Only the mean image of the training
# data is kept; it is subtracted from training and testing batches alike.
ims, labels, ims_mean = load_mnist_data('train', data_path='data')
ims_test, labels_test, _ = load_mnist_data('test', data_path='data')

# Order in which the training samples are visited (eval_model reads this
# module-level name too). It is the identity order and is never shuffled here.
order_list = np.arange(len(ims))

# Number of whole mini-batches per pass. Floor division keeps both values
# integers on Python 2 and Python 3 alike, so they can serve as loop bounds.
test_iter = len(ims_test) // test_batch_size   # number of testing-minibatch.
iter_per_epoch = len(ims) // batch_size        # number of training-minibatch.


def _format_metrics(sums):
    """Turn the four sums returned by eval_model into the per-batch-average report text."""
    train_loss_sum, train_acc_sum, test_loss_sum, test_acc_sum = sums
    return "Training: loss=%f, acc=%f.\t\tTesting: loss=%f, acc=%f" % (
        train_loss_sum/iter_per_epoch, train_acc_sum/iter_per_epoch,
        test_loss_sum/test_iter, test_acc_sum/test_iter)


# Launch the graph
with tf.Session() as sess:
    step = 0   # training steps taken so far, across epochs

    # Build the model. `cost` is the loss summed over a batch; dividing by the
    # batch size gives the per-image loss that is minimized.
    # `cost` and `accuracy` must keep these module-level names: eval_model
    # looks them up by name.
    x, y, dropout, cost, pred, one_hot_y = lenet()
    train_loss = cost/batch_size # loss per image
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(train_loss)

    # Evaluate model
    correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(one_hot_y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # Initialize all variables. Pick whichever initializer this TensorFlow
    # release provides, instead of hiding every possible error behind a bare
    # `except:`.
    if hasattr(tf, 'global_variables_initializer'):
        init = tf.global_variables_initializer()
    else:
        init = tf.initialize_all_variables()
    sess.run(init)

    def _evaluate():
        """Run eval_model over the full training and testing sets."""
        return eval_model(sess, x, y, dropout, ims, labels, ims_test, labels_test,
                          ims_mean, iter_per_epoch, test_iter)

    # Before Training (Random initialization), Evaluate the model one-time.
    begin = time.time()
    metrics = _evaluate()
    print("------After Random Initialization------")
    print(_format_metrics(metrics))
    duration = time.time()-begin
    print(" %f seconds" % (duration))

    print("------Start Training------")
    for epoch in range(training_epochs):
        begin = time.time()
        for idx in range(iter_per_epoch):
            picked = order_list[idx*batch_size:(idx+1)*batch_size]
            batch_xs = ims[picked] - ims_mean
            batch_ys = labels[picked]
            # Run optimization op (backprop)
            sess.run([optimizer], feed_dict={x: batch_xs, y: batch_ys, dropout: 0.5})
            if step % display_step == 0:
                # NOTE: this walks the complete training and testing sets every
                # `display_step` steps, which dominates the running time.
                metrics = _evaluate()
                print("Epoch %f, " % (float(step)/iter_per_epoch) + _format_metrics(metrics))
            step += 1

        # Evaluate after each epoch finished.
        metrics = _evaluate()
        print("Epoch %d, " % (epoch+1) + _format_metrics(metrics))
        duration = time.time()-begin
        print("Cost %f seconds" % (duration))

    print("Optimization Finished!")
 

------After Random Initialization------
Training: loss=3.129127, acc=0.097367.		Testing: loss=3.126546, acc=0.098200
 1.871381 seconds
------Start Training------
Epoch 0.000000, Training: loss=2.475106, acc=0.098717.		Testing: loss=2.475202, acc=0.098000
Epoch 0.016667, Training: loss=2.321483, acc=0.104417.		Testing: loss=2.321482, acc=0.102800
Epoch 0.033333, Training: loss=2.316925, acc=0.098633.		Testing: loss=2.316992, acc=0.095800
Epoch 0.050000, Training: loss=2.312599, acc=0.112367.		Testing: loss=2.312342, acc=0.113500
Epoch 0.066667, Training: loss=2.312945, acc=0.099150.		Testing: loss=2.312562, acc=0.100900
Epoch 0.083333, Training: loss=2.307034, acc=0.112367.		Testing: loss=2.307620, acc=0.113500
Epoch 0.100000, Training: loss=2.281481, acc=0.126617.		Testing: loss=2.281144, acc=0.130500
Epoch 0.116667, Training: loss=2.196689, acc=0.273450.		Testing: loss=2.195588, acc=0.278400
Epoch 0.133333, Training: loss=2.035428, acc=0.262650.		Testing: loss=2.027315, acc=0.264700
E

# Tensorflow python API

### tf.nn.conv2d(input, filter, strides, padding, use_cudnn_on_gpu=None, data_format=None, name=None)

Computes a 2-D convolution given 4-D input and filter tensors.

Given an **input tensor of shape [batch, in_height, in_width, in_channels]** and a **filter / kernel tensor of shape [filter_height, filter_width, in_channels, out_channels]**, this op performs the following:

Flattens the filter to a 2-D matrix with shape [filter_height * filter_width * in_channels, output_channels].

Extracts image patches from the input tensor to form a virtual tensor of shape [batch, out_height, out_width, filter_height * filter_width * in_channels].
For each patch, right-multiplies the filter matrix and the image patch vector.

In detail, with the default NHWC format,

output[b, i, j, k] = sum_{di, dj, q} input[b, strides[1] * i + di, strides[2] * j + dj, q] * filter[di, dj, q, k]


Must have strides[0] = strides[3] = 1. For the most common case of the same horizontal and vertical strides, strides = [1, stride, stride, 1].

**Args:**

> input: A Tensor. Must be one of the following types: half, float32, float64.

> filter: A Tensor. Must have the same type as input.

> strides: A list of ints. 1-D of length 4. The stride of the sliding window for each dimension of input. Must be in the same order as the dimension specified with format.

> padding: A string from: "SAME", "VALID". The type of padding algorithm to use.

> use_cudnn_on_gpu: An optional bool. Defaults to True.

> data_format: An optional string from: "NHWC", "NCHW". Defaults to "NHWC". Specify the data format of the input and output data. With the default format "NHWC", the data is stored in the order of: [batch, in_height, in_width,
in_channels]. Alternatively, the format could be "NCHW", the data storage order of: [batch, in_channels, in_height, in_width].

> name: A name for the operation (optional).


**Returns:**

> A Tensor. Has the same type as input.
