# Convolutional Neural Network and MNIST

In this tutorial, we'll be walking through the TensorFlow code behind creating a convolutional neural network. If you'd like more of a conceptual view of how these networks work, check out this blog post by Adit. A CNN tutorial from the TensorFlow docs can also be found here.

A large part of the coding today will be keeping track of the dimensionality of the inputs, the outputs, the filters, and how they change as we set up the convolutional layers.


## Our Architecture:
INPUT -> CONV1 -> RELU1 -> POOL1 -> CONV2 -> RELU2 -> POOL2 -> FC1 -> FC2 (OUTPUT, SOFTMAX)

In [0]:
# Some imports, as usual:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import datetime
%matplotlib inline
import math

In [0]:
# Download the MNIST dataset
# read_data_sets is pointed at the local "MNIST_data/" directory.
# one_hot=True asks for each label as a one-hot vector rather than a single
# digit, which is why the code later recovers the class index with argmax.

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

In [0]:
# Training hyperparameters.
# batch_size: how many examples are drawn for each mini-batch.
batch_size = 100
# num_epochs: how many times the training loop runs. Each pass of that loop
# consumes a single mini-batch, so this counts training steps rather than
# full sweeps over the training set.
num_epochs = 5000
# learning_rate: step size handed to the gradient-descent optimizer.
learning_rate = 0.1

In [0]:
# Graph inputs; both are supplied at run time through feed_dict.
# x holds a batch of images. Note the change in dimensionality: the images are
# fed as 28 x 28 x 1 volumes instead of flat vectors, and the leading None
# leaves the batch size open.
x = tf.placeholder(dtype="float", shape=[None, 28, 28, 1])
# y_ holds the matching labels: one row of 10 values per image.
y_ = tf.placeholder(dtype="float", shape=[None, 10])

### Hold up

Why are we using [None, 28, 28, 1] as the input dimension?

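The four dimensions are [batch size, height, width, channels]: `None` lets the batch size vary, each MNIST image is 28 x 28 pixels, and there is a single (grayscale) channel. Note that this ordering remains consistent throughout the coding example for defining our dimensionalities.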

### Define some helper functions


In [0]:
# padding = 1, strides = 1
def conv2d(x, w):
  """Convolve the input volume `x` with the filter bank `w`.

  Thin wrapper around tf.nn.conv2d using "SAME" padding.

  Exercise: the four entries of `strides` are intentionally left blank for
  the reader to fill in (one entry per dimension of the input; see the
  "strides = 1" hint above).
  """
  return tf.nn.conv2d(input=x, filter=w, strides=[ , , , ], padding="SAME")

# padding = none, strides = 2, filter size = 2
# note that the pool layer isn't parameterized by weights
def max_pool_2x2(x):
  """Max-pool the input volume `x`.

  Thin wrapper around tf.nn.max_pool using "VALID" padding (no padding).

  Exercise: the four entries of `ksize` (pooling window) and of `strides`
  are intentionally left blank for the reader to fill in, one entry per
  dimension of the input, following the hints in the comment above.
  """
  return tf.nn.max_pool(x, ksize=[ , , , ], strides=[ , , , ], padding="VALID")

### Define the first Conv-Relu-Pool layers

What size filters should we have for the Convolutional Layer? Recall that we don't downsample in the Conv Layer (for now) and instead leave that to the pooling layer.

In [0]:
# Number of filters in the first convolutional layer.
num_filters1 = 32

# Filter weights for CONV1, initialised from a truncated normal (stddev 0.1).
# Exercise: the four shape entries are left blank. tf.nn.conv2d documents its
# filter argument as [filter_height, filter_width, in_channels, out_channels].
w_conv1 = tf.Variable(tf.truncated_normal([ , , , ], stddev=0.1))
# Biases for CONV1, all initialised to 0.1.
# Exercise: the shape is left blank (presumably one bias per filter - confirm).
b_conv1 = tf.Variable(tf.constant(0.1, shape = [...]))

In [0]:
# Forward pass through CONV1 -> RELU1 -> POOL1.
# Exercise: h_conv1 and h_pool1 are left as `...` for the reader to fill in
# (presumably with the conv2d and max_pool_2x2 helpers defined earlier).
h_conv1 = ...
h_relu1 = tf.nn.relu(h_conv1)
h_pool1 = ...

# sanity checks for the output volume
# Printing a tensor shows its static shape, so each stage can be checked.
print(h_conv1)
print(h_relu1)
print(h_pool1)

### Define the second Conv-Relu-Pool layers

In [0]:
# Number of filters in the second convolutional layer.
num_filters2 = 64

# Filter weights for CONV2, initialised from a truncated normal (stddev 0.1).
# Exercise: the four shape entries are left blank. tf.nn.conv2d documents its
# filter argument as [filter_height, filter_width, in_channels, out_channels].
w_conv2 = tf.Variable(tf.truncated_normal([ , , , ], stddev=0.1))
# Biases for CONV2, all initialised to 0.1.
# Exercise: the shape is left blank (presumably one bias per filter - confirm).
b_conv2 = tf.Variable(tf.constant(0.1, shape=[...]))

In [0]:
# Forward pass through CONV2 -> RELU2 -> POOL2.
# Exercise: h_conv2 and h_pool2 are left as `...` for the reader to fill in
# (presumably with the conv2d and max_pool_2x2 helpers defined earlier).
h_conv2 = ...
h_relu2 = tf.nn.relu(h_conv2)
h_pool2 = ...

# sanity checks for the output volume
# Printing a tensor shows its static shape, so each stage can be checked.
print(h_conv2)
print(h_relu2)
print(h_pool2)

### Define the first FC layer

Recall the operation for the FC layer is:
\begin{equation*}
h(x) = a(Wx + b)
\end{equation*}

In [0]:
# Number of units in the first fully connected layer.
fc_units1 = 1024

# Exercise: replace `...` with the flattened size of one h_pool2 volume.
h_pool2_flat = tf.reshape(h_pool2, [-1, ...])  # we need to collapse the volume into a vector of pixels (but why -1 again?)

# Weights (truncated normal, stddev 0.1) and biases (constant 0.1) for FC1.
# Exercise: both shapes are left blank for the reader to fill in.
w_fc1 = tf.Variable(tf.truncated_normal([ , ], stddev=0.1)) # expects a vector of pixels
b_fc1 = tf.Variable(tf.constant(0.1, shape=[...]))

# Exercise: left as `...`; the FC operation is h(x) = a(Wx + b).
h_fc1 = ...

# sanity check for the output volume
print(h_fc1)

### Define the Output Layer

Remember the output layer is returning the class probabilities (10 classes)

In [0]:
# Weights (truncated normal, stddev 0.1) and biases (constant 0.1) for the
# output layer.
# Exercise: both shapes are left blank for the reader to fill in; the layer
# has to end in 10 values per example, one per class.
w_fc2 = tf.Variable(tf.truncated_normal([ , ], stddev=0.1))
b_fc2 = tf.Variable(tf.constant(0.1, shape=[...]))

# Exercise: left as `...`. These are the raw (pre-softmax) scores; the softmax
# itself is applied later inside the cross-entropy loss.
logits = ...

# sanity check for the output shape
print(logits)

### Softmax! Loss Function! Optimization!

In [0]:
# Softmax cross-entropy between the labels and the raw logits, computed per
# example and then averaged over the batch into a single scalar loss.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits)
cross_entropy_loss = tf.reduce_mean(per_example_loss)

# `optimizer` is the op that applies one gradient-descent update of the
# variables, with step size learning_rate, to reduce cross_entropy_loss.
sgd = tf.train.GradientDescentOptimizer(learning_rate)
optimizer = sgd.minimize(cross_entropy_loss)

### Some metrics to track while training

In [0]:
# A prediction is correct when the index of the largest logit equals the index
# of the largest entry in the label row.
predicted_class = tf.argmax(logits, 1)
true_class = tf.argmax(y_, 1)
correct_predictions = tf.equal(predicted_class, true_class)

# Casting the booleans to floats and averaging gives the fraction correct.
accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))

### Let's train our model

In [0]:
# initialize tensorflow variables
# `init` is an op that initialises every global variable once it is run in a
# session. `saver` is used later to write the trained model to a checkpoint
# and to restore it in the visualization cells.
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [0]:
# run model
# Trains the network, reports progress every 100 steps, evaluates on the test
# set and writes the trained variables to ./saved_model.ckpt.
with tf.Session() as sess:
    sess.run(init)
    # NOTE: despite the name, each pass of this loop trains on one mini-batch,
    # so `i` counts training steps rather than full sweeps over the data.
    for i in range(num_epochs):
        x_batch, y_batch = mnist.train.next_batch(batch_size)
        # we no longer want a flat vector of pixels; -1 lets the batch
        # dimension follow whatever next_batch returned
        x_batch = x_batch.reshape([-1, 28, 28, 1])
        # Build the feed once and reuse it for the update and the metrics.
        batch_feed = {x: x_batch, y_: y_batch}
        sess.run(optimizer, feed_dict=batch_feed)
        if i % 100 == 0:
            # Fetch both metrics in a single run call so the forward pass is
            # computed once for this batch instead of twice.
            acc, loss = sess.run([accuracy, cross_entropy_loss], feed_dict=batch_feed)
            print("Epoch: {}, accuracy: {}, loss: {}".format(i, acc, loss))

    # Final evaluation on the held-out test set (reshaped to volumes here too).
    test_feed = {x: mnist.test.images.reshape([-1, 28, 28, 1]), y_: mnist.test.labels}
    acc = sess.run(accuracy, feed_dict=test_feed)
    print("Test accuracy: {}".format(acc))
    # Persist the trained variables so later cells can restore them.
    saver.save(sess, "./saved_model.ckpt")

### Print out some pictures like we did for the previous workshops

In [0]:
def gen_image(arr):
    """Draw `arr` as a 28 x 28 grayscale image and return the pyplot module.

    `arr` is reshaped to (28, 28), multiplied by 255 and cast to uint8 before
    being handed to imshow. The `plt` module itself is returned so the caller
    can chain a call such as `.show()`.
    """
    pixels = np.reshape(arr, (28, 28))
    pixels = (pixels * 255).astype(np.uint8)
    plt.imshow(pixels, cmap='gray', interpolation='nearest')
    return plt

# Restore the trained model, report test accuracy, and show every third image
# among the first 100 test examples next to its predicted and actual label.
with tf.Session() as sess:
    saver.restore(sess, "./saved_model.ckpt")
    X_new_scaled = mnist.test.images
    test_feed = {x: X_new_scaled.reshape([-1, 28, 28, 1]), y_: mnist.test.labels}
    # Reuse the existing `accuracy` op instead of adding duplicate nodes to the
    # graph, and fetch it together with the logits so the test set only goes
    # through the network once.
    z, test_accuracy = sess.run([logits, accuracy], feed_dict=test_feed)
    y_pred = np.argmax(z, axis=1)
    print(test_accuracy)
    for i in range(0, 100, 3):
        gen_image(mnist.test.images[i]).show()
        print("Prediction: ", y_pred[i])
        # Labels are one-hot, so argmax recovers the digit.
        print("Actual label: ", np.argmax(mnist.test.labels[i]))

## Filter Visualization

Let's take a look at how our model "sees" the inputs we give.

In [0]:
# Take the test image at index 4, view it as a 28 x 28 grid, and display it.
imageToUse = mnist.test.images[4].reshape(28, 28)
plt.imshow(imageToUse, cmap="gray", interpolation="nearest")

In [0]:
def getActivations(layer, stimuli, sess):
    """Evaluate `layer` on a single image and plot the resulting activations.

    `stimuli` is reshaped to a batch of one 28 x 28 x 1 image, fed through the
    `x` placeholder in session `sess`, and the output of `layer` is passed to
    plotNNFilter. Nothing is returned.
    """
    single_image_batch = np.reshape(stimuli, [1, 28, 28, 1])
    activations = sess.run(layer, feed_dict={x: single_image_batch})
    plotNNFilter(activations)

In [0]:
def plotNNFilter(units):
    """Plot each slice along the last axis of `units` as a grayscale subplot.

    `units` is indexed as units[0, :, :, i], so it must be a 4-D array; only
    the first entry along axis 0 is drawn. The subplots are laid out six per
    row on figure 1, each titled 'Filter <i>'.
    """
    num_filters = units.shape[3]
    columns = 6
    # One row more than the ceiling is requested, matching the existing layout.
    rows = math.ceil(num_filters / columns) + 1
    plt.figure(1, figsize=(20, 20))
    for idx in range(num_filters):
        plt.subplot(rows, columns, idx + 1)
        plt.title('Filter ' + str(idx))
        plt.imshow(units[0, :, :, idx], cmap="gray", interpolation="nearest")

In [0]:
# Reload the trained variables, then plot what the first ReLU layer outputs
# for the chosen test image.
with tf.Session() as sess:
    saver.restore(sess, "./saved_model.ckpt")
    getActivations(layer=h_relu1, stimuli=imageToUse, sess=sess)