# 2-Layer Convolutional Neural Network

## Import dependencies

In [1]:
import tensorflow as tf
import numpy as np

## Load in the dataset

In [2]:
# Load MNIST with TensorFlow's tutorial reader from the local ../datasets/MNIST/
# directory. one_hot=True requests labels as one-hot vectors, which is the form
# the [None, num_classes] label placeholder below is fed with.
from tensorflow.examples.tutorials.mnist import input_data
data = input_data.read_data_sets('../datasets/MNIST/', one_hot=True)

Extracting ../datasets/MNIST/train-images-idx3-ubyte.gz
Extracting ../datasets/MNIST/train-labels-idx1-ubyte.gz
Extracting ../datasets/MNIST/t10k-images-idx3-ubyte.gz
Extracting ../datasets/MNIST/t10k-labels-idx1-ubyte.gz


In [3]:
# Report the number of labelled examples in each split of the dataset.
for fmt, subset in (
    ('Training set    = {:,}', data.train),
    ('Testing set     = {:,}', data.test),
    ('Validation set  =  {:,}', data.validation),
):
    print(fmt.format(len(subset.labels)))

Training set    = 55,000
Testing set     = 10,000
Validation set  =  5,000


## Hyperparameters

In [4]:
# Data dimension
image_size = 28        # MNIST images are image_size x image_size pixels
image_channel = 1      # single (grayscale) channel
image_shape = (image_size, image_size, image_channel)
# Length of one flattened image. The channel count is part of the product so
# this value always agrees with image_shape and with the reshape from flat
# vectors back to [image_size, image_size, image_channel] images.
image_shape_flat = image_size * image_size * image_channel
num_classes = 10       # one class per digit

# Network
filter_size = 5        # side length of the square convolution kernels
filter_1 = 32          # feature maps produced by the first convolution layer
filter_2 = 64          # feature maps produced by the second convolution layer
fc_size = 256          # units in the fully connected layer

# Training
batch_size = 32
learning_rate = 1e-3   # step size handed to the optimizer

## Helper functions

#### `weight` and `bias`

In [5]:
def weight(shape, stddev=0.5, mean=0):
    """Create a trainable weight variable with truncated-normal initial values.

    Args:
        shape: Shape of the variable, e.g. a 4-element list for a convolution
            kernel or a 2-element list for a dense layer.
        stddev: Standard deviation of the truncated normal the initial values
            are drawn from. Defaults to 0.5, the value this notebook has
            always used, so existing calls are unaffected.
        mean: Mean of the truncated normal. Defaults to 0.

    Returns:
        A ``tf.Variable`` created with ``name='weight'`` holding the sampled
        initial values.
    """
    # NOTE(review): 0.5 is a wide spread for layers with a large fan-in (the
    # dense layer in this notebook has 3136 inputs); pass a smaller stddev
    # here if training turns out to be unstable.
    initial = tf.truncated_normal(shape=shape, stddev=stddev, mean=mean)
    return tf.Variable(initial, name='weight')

def bias(shape):
    """Create a trainable bias variable initialised to zeros.

    Args:
        shape: Either a single length (e.g. ``32``), which is how this
            notebook calls it, or a list/tuple of dimensions (e.g. ``[32]``),
            mirroring the way ``weight`` receives its shape.

    Returns:
        A ``tf.Variable`` created with ``name='bias'`` and filled with zeros.
    """
    # A bare length is wrapped into a one-element shape exactly as before.
    # A list/tuple is used as given instead of being nested one level deeper,
    # which would not be a valid shape.
    dims = list(shape) if isinstance(shape, (list, tuple)) else [shape]
    initial = tf.zeros(shape=dims)
    return tf.Variable(initial, name='bias')

#### `convolution` and `pooling`

In [6]:
def conv2d(X, W):
    """Apply a 2-D convolution of kernel ``W`` over ``X``.

    The convolution moves one step at a time along every dimension and uses
    ``'SAME'`` padding.

    Args:
        X: Input tensor passed to ``tf.nn.conv2d``.
        W: Convolution kernel passed to ``tf.nn.conv2d``.

    Returns:
        The tensor produced by ``tf.nn.conv2d``.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(X, W, strides=unit_strides, padding='SAME')

def max_pool(X):
    """Max-pool ``X`` with a 2x2 window moved in steps of 2.

    Uses ``'SAME'`` padding; the window and stride are both 1 along the first
    and last dimensions.

    Args:
        X: Input tensor passed to ``tf.nn.max_pool``.

    Returns:
        The tensor produced by ``tf.nn.max_pool``.
    """
    pool_window = [1, 2, 2, 1]
    pool_stride = [1, 2, 2, 1]
    return tf.nn.max_pool(X, ksize=pool_window, strides=pool_stride, padding='SAME')

#### `flatten` layer

In [7]:
def flatten(layer):
    """Reshape ``layer`` so each example becomes a single row of features.

    The feature count is the product of the dimensions at positions 1 to 3 of
    ``layer.get_shape()`` (everything after the leading dimension for a
    4-D tensor).

    Args:
        layer: Tensor to flatten; must provide ``get_shape()``.

    Returns:
        A ``(layer_flat, num_features)`` pair: the tensor reshaped to
        ``[-1, num_features]`` and the feature count itself.
    """
    per_example_dims = layer.get_shape()[1:4]
    num_features = np.array(per_example_dims, dtype=int).prod()
    return tf.reshape(layer, [-1, num_features]), num_features

## Building the Computational Graph

### Placeholder variables

In [8]:
# Flattened input images: one row of image_shape_flat values per example;
# None leaves the number of examples unspecified.
X = tf.placeholder(tf.float32, [None, image_shape_flat])
# Labels: one row of num_classes values per example.
y = tf.placeholder(tf.float32, [None, num_classes])

# Restore the image layout for the convolution layers; -1 lets the first
# dimension follow however many examples are fed in.
X_image = tf.reshape(X, [-1, image_size, image_size, image_channel])
# Index of the largest entry in each label row, i.e. the true class index.
y_true = tf.argmax(y, axis=1)
# Bare expression so the notebook echoes the reshaped tensor.
X_image

<tf.Tensor 'Reshape:0' shape=(?, 28, 28, 1) dtype=float32>

### Building the `convnet`

#### Input Layer »» Hidden Layer 1

In [9]:
# First convolution block: filter_size x filter_size kernels taking
# image_channel input channels to filter_1 feature maps.
W_hidden1 = weight(shape=[filter_size, filter_size, image_channel, filter_1])
# One bias value per output feature map.
b_hidden1 = bias(shape=filter_1)
# Convolve, add the bias, then apply ReLU.
hidden1 = tf.nn.relu(conv2d(X_image, W_hidden1) + b_hidden1)
# Pooling halves the spatial size (28 -> 14, as the echoed shape shows).
hidden1 = max_pool(hidden1)
# Bare expression so the notebook echoes the resulting tensor.
hidden1

<tf.Tensor 'MaxPool:0' shape=(?, 14, 14, 32) dtype=float32>

#### Hidden Layer 1 »» Hidden Layer 2

In [10]:
# Second convolution block: filter_size x filter_size kernels taking the
# filter_1 feature maps of the first block to filter_2 feature maps.
W_hidden2 = weight(shape=[filter_size, filter_size, filter_1, filter_2])
# One bias value per output feature map.
b_hidden2 = bias(shape=filter_2)
# Convolve, add the bias, then apply ReLU.
hidden2 = tf.nn.relu(conv2d(hidden1, W_hidden2) + b_hidden2)
# Pooling halves the spatial size again (14 -> 7, as the echoed shape shows).
hidden2 = max_pool(hidden2)
# Bare expression so the notebook echoes the resulting tensor.
hidden2

<tf.Tensor 'MaxPool_1:0' shape=(?, 7, 7, 64) dtype=float32>

#### Flatten Hidden Layer 2 output

In [11]:
# Collapse each example's feature maps into a single row so they can feed the
# fully connected layer; num_features is the length of that row.
hidden2_flat, num_features = flatten(hidden2)
# Echo both the flattened tensor and its feature count.
hidden2_flat, num_features

(<tf.Tensor 'Reshape_1:0' shape=(?, 3136) dtype=float32>, 3136)

#### Hidden Layer 2 »» Fully connected layer

In [12]:
# Fully connected layer: num_features inputs -> fc_size units.
W_fc = weight(shape=[num_features, fc_size])
# One bias value per unit.
b_fc = bias(shape=fc_size)
# Affine transform followed by ReLU.
fc_layer = tf.nn.relu(tf.matmul(hidden2_flat, W_fc) + b_fc)
# Bare expression so the notebook echoes the resulting tensor.
fc_layer

<tf.Tensor 'Relu_2:0' shape=(?, 256) dtype=float32>

#### Fully connected Layer »» Output layer

In [13]:
# Output layer: fc_size inputs -> num_classes raw scores (no activation).
W_out = weight(shape=[fc_size, num_classes])
# One bias value per class.
b_out = bias(shape=num_classes)
logits = tf.matmul(fc_layer, W_out) + b_out
# Softmax over the logits gives the per-class probabilities.
y_pred = tf.nn.softmax(logits)
# Index of the most probable class per example. Despite the name, this is the
# predicted class, not the ground truth (that is y_true).
y_pred_true = tf.argmax(y_pred, axis=1)
# Bare expression so the notebook echoes the logits tensor.
logits

<tf.Tensor 'add_3:0' shape=(?, 10) dtype=float32>

### cost and optimizer

In [14]:
# Per-example cross-entropy between the labels and the network output. The op
# applies softmax internally, so it is given the raw logits rather than y_pred.
xentropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y)
# Scalar loss: the mean cross-entropy over the examples fed in.
cost = tf.reduce_mean(xentropy)
# Adam optimizer using the learning_rate hyperparameter.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
# Op that applies one optimisation step towards a lower cost when run.
train_step = optimizer.minimize(cost)

### Evaluate Accuracy

In [15]:
# Element-wise comparison of the true and predicted class indices.
correct = tf.equal(y_true, y_pred_true)
# Cast the booleans to floats so their mean is the fraction classified correctly.
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
# Bare expression so the notebook echoes the accuracy tensor.
accuracy

<tf.Tensor 'Mean_1:0' shape=() dtype=float32>

## Running the Computational Graph