# mnist cnn

## imports

In [2]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

## data

In [3]:
# Read the MNIST data sets from the local "MNIST_data/" directory, requesting one-hot labels.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


## helpers

### weight initialization

In [78]:
def init_weights(shape):
    """Return a tf.Variable of the given shape for use as layer weights.

    The initial values come from tf.truncated_normal with a standard
    deviation of 0.1.
    """
    return tf.Variable(
        initial_value=tf.truncated_normal(shape=shape, stddev=0.1)
    )

### bias initialization

In [79]:
def init_bias(shape):
    """Return a tf.Variable of the given shape with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

### 2d convolution layer

In [80]:
# Padding mode shared by the conv2d and max_pool_2by2 helpers below.
padding = "SAME"

In [81]:
def conv2d(x, w):
    """Convolve x with the kernel w using a stride of 1 in every dimension.

    x: features/data, of shape
       [batch_size, image_height, image_width, num_color_channels]
    w: the kernel (convolution window), of shape
       [filter_height, filter_width, num_channels_in, num_channels_out]

    The padding mode is taken from the module-level `padding` setting.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(input=x, filter=w, strides=unit_strides, padding=padding)

### pooling layer

In [82]:
def max_pool_2by2(x):
    """Max-pool x with a 2x2 window moved in steps of 2.

    x: feature matrix, of shape [batch_size, image_height, image_width, num_color_channels]

    The padding mode is taken from the module-level `padding` setting.
    """
    # The window and the stride share the same [batch, height, width, channel] layout.
    pool_window = [1, 2, 2, 1]
    pool_strides = [1, 2, 2, 1]
    return tf.nn.max_pool(value=x, ksize=pool_window, strides=pool_strides, padding=padding)

## layers

### convolutional

In [83]:
def convolutional_layer(input_x, shape):
    """Build a convolution + bias + ReLU layer on top of input_x.

    shape is the kernel shape passed to init_weights; its 4th entry
    (shape[3]) is also used as the length of the bias vector.
    """
    num_filters = shape[3]
    kernel = init_weights(shape=shape)
    bias = init_bias(shape=[num_filters])
    pre_activation = conv2d(x=input_x, w=kernel) + bias
    return tf.nn.relu(pre_activation)

### densely connected

In [84]:
def densely_connected_layer(input_layer, size):
    """Build a fully connected layer: input_layer @ weights + bias.

    The number of input features is read from dimension 1 of
    input_layer's static shape; size is the number of output units.
    No activation is applied here.
    """
    fan_in = int(input_layer.get_shape()[1])
    weights = init_weights([fan_in, size])
    biases = init_bias([size])
    projected = tf.matmul(input_layer, weights)
    return projected + biases

## build network

In [85]:
# Number of values in one flattened training image (second axis of the images array).
pixels = mnist.train.images.shape[1]
pixels

784

In [86]:
# The images arrive flattened; recover the side length of the square image.
# Round instead of truncating so a root like 27.999... cannot become 27.
image_dim = int(round(pixels ** 0.5))
# Fail fast if the flattened length is not a perfect square; otherwise the
# reshape to [-1, image_dim, image_dim, 1] would be built with a wrong size.
assert image_dim * image_dim == pixels, \
    "expected square images, got {} pixels".format(pixels)
image_dim

28

In [87]:
# Length of one label vector (second axis of the labels array).
num_classes = mnist.train.labels.shape[1]
num_classes

10

### placeholders

In [88]:
# Peek at the first training label; the output shows a one-hot vector.
mnist.train.labels[0]

array([ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.])

In [89]:
# Flattened input images: any batch size, `pixels` values per image.
x = tf.placeholder(tf.float32, [None, pixels])
# Matching labels: one row of `num_classes` values per image.
y_true = tf.placeholder(tf.float32, [None, num_classes])

### layers

#### input layer
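The [-1](https://www.tensorflow.org/api_docs/python/tf/reshape) tells `tf.reshape` to infer that dimension from the total number of elements, so here it becomes the batch size. The 2nd and 3rd elements are the image dimensions, and the 4th is the number of color channels (1 for black and white).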

In [90]:
# Turn each flat row of x back into an image_dim x image_dim image with one channel.
input_layer = tf.reshape(x, shape=[-1, image_dim, image_dim, 1])

#### conv1 layer
32 features for each 5x5x1 convolution window.

In [91]:
# First convolution: 5x5 kernels over the single input channel, 32 output channels.
conv1 = convolutional_layer(input_layer, [5, 5, 1, 32])

#### pool1 layer

In [92]:
# 2x2 max pooling applied to the first convolution's output.
pool1 = max_pool_2by2(conv1)

#### conv2

In [93]:
# Second convolution: 5x5 kernels over pool1's 32 channels, 64 output channels.
conv2 = convolutional_layer(pool1, [5, 5, 32, 64])

#### pool2

In [94]:
# 2x2 max pooling applied to the second convolution's output.
pool2 = max_pool_2by2(conv2)

#### layer3_flat
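Each max pooling layer uses a 2x2 window with stride 2, so the image shrinks from 28x28 -> 14x14 (first max pool) -> 7x7 (second max pool). With 64 feature maps from conv2, each example flattens to 7*7*64 = 3136 values.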

In [95]:
# Derive the flattened feature count from pool2's static shape (every dimension
# except the batch one) instead of hard-coding 7*7*64, so this cell stays correct
# if the image size, the pooling, or the number of conv2 filters changes.
pool2_flat_size = pool2.get_shape()[1:].num_elements()
# num_elements() returns None when any of those dimensions is unknown.
assert pool2_flat_size is not None, "pool2 must have a fully defined non-batch shape"

# Fully connected layer with 1024 units and a ReLU on top of the flattened pool2 output.
layer3_flat = tf.nn.relu(
    densely_connected_layer(
        input_layer=tf.reshape(pool2, shape=[-1, pool2_flat_size]),
        size=1024
    )
)

#### dropout

In [96]:
# keep_prob is fed at run time, so training and evaluation can use different values.
keep_prob = tf.placeholder(dtype=tf.float32)
layer4_dropout = tf.nn.dropout(layer3_flat, keep_prob=keep_prob)

#### output layer

In [97]:
# Output layer: one value per class, with no activation. The loss below consumes these as logits.
y_pred = densely_connected_layer(input_layer=layer4_dropout, size=num_classes)

## loss function

In [98]:
# Cross entropy: softmax cross-entropy between labels and logits, averaged with reduce_mean.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=y_pred, labels=y_true)
)

## hyperparameters

In [99]:
learning_rate = 0.001  # passed to the Adam optimizer
epochs = 1000  # number of training iterations; each one trains on a single mini-batch
batch_size = 16  # examples requested from mnist.train.next_batch per iteration
dropout_hyper = 0.5  # value fed to keep_prob during training
epochs_between_ouput = 100  # test accuracy is printed every this many iterations

## optimizer

In [100]:
# Adam optimizer; `train` is the op the session runs to minimize `loss`.
optimizer = tf.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(loss)

## session

In [101]:
# Initializer op for the graph's global variables; it is the first thing run inside the session.
init = tf.global_variables_initializer()

In [None]:
# Build the evaluation ops once, before the session starts. Creating them inside
# the training loop would add a new set of nodes to the graph at every report.
matches = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
acc = tf.reduce_mean(tf.cast(matches, dtype=tf.float32))

with tf.Session() as sess:
    sess.run(init)
    # NOTE: `epochs` counts mini-batch training steps, not full passes over the data.
    for i in range(epochs):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        sess.run(train, feed_dict={
            x: batch_x,
            y_true: batch_y,
            keep_prob: dropout_hyper
        })

        #output performance
        if i % epochs_between_ouput == 0:
            print("ON STEP: {}".format(i))
            # Evaluate on the full test set with keep_prob = 1.0, i.e. nothing is dropped.
            print("ACCURACY: {}\n".format(
                sess.run(acc, feed_dict={
                    x: mnist.test.images,
                    y_true: mnist.test.labels,
                    keep_prob: 1.0
                })
            ))

ON STEP: 0
ACCURACY: 0.14180000126361847

ON STEP: 100
ACCURACY: 0.9041000008583069

ON STEP: 200
ACCURACY: 0.949400007724762

ON STEP: 300
ACCURACY: 0.9509999752044678

ON STEP: 400
