In [2]:
import tensorflow as tf

In [3]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(".", one_hot=True, reshape=False)

Extracting ./train-images-idx3-ubyte.gz
Extracting ./train-labels-idx1-ubyte.gz
Extracting ./t10k-images-idx3-ubyte.gz
Extracting ./t10k-labels-idx1-ubyte.gz


### Hyperparameters

In [41]:
learning_rate = 0.0001
epochs = 10
batch_size = 128

# number of samples to calculate accuracy
test_valid_size = 256

n_classes = 10  # MNIST has 10 classes
dropout = 0.75  # dropout probability to keep units (neurons)

### Weights and biases

In [14]:
weights = {
    'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
    'wd1': tf.Variable(tf.random_normal([7*7*64, 1024])),
    'out': tf.Variable(tf.random_normal([1024, n_classes]))
}

biases = {
    'bc1': tf.Variable(tf.random_normal([32])),
    'bc2': tf.Variable(tf.random_normal([64])),
    'bd1': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

### Convolution Layer

In [19]:
def conv2d(x, W, b, strides=1):
    # strides = [stride_for_batch, stride_image_y_axis, stride_image_x_axis, stride_for_features]
    # in general strides = [1, stride, stride, 1]
    conv_layer = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    conv_layer = tf.nn.bias_add(conv_layer, b)
    conv_layer = tf.nn.relu(conv_layer)
    return conv_layer

### Pooling Layer

In [26]:
def maxpool2d(x, k):
    return tf.nn.max_pool(
        x,
        ksize=[1, k, k, 1],
        strides=[1, k, k, 1],
        padding='SAME'
    )

### Model

In [33]:
def conv_net(x, weights, biases, dropout):
    # Layer 1 : 28x28x1 -> 14x14x32
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    conv1 = maxpool2d(conv1, k=2)
    
    # Layer 2 : 14x14x32 -> 7x7x64
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    conv2 = maxpool2d(conv2, k=2)
    
    # Fully connected layer
    # 7x7x64 -> 1024
    last_conv_layer_size = weights['wd1'].get_shape().as_list()[0]
    print(last_conv_layer_size)
    fc1 = tf.reshape(conv2, [-1, last_conv_layer_size])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    fc1 = tf.nn.dropout(fc1, dropout)
    
    # Output layer, class prediction
    # 1024 -> 10 classes
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])

    return out

### Run

In [40]:
x = tf.placeholder(tf.float32, [None, 28, 28, 1])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32)

# model
logits = conv_net(x, weights, biases, keep_prob)

# define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# accuracy
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# initialize the varialbes
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(epochs):
        for batch in range(mnist.train.num_examples//batch_size):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            sess.run(optimizer, feed_dict={
                x: batch_x,
                y: batch_y,
                keep_prob: dropout
            })
            
            # calculate batch loss and accuracy
            loss = sess.run(cost, feed_dict={
                x: batch_x,
                y: batch_y,
                keep_prob: 1.0 # don't use drop out in validation, use only in training 
            })
            
            # validation accuracy
            valid_acc = sess.run(accuracy, feed_dict={
                x: mnist.validation.images[:test_valid_size],
                y: mnist.validation.labels[:test_valid_size],
                keep_prob: 1.0
            })
            
            print('Epoch {:>2} Batch {:>3} '
                  'Loss {:>10.4f} Validation accuracy {:.6f}'.format(
                  epoch+1,
                  batch+1,
                  loss,
                  valid_acc))
            
        # calculate test accuracy
        test_acc = sess.run(accuracy, feed_dict={
            x: mnist.test.images[:test_valid_size],
            y: mnist.test.labels[:test_valid_size],
            keep_prob: dropout
        })
        
        print('------------------')
        print('Testing accuracy {}'.format(test_acc))

3136
Epoch  1 Batch   1 Loss 58127.2891 Validation accuracy 0.117188
Epoch  1 Batch   2 Loss 51250.6797 Validation accuracy 0.183594
Epoch  1 Batch   3 Loss 40815.7812 Validation accuracy 0.191406
Epoch  1 Batch   4 Loss 21369.5781 Validation accuracy 0.347656
Epoch  1 Batch   5 Loss 17058.9648 Validation accuracy 0.292969
Epoch  1 Batch   6 Loss  8309.8516 Validation accuracy 0.406250
Epoch  1 Batch   7 Loss  4836.2637 Validation accuracy 0.464844
Epoch  1 Batch   8 Loss  5808.4932 Validation accuracy 0.476562
Epoch  1 Batch   9 Loss  4694.7217 Validation accuracy 0.507812
Epoch  1 Batch  10 Loss  4054.6128 Validation accuracy 0.582031
Epoch  1 Batch  11 Loss  3758.3430 Validation accuracy 0.554688
Epoch  1 Batch  12 Loss  4104.6069 Validation accuracy 0.613281
Epoch  1 Batch  13 Loss  2589.5603 Validation accuracy 0.628906
Epoch  1 Batch  14 Loss  3414.8020 Validation accuracy 0.593750
Epoch  1 Batch  15 Loss  3281.8694 Validation accuracy 0.625000
Epoch  1 Batch  16 Loss  2957.4648 

KeyboardInterrupt: 