In [1]:
# looking at https://www.youtube.com/watch?v=u4alGiomYP4&t=1832s
# this is the fourth version, where we reconfigure the network to use
# 3 layers of convolution and then two layers of fully connected layers.
# https://codelabs.developers.google.com/codelabs/cloud-tensorflow-mnist/#10 
# is great for explaining what is going on

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# we need math for learning rate calculation
import math

# Load MNIST, downloading it into /tmp/data first when it is not there yet.
#   one_hot=True       labels come back as vectors (one slot per digit class)
#   reshape=False      images are not flattened; they are fed to the
#                      [None, 28, 28, 1] input placeholder as they are
#   validation_size=0  presumably keeps every training example in the
#                      training split (nothing held out) -- TODO confirm
mnist = input_data.read_data_sets(
    '/tmp/data',
    one_hot=True,
    reshape=False,
    validation_size=0,
)

# Fix the graph-level random seed so the random initial values are the same
# on every run of the program.
tf.set_random_seed(0)

# 99% accuracy baby! Woohooo :D 

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [2]:
# Run-time inputs of the graph; all of them are supplied through feed_dict.

# Keep-probability for dropout. Intended values: 0.75 while training and
# 1.0 (dropout disabled) while testing.
# NOTE(review): none of the layers in the model cell consume pkeep, so
# feeding it currently has no effect on the network.
pkeep = tf.placeholder(dtype=tf.float32, name='dropout_rate')

# Mini-batch of 28x28 grayscale images (one channel). The leading None
# dimension indexes the images inside the batch.
X = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28, 1])

# Correct answers for the batch: one row of 10 values per image.
Y_ = tf.placeholder(dtype=tf.float32, shape=[None, 10])

# Learning rate, re-fed on every training step so it can vary over time.
lr = tf.placeholder(dtype=tf.float32)

In [3]:
# Layer sizes and trainable parameters.
#
# Three convolutional layers followed by one fully connected layer; the
# read-out layer after that has 10 neurons (one per digit).
K = 4    # output depth of convolutional layer 1
L = 8    # output depth of convolutional layer 2
M = 12   # output depth of convolutional layer 3
N = 200  # width of the fully connected layer


def _weights(shape):
    """Return a variable of `shape` initialised from a truncated normal with stddev 0.1."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))


def _biases(size):
    """Return a bias variable of length `size` with every entry initialised to 0.1."""
    return tf.Variable(tf.ones([size])/10)


# NOTE: variables are created in exactly this order (W1, B1, W2, B2, ...);
# with a graph-level seed the creation order influences the initial values.

# Convolution kernels are [height, width, input channels, output channels].
W1 = _weights([6, 6, 1, K])  # 6x6 patch, 1 input channel, K output channels
B1 = _biases(K)

W2 = _weights([6, 6, K, L])  # 6x6 patch, K input channels, L output channels
B2 = _biases(L)

W3 = _weights([5, 5, L, M])  # 5x5 patch, L input channels, M output channels
B3 = _biases(M)

# Fully connected layer: takes the flattened 7x7xM output of the last
# convolution and produces N activations.
W4 = _weights([7 * 7 * M, N])
B4 = _biases(N)

# Read-out layer: N activations -> 10 class scores.
W5 = _weights([N, 10])
B5 = _biases(10)

In [4]:
# The model: three ReLU convolutions, one ReLU fully connected layer and a
# softmax read-out. The weights were drawn from truncated_normal, the
# initialiser recommended for use with the relu activation function.
#
# NOTE(review): the pkeep placeholder is not used anywhere in this cell, so
# no dropout is applied even though training feeds a keep-probability.


def _conv_relu(inputs, kernel, bias, stride):
    """Convolve `inputs` with `kernel`, add `bias` and apply ReLU.

    'SAME' padding is used and `stride` is applied to both spatial
    dimensions, so the spatial size of the output is the input size divided
    by `stride`.
    """
    convolved = tf.nn.conv2d(inputs, kernel, strides=[1, stride, stride, 1], padding='SAME')
    return tf.nn.relu(convolved + bias)


Y1 = _conv_relu(X, W1, B1, stride=1)   # output is 28x28
Y2 = _conv_relu(Y1, W2, B2, stride=2)  # output is 14x14
Y3 = _conv_relu(Y2, W3, B3, stride=2)  # output is 7x7

# Y3 is a stack of 7x7 maps with M channels, whereas the fully connected
# layer multiplies a flat vector by W4. Flatten every example to
# 7 * 7 * M values; -1 keeps the batch dimension.
YY = tf.reshape(Y3, shape=[-1, 7 * 7 * M])

Y4 = tf.nn.relu(tf.matmul(YY, W4) + B4)

# Raw class scores (logits) and the class probabilities derived from them.
Ylogits = tf.matmul(Y4, W5) + B5
Y = tf.nn.softmax(Ylogits)

In [5]:
# Loss: cross-entropy, -sum(Y_i * log(Yi)), between the predictions and the
# correct answers. It is computed from the logits with TensorFlow's
# softmax_cross_entropy_with_logits rather than from Y directly, which avoids
# the numerical trouble of taking the log of a probability that is 0.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=Y_, logits=Ylogits)

# Average over the batch, then scale by 100 (normalisation for batches of
# 100 images).
cost = tf.reduce_mean(per_example_loss) * 100

In [6]:
# Learning-rate schedule.
#
# The value computed here is fed into the graph through the `lr` placeholder
# that is created together with the other placeholders (X, Y_, pkeep). It is
# deliberately NOT re-created in this cell: a second tf.placeholder would only
# shadow the first one and leave an unused node in the graph.

max_learning_rate = 0.003   # rate returned for i == 0
min_learning_rate = 0.0001  # rate approached as i grows
# The gap between the current rate and min_learning_rate shrinks by a factor
# of e every `decay_speed` units of i. The training loop passes the epoch
# index as i, so this is measured in epochs.
decay_speed = 20.0


def get_learning_rate(i):
    """Return the exponentially decayed learning rate for index `i`.

    The rate starts at max_learning_rate for i == 0 and decays towards
    min_learning_rate:

        min + (max - min) * exp(-i / decay_speed)

    Args:
        i: non-negative step counter (the training loop passes the epoch
            number).

    Returns:
        The learning rate as a Python float.
    """
    # decay_speed is a float, so the division is a true division even under
    # Python 2 integer semantics.
    decay = math.exp(-i/decay_speed)
    return min_learning_rate + (max_learning_rate - min_learning_rate) * decay

In [7]:
# Adam optimizer whose learning rate is the `lr` placeholder, so the rate can
# be set anew on every step through feed_dict.
optimizer = tf.train.AdamOptimizer(learning_rate=lr)

# Running train_step performs one update that minimizes the cross-entropy
# cost.
train_step = optimizer.minimize(cost)

In [8]:
# Accuracy metric (same as in the first version).

# Index of the largest entry along axis 1: the class the network picked and
# the class marked in the correct answers, one value per image.
predicted_class = tf.argmax(Y, 1)
expected_class = tf.argmax(Y_, 1)

# True wherever the network's pick matches the correct answer. Y and Y_ only
# get values at run time, from whatever batch is fed in.
is_correct = tf.equal(predicted_class, expected_class)

# Fraction of correct answers in the batch (booleans cast to 0.0 / 1.0).
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

In [9]:
# Training session: mini-batch training for a fixed number of epochs, with
# an accuracy report after every epoch and once more at the end.
num_epochs = 30      # passes over the training set
batch_size = 100     # images per training step
train_pkeep = 0.75   # dropout keep-probability while training
eval_pkeep = 1.0     # no dropout while measuring accuracy

with tf.Session() as sess:
    tf.global_variables_initializer().run()

    # Floor division keeps this an int on both Python 2 and Python 3.
    batches_per_epoch = mnist.train.num_examples // batch_size

    # Feed for evaluating on the whole test set. Built once because it never
    # changes. lr is left out: neither accuracy nor cost depends on it.
    test_data = {X: mnist.test.images, Y_: mnist.test.labels, pkeep: eval_pkeep}

    for epoch in range(num_epochs):

        # the learning rate is decayed once per epoch
        learning_rate = get_learning_rate(epoch)

        print('Epoch', epoch, 'learning rate', learning_rate)

        for step in range(batches_per_epoch):
            # load a batch of images, thanks to mnist
            batch_X, batch_Y = mnist.train.next_batch(batch_size)

            train_data = {X: batch_X, Y_: batch_Y, lr: learning_rate, pkeep: train_pkeep}

            # train the network one time
            sess.run(train_step, feed_dict=train_data)

        # "Training" figures are measured on the last mini-batch of the epoch
        # only, with dropout disabled like any other evaluation.
        last_batch_data = {X: batch_X, Y_: batch_Y, pkeep: eval_pkeep}

        a, c = sess.run([accuracy, cost], feed_dict=last_batch_data)
        print('Current training accuracy', a, ' current error', c)
        print('Current testing accuracy', accuracy.eval(test_data))

    # Final report: same measurements as after the last epoch.
    a, c = sess.run([accuracy, cost], feed_dict=last_batch_data)
    print('Final training accuracy', a)
    print('Final testing accuracy', accuracy.eval(test_data))

('Epoch', 0, 'learning rate', 0.003)
('Current training accuracy', 0.99000001, ' current error', 7.1498723)
('Current testing accuracy', 0.97939998)
('Epoch', 1, 'learning rate', 0.0028585653310520707)
('Current training accuracy', 0.99000001, ' current error', 3.586096)
('Current testing accuracy', 0.98250002)
('Epoch', 2, 'learning rate', 0.0027240285123042826)
('Current training accuracy', 1.0, ' current error', 0.92810059)
('Current testing accuracy', 0.986)
('Epoch', 3, 'learning rate', 0.0025960531316326675)
('Current training accuracy', 0.99000001, ' current error', 3.1193864)
('Current testing accuracy', 0.98640001)
('Epoch', 4, 'learning rate', 0.0024743191839261473)
('Current training accuracy', 1.0, ' current error', 0.45928714)
('Current testing accuracy', 0.98710001)
('Epoch', 5, 'learning rate', 0.002358522270907074)
('Current training accuracy', 0.98000002, ' current error', 6.9839916)
('Current testing accuracy', 0.9813)
('Epoch', 6, 'learning rate', 0.00224837283997698

KeyboardInterrupt: 