In [1]:
# looking at https://www.youtube.com/watch?v=u4alGiomYP4&t=1832s
# this is the third version, adding dropout via tensorflow implementation
# which achieves 98.4% accuracy on previous step 

# in this 'improvement' we get 97% only, but this will allow us to learn
# more complex problems. If we care about rotation, scale and the stuch
# then this mgiht be a helpful addition

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# we need math for learning rate calculation
import math

# one_hot = True means we represent the data in a vector way, see below
mnist = input_data.read_data_sets('/tmp/data', one_hot=True)

# random seeding 0 means we will get the same random numbers each time we run the program
tf.set_random_seed(0)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [9]:
# Probability of keeping a node during dropout = 1.0 at test time (no dropout) and 0.75 at training time
pkeep = tf.placeholder(tf.float32, name='dropout_rate')

In [10]:
# we'll use the truncated_normal as it is the recomended random to use for the relu 
# activation function
X = tf.placeholder(tf.float32, [None, 784], 'image')


# first layer - relu activation
W1 = tf.Variable(tf.truncated_normal([784,200], stddev=0.1), 'l1_weights')
B1 = tf.Variable(tf.truncated_normal([200], stddev=0.1), 'l1_baiases')
Y1 = tf.nn.relu(tf.add(tf.matmul(X, W1), B1))
Y1d = tf.nn.dropout(Y1, pkeep)


# second - relu activation
W2 = tf.Variable(tf.truncated_normal([200,100], stddev=0.1), 'l2_weights')
B2 = tf.Variable(tf.truncated_normal([100], stddev=0.1), 'l2_baiases')
Y2 = tf.nn.relu(tf.add(tf.matmul(Y1d, W2), B2))
Y2d = tf.nn.dropout(Y2, pkeep)


# hird - relu activation
W3 = tf.Variable(tf.truncated_normal([100,60], stddev=0.1), 'l3_weights')
B3 = tf.Variable(tf.truncated_normal([60], stddev=0.1), 'l3_baiases')
Y3 = tf.nn.relu(tf.add(tf.matmul(Y2d, W3), B3))
Y3d = tf.nn.dropout(Y3, pkeep)


# forth - relu activation
W4 = tf.Variable(tf.truncated_normal([60,30], stddev=0.1), 'l4_weights')
B4 = tf.Variable(tf.truncated_normal([30], stddev=0.1), 'l4_baiases')
Y4 = tf.nn.relu(tf.matmul(Y3d, W4) + B4)
Y4d = tf.nn.dropout(Y4, pkeep)

# last (output one) - softmax activation
W5 = tf.Variable(tf.truncated_normal([30,10], stddev=0.1), 'output_weights')
B5 = tf.Variable(tf.truncated_normal([10], stddev=0.1), 'output_baiases')
# this was the mistake I made in the previous improvement.
# TODO think of why I need to feed YLogits in the cost functiojn
Ylogits = tf.matmul(Y4, W5) + B5
Y = tf.nn.softmax(Ylogits)


# we delcare the expected correct answers placeholder which will be used to provide the expected results
# for each set of images. This will be used in the flow to calculate the error value
Y_ = tf.placeholder(tf.float32, shape=[None, 10], name='expected_value')

In [11]:
# cross-entropy loss function (= -sum(Y_i * log(Yi)) ), normalised for batches of 100  images
# TensorFlow provides the softmax_cross_entropy_with_logits function to avoid numerical stability
# problems with log(0) which is NaN
cost = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Y_)
cost = tf.reduce_mean(cost)*100

In [12]:
#learning rate input
lr = tf.placeholder(tf.float32)

max_learning_rate = 0.003
min_learning_rate = 0.0001
decay_speed = 20.0 # 0.003-0.0001-2000=>0.9826 done in 5000 iterations

# learning rate decay
def get_learning_rate(i):
    learning_rate = min_learning_rate + (max_learning_rate - min_learning_rate) * math.exp(-i/decay_speed)
    return learning_rate

In [13]:
# the optimizer taking the learning rate input which will
# be configured at each step and will minimize the corss entropy function
train_step = tf.train.AdamOptimizer(lr).minimize(cost)

In [14]:
# Same as first version:

# % of correct answers found in batch
# based on the Y and Y_ which will be geneated during the runtime of the tests
# we compose this is_correct to compute how right we are during training
is_correct = tf.equal(tf.argmax(Y,1), tf.argmax(Y_,1))

accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

In [16]:
# we start our trainig session
with tf.Session() as sess:
    tf.global_variables_initializer().run()

    for _ in range(30):
        
        learning_rate = get_learning_rate(_)
        
        print('Epoch', _, 'learning rate', learning_rate)
        
        for i in range(int(mnist.train.num_examples/100)):
            # load a btach of images, thanks to mnist
            batch_X, batch_Y = mnist.train.next_batch(100)
            
            train_data = {X:batch_X, Y_:batch_Y, lr: learning_rate, pkeep: 0.75}
            
            # train the network one time
            sess.run(train_step, feed_dict=train_data)


        # get the accuracy after training
        a, c = sess.run([accuracy, cost], feed_dict=train_data)
        print('Current training accuracy', a, ' current error', c)
        print('Current testing accuracy', accuracy.eval({X:mnist.test.images, Y_:mnist.test.labels, lr: learning_rate, pkeep: 0.75}))

    a, c = sess.run([accuracy, cost], feed_dict=train_data)
    print('Final training accuracy', a)        
    print('Final testing accuracy', accuracy.eval({X:mnist.test.images, Y_:mnist.test.labels, lr: learning_rate, pkeep: 0.75}))

('Epoch', 0, 'learning rate', 0.003)
('Current training accuracy', 0.92000002, ' current error', 27.197239)
('Current testing accuracy', 0.93940002)
('Epoch', 1, 'learning rate', 0.0028585653310520707)
('Current training accuracy', 0.94999999, ' current error', 17.574772)
('Current testing accuracy', 0.94809997)
('Epoch', 2, 'learning rate', 0.0027240285123042826)
('Current training accuracy', 0.98000002, ' current error', 7.4490466)
('Current testing accuracy', 0.95840001)
('Epoch', 3, 'learning rate', 0.0025960531316326675)
('Current training accuracy', 0.94999999, ' current error', 15.699354)
('Current testing accuracy', 0.96060002)
('Epoch', 4, 'learning rate', 0.0024743191839261473)
('Current training accuracy', 0.95999998, ' current error', 9.6945591)
('Current testing accuracy', 0.96530002)
('Epoch', 5, 'learning rate', 0.002358522270907074)
('Current training accuracy', 0.98000002, ' current error', 5.834671)
('Current testing accuracy', 0.96600002)
('Epoch', 6, 'learning rate'