In [1]:
from utils import *

# Load the MNIST dataset from the local "tmp/" directory.
# Later cells read the `mnist.train`, `mnist.validation` and `mnist.test` splits.
# NOTE(review): labels are consumed downstream as integer class ids, so this
# presumably relies on the loader's default (non one-hot) label encoding.

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(train_dir="tmp/")

Extracting tmp/train-images-idx3-ubyte.gz
Extracting tmp/train-labels-idx1-ubyte.gz
Extracting tmp/t10k-images-idx3-ubyte.gz
Extracting tmp/t10k-labels-idx1-ubyte.gz


When applying dropout, at each training step, each neuron has a certain probability $p$ of not firing during that step. This helps avoid overfitting during training, as the network cannot rely solely on a small set of neurons when adjusting its weights.

The absence of certain neurons during training means that during testing - when all neurons will be firing - each neuron will receive an unexpectedly strong signal from the lower layer. This is adjusted for by multiplying the total incoming signal by the keep probability $(1-p)$ at test time (or, equivalently, by dividing the surviving activations by $(1-p)$ during training, which is what `tf.layers.dropout` does). Therefore, dropout is a technique where the network needs to know whether or not it is training.

In [2]:
# reset_graph comes from utils; presumably it clears the default graph so that
# re-running this cell does not pile up duplicate nodes -- TODO confirm.
reset_graph()

# Layer sizes of the network.
n_inputs = 28 * 28  # one input feature per pixel
n_hidden1 = 300
n_hidden2 = 50
n_outputs = 10      # one logit per class

# Batch of flattened images; the batch dimension is left unspecified.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# Batch of integer class ids. Note that `(None)` is just `None` (a completely
# unknown shape), not a tuple; the labels are a 1-D vector, so declare that
# explicitly with a one-element tuple to get static shape checking.
y = tf.placeholder(tf.int64, shape=(None,), name="y")

In [3]:
# Scalar boolean flag telling the graph whether it is in training mode.
# It defaults to False, so it only needs to be fed (as True) for training steps.
training = tf.placeholder_with_default(input=False, shape=(), name="training")

In [4]:
# Probability with which each unit is dropped while training.
dropout_rate = 0.5


def _with_dropout(tensor):
    """Apply dropout to `tensor`; it is only active when `training` is fed as True."""
    return tf.layers.dropout(tensor, rate=dropout_rate, training=training)


# Dropout applied directly to the inputs (created outside the "dnn" name scope).
X_drop = _with_dropout(X)

with tf.name_scope("dnn"):
    # Dropout sits "between" layers: every dense layer reads the dropped-out
    # output of the layer before it.
    hidden1 = tf.layers.dense(
        X_drop, units=n_hidden1, activation=tf.nn.relu, name="hidden1")
    hidden1_drop = _with_dropout(hidden1)

    hidden2 = tf.layers.dense(
        hidden1_drop, units=n_hidden2, activation=tf.nn.relu, name="hidden2")
    hidden2_drop = _with_dropout(hidden2)

    # Output layer: raw logits, no activation.
    logits = tf.layers.dense(hidden2_drop, units=n_outputs, name="outputs")

In [5]:
# Step size used by the gradient descent optimizer below.
learning_rate = 0.01

with tf.name_scope("loss"):
    # Per-example cross-entropy between integer labels and logits,
    # averaged over the batch.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y,
        logits=logits,
    )
    loss = tf.reduce_mean(input_tensor=xentropy, name="loss")

with tf.name_scope("eval"):
    # A prediction is correct when the true label is the top-1 logit;
    # accuracy is the mean of those booleans cast to floats.
    correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    accuracy = tf.reduce_mean(
        input_tensor=tf.cast(correct, tf.float32),
        name="accuracy",
    )

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

# Graph-level utilities: variable initializer, checkpoint saver, and a
# writer that records the current default graph under the given log directory.
init = tf.global_variables_initializer()
saver = tf.train.Saver()
file_writer = tf.summary.FileWriter(
    logdir="to_tensorboard/11_19_dropout",
    graph=tf.get_default_graph(),
)

In [6]:
n_epochs = 20
batch_size = 50

# Train with mini-batch gradient descent, reporting validation accuracy after
# every epoch and test accuracy once at the end.
# The try/finally guarantees the summary writer is closed even if training
# raises or the cell is interrupted.
try:
    with tf.Session() as sess:
        init.run()
        # Number of full mini-batches per epoch (loop-invariant, so computed once).
        n_batches = mnist.train.num_examples // batch_size
        for epoch in range(n_epochs):
            for iteration in range(n_batches):
                X_batch, y_batch = mnist.train.next_batch(batch_size)
                # if training, feed training_variable with True so dropout is active
                sess.run(training_op, feed_dict={training: True, X: X_batch, y: y_batch})
            # `training` is not fed here, so it keeps its default (False) and
            # dropout is disabled during evaluation.
            accuracy_val = accuracy.eval(feed_dict={X: mnist.validation.images, y: mnist.validation.labels})
            print(epoch, "Validation accuracy:", accuracy_val)
        accuracy_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
        print("Final test accuracy:", accuracy_test)
finally:
    file_writer.close()

0 Validation accuracy: 0.8196
1 Validation accuracy: 0.8702
2 Validation accuracy: 0.8898
3 Validation accuracy: 0.9008
4 Validation accuracy: 0.9136
5 Validation accuracy: 0.9214
6 Validation accuracy: 0.9262
7 Validation accuracy: 0.9296
8 Validation accuracy: 0.9326
9 Validation accuracy: 0.9366
10 Validation accuracy: 0.9366
11 Validation accuracy: 0.9406
12 Validation accuracy: 0.944
13 Validation accuracy: 0.9458
14 Validation accuracy: 0.949
15 Validation accuracy: 0.949
16 Validation accuracy: 0.951
17 Validation accuracy: 0.9536
18 Validation accuracy: 0.9528
19 Validation accuracy: 0.9552
Final test accuracy: 0.9492
