In [1]:
from utils import *

In [2]:
# Load MNIST. The archives are downloaded into ./tmp/ on first use and
# extracted from there; labels keep the library default (class indices,
# not one-hot), which is what the sparse cross-entropy loss below expects.

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(train_dir="tmp/")

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting tmp/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting tmp/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting tmp/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting tmp/t10k-labels-idx1-ubyte.gz


With Batch Normalization we saw that extra update ops are added to the graph when batch normalization layers are created, and that we had to evaluate these ops explicitly during execution. Another approach is to use `tf.control_dependencies` during the construction phase. By doing this, you can designate that certain ops must run before other ops.

In [None]:
from functools import partial

# Construction phase: a 2-hidden-layer MNIST classifier with batch
# normalization after every dense layer. The batch-norm update ops are wired
# into the training op via tf.control_dependencies, so the execution phase
# only has to run `training_op`.

# reset_graph comes from utils — presumably it clears the default graph so the
# cell can be re-run; confirm against utils.
reset_graph()

# Layer sizes: flattened 28x28 images in, 10 class scores out.
n_inputs = 28*28
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

learning_rate = 0.01
batch_norm_momentum = 0.9

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# Note the trailing comma: (None,) is a 1-tuple declaring a rank-1 batch of
# labels, whereas (None) is just None and leaves the shape fully unspecified.
y = tf.placeholder(tf.int64, shape=(None,), name="y")
# Defaults to False, so only training steps need to feed it explicitly.
training = tf.placeholder_with_default(False, shape=(), name='training')

with tf.name_scope("dnn"):
    he_init = tf.contrib.layers.variance_scaling_initializer()

    # Pre-bind the arguments shared by every layer of the same kind.
    my_batch_norm_layer = partial(tf.layers.batch_normalization, training=training, momentum=batch_norm_momentum)
    my_dense_layer = partial(tf.layers.dense, kernel_initializer=he_init)

    # dense -> batch norm -> ELU, twice; the output layer is batch-normalized
    # but has no activation (the loss applies softmax itself).
    hidden1 = my_dense_layer(X, n_hidden1, name="hidden1")
    bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))
    hidden2 = my_dense_layer(bn1, n_hidden2, name="hidden2")
    bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))
    logits_before_bn = my_dense_layer(bn2, n_outputs, name="outputs")
    logits = my_batch_norm_layer(logits_before_bn)

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    # Collect the update ops here instead of in the execution phase. This must
    # come after the batch-norm layers above have been created, otherwise the
    # collection would still be empty.
    extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # Make training_op depend on extra_update_ops,
    # i.e. extra_update_ops must execute first.
    with tf.control_dependencies(extra_update_ops):
        training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    # A prediction is correct when the true label has the highest logit.
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()

In [None]:
# Execution phase: mini-batch training with periodic accuracy reports,
# followed by a final evaluation on the test set.

n_epochs = 40
batch_size = 50
batches_per_epoch = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for step in range(batches_per_epoch):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            # training_op already depends on extra_update_ops, so they do
            # not need to be evaluated explicitly here
            train_feed = {training: True, X: X_batch, y: y_batch}
            sess.run(training_op, feed_dict=train_feed)

        # Report every 5th epoch and always on the last one. `training` is
        # not fed during evaluation, so it keeps its default of False.
        is_last_epoch = epoch == n_epochs - 1
        if is_last_epoch or epoch % 5 == 0:
            acc_train = sess.run(
                accuracy,
                feed_dict={X: mnist.train.images, y: mnist.train.labels},
            )
            acc_val = sess.run(
                accuracy,
                feed_dict={X: mnist.validation.images, y: mnist.validation.labels},
            )
            print(epoch, "train acc:", acc_train, "val acc:", acc_val)

    acc_test = sess.run(
        accuracy,
        feed_dict={X: mnist.test.images, y: mnist.test.labels},
    )
    print("Test acc:", acc_test)

0 train acc: 0.912382 val acc: 0.9166
5 train acc: 0.970945 val acc: 0.9664
10 train acc: 0.984364 val acc: 0.9744
15 train acc: 0.990418 val acc: 0.9768
20 train acc: 0.992727 val acc: 0.9774
25 train acc: 0.996 val acc: 0.9786
30 train acc: 0.996218 val acc: 0.9784
