In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os
from functools import partial
import tensorflow as tf

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "deep"

def save_fig(fig_id, tight_layout=True):
    path = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)
    
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

n_inputs = 28 * 28
hidden_size=100
n_outputs=10

reset_graph()

he_init = tf.contrib.layers.variance_scaling_initializer()
my_dense_layer = partial(tf.layers.dense,activation=tf.nn.elu,
                        kernel_initializer = he_init)

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")
training = tf.placeholder_with_default(False, shape=(), name='training')

with tf.name_scope("dnn"):
    hidden1 = my_dense_layer(X, hidden_size, name="hidden1")
    bn1 = tf.layers.batch_normalization(hidden1, training=training, momentum=0.9)
    bn1_act = tf.nn.elu(bn1)
    hidden2 = my_dense_layer(bn1_act, hidden_size, name="hidden2")
    bn2 = tf.layers.batch_normalization(hidden2, training=training, momentum=0.9)
    bn2_act = tf.nn.elu(bn2)
    hidden3 = my_dense_layer(bn2_act, hidden_size, name="hidden3")
    bn3 = tf.layers.batch_normalization(hidden3, training=training, momentum=0.9)
    bn3_act = tf.nn.elu(bn3)
    hidden4 = my_dense_layer(bn3_act, hidden_size, name="hidden4")
    bn4 = tf.layers.batch_normalization(hidden4, training=training, momentum=0.9)
    bn4_act = tf.nn.elu(bn4)
    hidden5 = my_dense_layer(bn4_act, hidden_size, name="hidden5")
    bn5 = tf.layers.batch_normalization(hidden5, training=training, momentum=0.9)
    bn5_act = tf.nn.elu(bn5)
    logits_before_bn = tf.layers.dense(bn5_act, n_outputs, name = "outputs")
    logits = tf.layers.batch_normalization(logits_before_bn, training=training,
                                          momentum=0.9)
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
sess = tf.InteractiveSession()

with tf.name_scope('summary'):
    tf.summary.scalar('loss', loss)
    merged = tf.summary.merge_all()
    writer = tf.summary.FileWriter('./logs', sess.graph)

init= tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 1000
batch_size = 20

X_train1 = mnist.train.images[mnist.train.labels < 5]
y_train1 = mnist.train.labels[mnist.train.labels < 5]
X_valid1 = mnist.validation.images[mnist.validation.labels < 5]
y_valid1 = mnist.validation.labels[mnist.validation.labels < 5]
X_test1 = mnist.test.images[mnist.test.labels < 5]
y_test1 = mnist.test.labels[mnist.test.labels < 5]

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train1))
        for rnd_indices in np.array_split(rnd_idx, len(X_train1) // batch_size):
            X_batch, y_batch = X_train1[rnd_indices], y_train1[rnd_indices]
            sess.run(training_op, {X:X_batch, y:y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X:X_valid1, y:y_valid1})
        if loss_val < best_loss:
            save_path = saver.save(sess,  "./my_mnist_model_0_to_4.ckpt")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    saver.restore(sess, "./my_mnist_model_0_to_4.ckpt")
    acc_test = accuracy.eval(feed_dict={X: X_test1, y: y_test1})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
0	Validation loss: 0.053278	Best loss: 0.053278	Accuracy: 98.36%
1	Validation loss: 0.043999	Best loss: 0.043999	Accuracy: 98.55%
2	Validation loss: 0.066032	Best loss: 0.043999	Accuracy: 98.28%
3	Validation loss: 0.030823	Best loss: 0.030823	Accuracy: 99.02%
4	Validation loss: 0.030794	Best loss: 0.030794	Accuracy: 99.02%
5	Validation loss: 0.033374	Best loss: 0.030794	Accuracy: 99.18%
6	Validation loss: 0.040358	Best loss: 0.030794	Accuracy: 98.94%
7	Validation loss: 0.034893	Best loss: 0.030794	Accuracy: 99.45%
8	Validation loss: 0.039098	Best 