In [1]:
from utils import *

# Load the MNIST train/validation/test splits with the TF 1.x tutorial helper.
# The archives are read from the local "tmp/" directory (see the "Extracting"
# messages in the cell output). Labels are used downstream as integer class
# ids (sparse cross-entropy), so no one-hot encoding is requested here.

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("tmp/")

Extracting tmp/train-images-idx3-ubyte.gz
Extracting tmp/train-labels-idx1-ubyte.gz
Extracting tmp/t10k-images-idx3-ubyte.gz
Extracting tmp/t10k-labels-idx1-ubyte.gz


Here, we make a small, simple MNIST neural network and implement exponential scheduling using existing tensorflow tools. 

In exponential scheduling, the learning rate at a given iteration is a function of the iteration number. Specifically:

$$\eta(t) = \eta_0 d^{-t/r}$$

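where $\eta_0$ is the initial learning rate, $t$ is the iteration index, $d$ is the decay factor, and $r$ is the number of decay steps. Every $r$ iterations, the learning rate shrinks by a factor of $d$ (for example, with $d = 10$ it drops to one tenth of its value every $r$ steps). Note that TensorFlow's `decay_rate` argument is the reciprocal, $1/d$.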

In [2]:
# Start from a clean graph so this cell can be re-run.
# NOTE(review): reset_graph comes from `utils`; presumably it resets the
# default graph (and possibly random seeds) — confirm against utils.
reset_graph()

# Network dimensions: flattened 28x28 images in, one logit per digit class out.
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 50
n_outputs = 10

# Placeholders fed with one mini-batch (or a full evaluation set) at run time.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# `(None)` is just `None` (parentheses alone do not make a tuple), which leaves
# the rank of y unspecified; `(None,)` declares the intended 1-D vector holding
# one integer class label per example.
y = tf.placeholder(tf.int64, shape=(None,), name="y")

with tf.name_scope("dnn"):
    # Two ReLU hidden layers followed by a linear output layer (raw logits).
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name="hidden2")
    logits = tf.layers.dense(hidden2, n_outputs, name="outputs")

with tf.name_scope("loss"):
    # The sparse variant takes integer class ids directly (no one-hot labels)
    # and applies the softmax internally, so it is given the raw logits.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("eval"):
    # A prediction is correct when the true label has the highest logit (top-1).
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

In [3]:
with tf.name_scope("train"):
    # Hyperparameters of the exponential learning-rate schedule.
    initial_learning_rate = 0.1
    decay_steps = 10000
    # tf.train.exponential_decay scales the rate by
    # decay_rate ** (global_step / decay_steps), so it expects the reciprocal
    # of d from the formula above: 1/10 here corresponds to d = 10.
    decay_rate = 1 / 10
    # Non-trainable counter of the training steps taken so far.
    global_step = tf.Variable(0, name="global_step", trainable=False)
    # Learning rate expressed as a tensor that depends on global_step.
    learning_rate = tf.train.exponential_decay(
        learning_rate=initial_learning_rate,
        global_step=global_step,
        decay_steps=decay_steps,
        decay_rate=decay_rate,
    )
    # The optimizer reads the scheduled learning-rate tensor at every step.
    optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
    # Passing global_step to minimize() makes each training step increment it,
    # which is what advances the schedule.
    training_op = optimizer.minimize(loss, global_step=global_step)

In [4]:
# Op that initializes every variable in the graph.
init = tf.global_variables_initializer()
# Saver for checkpointing the variables.
# NOTE(review): not used by any of the cells shown here.
saver = tf.train.Saver()
# Write the graph definition to disk so it can be inspected in TensorBoard.
file_writer = tf.summary.FileWriter(
    logdir="to_tensorboard/11_14_scheduling",
    graph=tf.get_default_graph(),
)

In [5]:
n_epochs = 5
batch_size = 50

# Number of mini-batches that make up one pass over the training set.
batches_per_epoch = mnist.train.num_examples // batch_size
# Full-split feeds used to measure accuracy.
validation_feed = {X: mnist.validation.images, y: mnist.validation.labels}
test_feed = {X: mnist.test.images, y: mnist.test.labels}

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        # One epoch: a training step (which also advances global_step) per batch.
        for iteration in range(batches_per_epoch):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Report validation accuracy at the end of every epoch.
        accuracy_val = sess.run(accuracy, feed_dict=validation_feed)
        print(epoch, "Validation accuracy:", accuracy_val)
    # Final check on the held-out test split once training is finished.
    test_val = sess.run(accuracy, feed_dict=test_feed)
    print("Test accuracy:", test_val)

0 Validation accuracy: 0.9624
1 Validation accuracy: 0.9672
2 Validation accuracy: 0.9744
3 Validation accuracy: 0.9836
4 Validation accuracy: 0.9794
Test accuracy: 0.9792


In [6]:
# Close the TensorBoard event writer that was opened before training.
file_writer.close()