# Credits
Code adapted from: https://github.com/decentralion/tf-dev-summit-tensorboard-tutorial/blob/master/mnist.py
# Hyperparameter search

In [1]:
import os
import os.path
import shutil
import tensorflow as tf

# Root directory shared by the dataset files and the TensorBoard logs.
LOGDIR = "./mnist_demo/"

# Read the MNIST data sets from the "data" folder under LOGDIR, with
# one_hot=True so labels are one-hot vectors.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(
    train_dir=os.path.join(LOGDIR, "data"),
    one_hot=True,
)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  return f(*args, **kwds)


Extracting ./mnist_demo/data/train-images-idx3-ubyte.gz
Extracting ./mnist_demo/data/train-labels-idx1-ubyte.gz
Extracting ./mnist_demo/data/t10k-images-idx3-ubyte.gz
Extracting ./mnist_demo/data/t10k-labels-idx1-ubyte.gz


In [2]:
def conv_layer(input, size_in, size_out, name='conv'):
    """Build a conv -> bias -> ReLU -> max-pool stage under its own name scope.

    Args:
        input: Tensor fed to ``tf.nn.conv2d``.
        size_in: Number of input channels of the 5x5 kernel.
        size_out: Number of output channels (also the bias length).
        name: Name scope that groups the ops created here.

    Returns:
        The ReLU activations after a 2x2 max-pool with stride 2 ("SAME"
        padding).
    """
    kernel_shape = [5, 5, size_in, size_out]
    pool_window = [1, 2, 2, 1]

    with tf.name_scope(name):
        # Small random weights and a slightly positive bias.
        kernel_init = tf.truncated_normal(kernel_shape, stddev=0.1)
        kernel = tf.Variable(kernel_init, name="W")
        bias_init = tf.constant(0.1, shape=[size_out])
        bias = tf.Variable(bias_init, name="B")

        # Stride-1 convolution with "SAME" padding, then ReLU.
        convolved = tf.nn.conv2d(
            input, kernel, strides=[1, 1, 1, 1], padding="SAME")
        activated = tf.nn.relu(convolved + bias)

        # Record distributions for TensorBoard's histogram tab.
        for tag, tensor in (("weights", kernel),
                            ("biases", bias),
                            ("activations", activated)):
            tf.summary.histogram(tag, tensor)

        pooled = tf.nn.max_pool(
            activated, ksize=pool_window, strides=pool_window, padding="SAME")
        return pooled

def fc_layer(input, size_in, size_out, name='fc'):
    """Build a fully connected (affine) layer under its own name scope.

    No non-linearity is applied here; the caller decides whether to add one.

    Args:
        input: Tensor multiplied by the weight matrix via ``tf.matmul``.
        size_in: Number of rows of the weight matrix.
        size_out: Number of columns of the weight matrix (and bias length).
        name: Name scope that groups the ops created here.

    Returns:
        The tensor ``input @ W + B``.
    """
    with tf.name_scope(name):
        # Small random weights and a slightly positive bias.
        weight_init = tf.truncated_normal([size_in, size_out], stddev=0.1)
        weights = tf.Variable(weight_init, name="W")
        bias_init = tf.constant(0.1, shape=[size_out])
        bias = tf.Variable(bias_init, name="B")

        projected = tf.matmul(input, weights) + bias

        # Record distributions for TensorBoard's histogram tab; the
        # "activations" entry is the linear output of this layer.
        for tag, tensor in (("weights", weights),
                            ("biases", bias),
                            ("activations", projected)):
            tf.summary.histogram(tag, tensor)

        return projected

In [6]:
def mnist_model(learning_rate, use_two_fc, use_two_conv, hparam,
                num_steps=1000, batch_size=100):
    """Build, train and log one MNIST classifier configuration.

    The default graph is reset, a network is assembled from ``conv_layer``
    and ``fc_layer``, and it is trained with Adam on batches drawn from the
    module-level ``mnist`` data set. Summaries and the graph are written to
    a sub-directory of ``LOGDIR`` named after ``hparam``.

    Args:
        learning_rate: Learning rate handed to ``tf.train.AdamOptimizer``.
        use_two_fc: If true, use a 1024-unit hidden FC layer with ReLU
            before the 10-way output layer; otherwise a single FC layer.
        use_two_conv: If true, stack two conv layers (32 then 64 channels);
            otherwise use a single 16-channel conv layer.
        hparam: Run label, used as the log sub-directory name.
        num_steps: Number of training steps to run.
        batch_size: Number of examples per training batch.

    Returns:
        None. Progress is printed every 50 steps and logged via summaries.
    """
    tf.reset_default_graph()

    # Setup placeholders, and reshape the data
    x = tf.placeholder(tf.float32, shape=[None, 784], name="x")
    y = tf.placeholder(tf.float32, shape=[None, 10], name="labels")
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    tf.summary.image('input', x_image, 3)

    # Create the network
    if use_two_conv:
        conv1 = conv_layer(x_image, 1, 32, "conv1")
        conv_out = conv_layer(conv1, 32, 64, "conv2")
    else:
        conv_out = conv_layer(x_image, 1, 16, "conv")

    # Derive the flattened width from the static shape rather than
    # hard-coding 7*7*64: the single-conv path yields 14*14*16, which
    # equals 7*7*64 only by coincidence.
    flat_size = 1
    for dim in conv_out.get_shape().as_list()[1:]:
        flat_size *= dim
    flattened = tf.reshape(conv_out, [-1, flat_size])

    if use_two_fc:
        fc1 = fc_layer(flattened, flat_size, 1024, "fc1")
        relu = tf.nn.relu(fc1)
        tf.summary.histogram("fc1/relu", relu)
        logits = fc_layer(relu, 1024, 10, "fc2")
    else:
        logits = fc_layer(flattened, flat_size, 10, "fc")

    # Compute cross entropy as our loss function
    with tf.name_scope("xent"):
        xent = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=y))
    tf.summary.scalar("xent", xent)

    # Use an AdamOptimizer to train the network
    with tf.name_scope("train"):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(xent)

    # Compute the accuracy
    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar("accuracy", accuracy)

    init_op = tf.global_variables_initializer()
    merged_summary = tf.summary.merge_all()

    # The session and the event writer are both released when the run ends
    # (or fails), so repeated calls in a sweep do not accumulate open
    # sessions or leave summaries unflushed.
    with tf.Session() as sess:
        # Initialize all the variables
        sess.run(init_op)
        writer = tf.summary.FileWriter(os.path.join(LOGDIR, hparam))
        try:
            writer.add_graph(sess.graph)
            for i in range(num_steps):
                batch = mnist.train.next_batch(batch_size)
                feed = {x: batch[0], y: batch[1]}

                # Occasionally report accuracy (measured on the current
                # training batch, before this step's update).
                if i % 50 == 0:
                    train_accuracy, s = sess.run(
                        [accuracy, merged_summary], feed_dict=feed)
                    writer.add_summary(s, i)
                    print("step %d, training accuracy %g" % (i, train_accuracy))

                # Run the training step
                sess.run(train_step, feed_dict=feed)
        finally:
            writer.close()

In [7]:
def make_hparam_string(learning_rate, use_two_fc, use_two_conv):
    """Return a compact label describing one hyperparameter combination.

    The label has the form ``lr_<rate>,conv=<n>,fc=<n>`` where the rate is
    rendered in zero-decimal scientific notation and each ``n`` is 2 when
    the matching flag is truthy, else 1 (e.g. ``lr_1E-03,conv=2,fc=1``).
    """
    conv_count = 2 if use_two_conv else 1
    fc_count = 2 if use_two_fc else 1
    return "lr_%.0E,conv=%d,fc=%d" % (learning_rate, conv_count, fc_count)

In [8]:
# Grid of settings to sweep; extend any of these lists to widen the search.
learning_rates = [1E-3, 1E-4]  # you can try adding some more learning rates
fc_choices = [True]            # include False to try a single FC layer too
conv_choices = [False, True]

for learning_rate in learning_rates:
    for use_two_fc in fc_choices:
        for use_two_conv in conv_choices:
            # Label identifying this run (example: "lr_1E-03,conv=2,fc=2");
            # it is printed and passed to mnist_model.
            hparam = make_hparam_string(learning_rate, use_two_fc, use_two_conv)
            print('Starting run for %s' % hparam)

            # Train one model with this combination of settings.
            mnist_model(learning_rate, use_two_fc, use_two_conv, hparam)
print('Done training!')

Starting run for lr_1E-03,conv=1,fc=2
step 0, training accuracy 0.04
step 50, training accuracy 0.95
step 100, training accuracy 0.95
step 150, training accuracy 0.94
step 200, training accuracy 0.92
step 250, training accuracy 0.95
step 300, training accuracy 0.96
step 350, training accuracy 0.99
step 400, training accuracy 0.99
step 450, training accuracy 0.95
step 500, training accuracy 0.95
step 550, training accuracy 0.97
step 600, training accuracy 0.99
step 650, training accuracy 0.98
step 700, training accuracy 0.99
step 750, training accuracy 0.97
step 800, training accuracy 0.97
step 850, training accuracy 1
step 900, training accuracy 0.99
step 950, training accuracy 1
Starting run for lr_1E-03,conv=2,fc=2
step 0, training accuracy 0.1
step 50, training accuracy 0.91
step 100, training accuracy 0.96
step 150, training accuracy 1
step 200, training accuracy 0.95
step 250, training accuracy 0.97
step 300, training accuracy 0.96
step 350, training accuracy 0.93
step 400, traini

KeyboardInterrupt: 

step 0, training accuracy 0.1  
step 500, training accuracy 0.08  
step 1000, training accuracy 0.12


## Let's visualize the TensorFlow graph
tensorboard --logdir ./mnist_demo