Importing Essential Libraries and the dataset

In [1]:
import os
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

  return f(*args, **kwds)


In [2]:
LOGDIR = "MNIST_data/"
mnist = input_data.read_data_sets(LOGDIR + "data", one_hot=True)

Extracting MNIST_data/data/train-images-idx3-ubyte.gz
Extracting MNIST_data/data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/data/t10k-labels-idx1-ubyte.gz


## Building Layers

Convolutional Layer

In [3]:
def conv_layer(input, size_in, size_out, name="conv"):
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([5, 5, size_in, size_out], stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
        conv = tf.nn.conv2d(input, w, strides=[1, 1, 1, 1], padding="SAME")
        act = tf.nn.relu(conv + b)
        return tf.nn.max_pool(act, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

Fully Connected Layer 

In [4]:
def fc_layer(input, size_in, size_out, name="fc"):
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
        act = tf.matmul(input, w) + b
        
        return act

## Building the model

In [5]:
def mnist_model_1(learning_rate):
    tf.reset_default_graph()
    sess = tf.Session()
    
    x = tf.placeholder(tf.float32, shape=[None, 784], name="x")
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    y = tf.placeholder(tf.float32, shape=[None, 10], name="labels")
    
    conv1 = conv_layer(x_image, 1, 32, "conv1")
    conv2 = conv_layer(conv1, 32, 64, "conv2")
    
    flatten = tf.reshape(conv2, [-1, 7 * 7 * 64])
    fc1 = fc_layer(flatten, 7 * 7 * 64, 1024, "fc1")
    logits = fc_layer(fc1, 1024, 10, "fc2")
    
    with tf.name_scope("cost"):
        loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y)
        cost = tf.reduce_mean(loss, name="cost")
        
    with tf.name_scope("train"):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(cost)
        
    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter("MNIST_data/demo2") #Writes the tf.graph into DIR
    writer.add_graph(sess.graph)
    
    for i in range(2000):
        if i % 10 == 0:
            print(i)
        batch = mnist.train.next_batch(100)
        if i % 500 == 0:
            [train_accuracy] = sess.run([accuracy], feed_dict={x: batch[0], y: batch[1]})
            print("Step {0}, TrainAccuracy = {1}".format(i, train_accuracy))
        
        sess.run(train_step, feed_dict={x: batch[0], y: batch[1]})

### Visualize the graph on Tensorboard

`tf.summary.FileWriter` is the python class that writes data for tensorboard

`writer = tf.summary.FileWriter(DIR)
writer.add_graph(sess.graph)`

In [None]:
def main():
    mnist_model_1(learning_rate = 1e-3)
    
if __name__ == '__main__':
    main()

## Tensorboard Summaries

**summary()** is a tensorflow op that output protocol buffers containing *summarized data*.
There are 4 types of summaries available:
1. `tf.summary.scalar` : Outputs Scalar Summaries
2. `tf.summary.image` : Outputs Image Summaries
3. `tf.summary.audio` : Outputs Audio Summaries
4. `tf.summary.histogram` : Outputs Histogram Summaries

## Building the model with Summaries

### As the first step, Scalar Summaries are considered.

Let's modify our mnist_model to display scalar summaries

for an example, We want to add scalar summary to cost. This is how to do it,
`tf.summary.scalar("cost", cost)`

After adding summaries, They should merge. To merge all summaries collected in the default graph, Following line of code is needed to be added.
`summ = tf.summary.merge_all()`

Next,
`writer.add_summary()` is used to add summaries to Writer

In [6]:
def mnist_model_2(learning_rate):
    tf.reset_default_graph()
    sess = tf.Session()
    
    x = tf.placeholder(tf.float32, shape=[None, 784], name="x")
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    y = tf.placeholder(tf.float32, shape=[None, 10], name="labels")
    
    conv1 = conv_layer(x_image, 1, 32, "conv1")
    conv2 = conv_layer(conv1, 32, 64, "conv2")
    
    flatten = tf.reshape(conv2, [-1, 7 * 7 * 64])
    fc1 = fc_layer(flatten, 7 * 7 * 64, 1024, "fc1")
    logits = fc_layer(fc1, 1024, 10, "fc2")
    
    with tf.name_scope("cost"):
        loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y)
        cost = tf.reduce_mean(loss, name="cost")
        ## NEWLY ADDED
        tf.summary.scalar("cost", cost) # scalar Summary for cost
        
    with tf.name_scope("train"):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(cost)
        
    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        ## NEWLY ADDED
        tf.summary.scalar("accuracy", accuracy) # Scalar Summary for accuracy
        
    ## NEWLY ADDED
    summ = tf.summary.merge_all() # Merging all summaries collected in the default graph
    
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter("MNIST_data/demo2") #Writes the tf.graph into DIR
    writer.add_graph(sess.graph)
    
    for i in range(2000):
        batch = mnist.train.next_batch(100)
        if i % 5 == 0:
            [train_accuracy, s] = sess.run([accuracy, summ], feed_dict={x: batch[0], y: batch[1]})
            writer.add_summary(s, i)
        if i % 500 == 0:
            [train_accuracy] = sess.run([accuracy], feed_dict={x: batch[0], y: batch[1]})
            print("Step {0}, TrainAccuracy = {1}".format(i, train_accuracy))
        
        sess.run(train_step, feed_dict={x: batch[0], y: batch[1]})

In [7]:
def main():
    mnist_model_2(learning_rate = 1e-2)
    
if __name__ == '__main__':
    main()

Step 0, TrainAccuracy = 0.07000000029802322
Step 500, TrainAccuracy = 0.949999988079071
Step 1000, TrainAccuracy = 0.9399999976158142
Step 1500, TrainAccuracy = 0.9800000190734863


## Now let's work with histograms
To do that, Conv and FC layer defining functions are need to be modified.
So, Modified functions as follows.

In [8]:
def conv_layer(input, size_in, size_out, name="conv"):
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([5, 5, size_in, size_out], stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
        conv = tf.nn.conv2d(input, w, strides=[1, 1, 1, 1], padding="SAME")
        act = tf.nn.relu(conv + b)
        tf.summary.histogram("Weights", w)
        tf.summary.histogram("Biases", b)
        tf.summary.histogram("activations", act)
        return tf.nn.max_pool(act, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
    
def fc_layer(input, size_in, size_out, name="fc"):
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
        act = tf.matmul(input, w) + b
        tf.summary.histogram("Weights", w)
        tf.summary.histogram("Biases", b)
        tf.summary.histogram("activations", act)
        return act

In [11]:
def mnist_model_3(learning_rate, DIR):
    tf.reset_default_graph()
    sess = tf.Session()
    
    x = tf.placeholder(tf.float32, shape=[None, 784], name="x")
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    y = tf.placeholder(tf.float32, shape=[None, 10], name="labels")
    
    conv1 = conv_layer(x_image, 1, 32, "conv1")
    conv2 = conv_layer(conv1, 32, 64, "conv2")
    
    flatten = tf.reshape(conv2, [-1, 7 * 7 * 64])
    fc1 = fc_layer(flatten, 7 * 7 * 64, 1024, "fc1")
    logits = fc_layer(fc1, 1024, 10, "fc2")
    
    with tf.name_scope("cost"):
        loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y)
        cost = tf.reduce_mean(loss, name="cost")
        ## NEWLY ADDED
        tf.summary.scalar("cost", cost) # scalar Summary for cost
        
    with tf.name_scope("train"):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(cost)
        
    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        ## NEWLY ADDED
        tf.summary.scalar("accuracy", accuracy) # Scalar Summary for accuracy
        
    ## NEWLY ADDED
    summ = tf.summary.merge_all() # Merging all summaries collected in the default graph
    
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(DIR) #Writes the tf.graph into DIR
    writer.add_graph(sess.graph)
    
    for i in range(2001):
        batch = mnist.train.next_batch(100)
        if i % 5 == 0:
            [train_accuracy, s] = sess.run([accuracy, summ], feed_dict={x: batch[0], y: batch[1]})
            writer.add_summary(s, i)
        if i % 500 == 0:
            [train_accuracy] = sess.run([accuracy], feed_dict={x: batch[0], y: batch[1]})
            print("Step {0}, TrainAccuracy = {1}".format(i, train_accuracy))
        
        sess.run(train_step, feed_dict={x: batch[0], y: batch[1]})

### Let's do a little trick here. We will iterate 2 learning rates.

In [12]:
def main():
    for lr in [1E-3, 1E-4]:
        print("Training with learning rate = ",lr)
        directory = "MNIST_data/demo3/" + str(lr)
        mnist_model_3(learning_rate = lr, DIR = directory)
        
if __name__ == '__main__':
    main()

Training with learning rate =  0.001
Step 0, TrainAccuracy = 0.07999999821186066
Step 500, TrainAccuracy = 0.9800000190734863
Step 1000, TrainAccuracy = 0.9800000190734863
Step 1500, TrainAccuracy = 0.9800000190734863
Step 2000, TrainAccuracy = 1.0
Training with learning rate =  0.0001
Step 0, TrainAccuracy = 0.05999999865889549
Step 500, TrainAccuracy = 0.9700000286102295
Step 1000, TrainAccuracy = 0.9399999976158142
Step 1500, TrainAccuracy = 0.9900000095367432
Step 2000, TrainAccuracy = 0.9599999785423279
