In [1]:
import os

# Allow duplicate OpenMP runtimes to coexist in this process. The flag is set
# before tensorflow is imported so that it is already present in the
# environment when tensorflow's native libraries are loaded.
os.environ['KMP_DUPLICATE_LIB_OK']='True'

# `IPython.display` is the public import location for these helpers;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Stretch the notebook container to the full width of the browser window.
display(HTML("<style>.container { width:100% !important; }</style>"))

import tensorflow as tf

# Keep the current logging verbosity in `old_v` so it can be restored later,
# then lower the verbosity so that only errors are reported.
old_v = tf.logging.get_verbosity()
tf.logging.set_verbosity(tf.logging.ERROR)

<h2>Extract MNIST data</h2>
<p style="font-size:20px">You can change the <code>one_hot</code> option to turn one-hot encoding of the labels on or off.</p>

In [2]:
from tensorflow.examples.tutorials.mnist import input_data

# Read the MNIST data sets from the "MNIST_data/" directory, asking for
# one-hot encoded labels.
mnist = input_data.read_data_sets(
    "MNIST_data/",
    one_hot=True,
)

# Loading is done: put the logging verbosity back to the value saved in `old_v`.
tf.logging.set_verbosity(old_v)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


<h2>Define hyperparameters</h2>

In [3]:
# --- Optimisation settings ---
lr = 0.01           # learning rate handed to the optimizer
num_steps = 500     # number of training iterations
batch_size = 128    # examples fetched per training iteration

# --- Network architecture ---
num_input, num_classes = 784, 10      # input features per example, output classes
n_hidden_1, n_hidden_2 = 500, 300     # units in the first and second hidden layer

<h2>Define placeholder and Variables</h2>

In [4]:
# Begin with a fresh default graph before (re)creating the nodes below.
tf.reset_default_graph()

# Graph inputs: one row of features (X) and one row of labels (Y) per example.
# The leading dimension is None so the number of rows may vary between runs.
X = tf.placeholder(tf.float32, shape=[None, num_input], name='X')
Y = tf.placeholder(tf.int32, shape=[None, num_classes], name='Y')

# Weight matrices, each initialised from tf.random_normal and created in the
# order W1, W2, Wout.
weights = {
    key: tf.Variable(tf.random_normal(dims), name=key)
    for key, dims in [
        ('W1', [num_input, n_hidden_1]),
        ('W2', [n_hidden_1, n_hidden_2]),
        ('Wout', [n_hidden_2, num_classes]),
    ]
}

# Bias vectors, each initialised to zeros and created in the order b1, b2, bout.
biases = {
    key: tf.Variable(tf.zeros(shape=[width]), name=key)
    for key, width in [
        ('b1', n_hidden_1),
        ('b2', n_hidden_2),
        ('bout', num_classes),
    ]
}

Instructions for updating:
Colocations handled automatically by placer.


<h2>Define neural network</h2>

In [5]:
def neural_net(x, relu_on_layer_2=False):
    """Build the forward pass of the two-hidden-layer network.

    Uses the module-level ``weights`` and ``biases`` dictionaries.

    Args:
        x: Input tensor that is matrix-multiplied with ``weights['W1']``.
        relu_on_layer_2: When False (the default), the second hidden layer is
            left linear and its pre-activation output feeds the output layer.
            When True, ReLU is applied to the second hidden layer as well.

    Returns:
        The output-layer tensor (unnormalised scores; no softmax is applied here).
    """
    # First hidden layer: affine transform followed by ReLU.
    hidden_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['W1']), biases['b1']))

    # Second hidden layer: affine transform, with ReLU only on request.
    # Previously a ReLU of this layer was computed but never used, so the
    # default keeps the linear behaviour the network actually had.
    hidden_2 = tf.add(tf.matmul(hidden_1, weights['W2']), biases['b2'])
    if relu_on_layer_2:
        hidden_2 = tf.nn.relu(hidden_2)

    # Output layer: affine transform only.
    return tf.add(tf.matmul(hidden_2, weights['Wout']), biases['bout'])

<h2>Define cost function and accuracy</h2>

In [6]:
# Raw network outputs for the input placeholder.
logits = neural_net(X)

# Loss: softmax cross-entropy between logits and labels, averaged over all rows.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=logits),
    name='loss',
)

# Plain gradient descent on the loss, using the configured learning rate.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
train_op = optimizer.minimize(loss)

# A row counts as correct when the index of its largest logit equals the
# index of the largest entry in its label row.
predicted_class = tf.argmax(logits, axis=1)
true_class = tf.argmax(Y, axis=1)
correct_pred = tf.equal(predicted_class, true_class)

# Accuracy is the mean of the correctness flags after casting them to float32.
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')

# Op that initialises every global variable in the graph.
init = tf.global_variables_initializer()

<h2>Execute training</h2>

In [7]:
with tf.Session() as sess:
    # Run the variable initialiser before any training step.
    sess.run(init)

    for i in range(num_steps):
        # Fetch the next training batch and take one optimisation step on it.
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        feed = {X: batch_x, Y: batch_y}
        sess.run(train_op, feed_dict=feed)

        # Report only on every 100th step.
        if i % 100 != 0:
            continue

        # Accuracy is measured on the same batch that was just trained on.
        acc = sess.run(accuracy, feed_dict=feed)
        print("step "+str(i)+", Accuracy= {:.3f}".format(acc))

    print("Training finished!")

    # Final evaluation on the test images and labels.
    test_feed = {X: mnist.test.images, Y: mnist.test.labels}
    print("Testing Accuracy:", sess.run(accuracy, feed_dict=test_feed))

step 0, Accuracy= 0.219
step 100, Accuracy= 0.914
step 200, Accuracy= 0.898
step 300, Accuracy= 0.922
step 400, Accuracy= 0.969
Training finished!
Testing Accuracy: 0.9186


<h2>Your results</h2>

| n_hidden_1 | n_hidden_2 | batch_size |  lr  | num_steps | Acc.  | Testing Acc.| Activation F. |
|------------|------------|------------|------|-----------|-------|-------------|---------------|
|    300     |     100    |     128    | 0.01 |    500    | 0.91  |    0.87     |     None      |
|    300     |     200    |     128    | 0.01 |    500    | 0.93  |    0.87     |     None      |
|    500     |     300    |     128    | 0.01 |    500    | 0.93  |    0.88     |     None      |
|    500     |     300    |     128    | 0.01 |    500    | 0.90  |    0.88     |     None      |
|    500     |     300    |     128    | 0.01 |    500    | 0.97  |    0.92     |   ReLu (h1)   |
|    500     |     300    |     128    | 0.01 |    500    | 0.93  |    0.89     |   ReLu(h1&h2) |

<p style="font-size:15px">I tried varying all of the network parameters and found that increasing the number of nodes in the first hidden layer to 500 and in the second to 300 helped increase the accuracy. Increasing the number of nodes further decreased the accuracy. I also tried modifying the learning rate, but neither increasing nor decreasing it helped with accuracy. Then I tried using activation functions on the hidden layers: applying ReLU to only the first hidden layer increased the accuracy to 97%, and the testing accuracy was 92%.</p>