In [42]:
import tensorflow as tf

In [43]:
from tensorflow.examples.tutorials.mnist import input_data

In [44]:
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)  # y labels are Onehot-encoded

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


# About MNIST dataset

To represent the actual images themselves, the 28x28 pixels are flattened into a 1D vector which is 784 pixels in size. 
Each of the 784 pixels making up the image is stored as a value between 0 and 255. This determines the grayscale of the pixel,
as our images are presented in black and white only. So a black pixel is represented by 255, and a white pixel by 0, with the
various shades of gray somewhere in between.

We can use the mnist variable to find out the size of the dataset we have just imported. Looking at the num_examples for each 
of the three subsets, we can determine that the dataset has been split into 55,000 images for training, 5000 for validation, 
and 10,000 for testing. Add the following lines to your file:

In [49]:
n_train = mnist.train.num_examples  # 55,000
n_validation = mnist.validation.num_examples  # 5000
n_test = mnist.test.num_examples  # 10,000

In [25]:
n_input = 784  
n_hidden1 = 512
n_hidden2 = 256
n_hidden3 = 128
n_output = 10  # output layer (0-9 digits)

In [26]:
# Hyper parameters

learning_rate = 1e-4
n_iterations = 1000
batch_size = 128
dropout = 0.5

In [27]:
X = tf.placeholder("float", [None, n_input]) #Rank =2 shape =55000*784
Y = tf.placeholder("float", [None, n_output]) #Rank-2 shape = 55000*1
keep_prob = tf.placeholder(tf.float32)

# about the input data shape

For X we use a shape of [None, 784], where None represents any amount, as we will be feeding in an undefined number of 
784-pixel images. The shape of Y is [None, 10] as we will be using it for an undefined number of label outputs, with 10 
possible classes.

The parameters that the network will update in the training process are the weight and bias values, so for these we need to set an initial value rather than an empty placeholder. These values are essentially where the network does its learning, as they are used in the activation functions of the neurons, representing the strength of the connections between units.

In [28]:
weights = {
    'w1': tf.Variable(tf.truncated_normal([n_input, n_hidden1], stddev=0.1)),
    'w2': tf.Variable(tf.truncated_normal([n_hidden1, n_hidden2], stddev=0.1)),
    'w3': tf.Variable(tf.truncated_normal([n_hidden2, n_hidden3], stddev=0.1)),
    'out': tf.Variable(tf.truncated_normal([n_hidden3, n_output], stddev=0.1)),
}

In [29]:

biases = {
    'b1': tf.Variable(tf.constant(0.1, shape=[n_hidden1])),
    'b2': tf.Variable(tf.constant(0.1, shape=[n_hidden2])),
    'b3': tf.Variable(tf.constant(0.1, shape=[n_hidden3])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_output]))
}

In [30]:
layer_1 = tf.add(tf.matmul(X, weights['w1']), biases['b1'])
layer_2 = tf.add(tf.matmul(layer_1, weights['w2']), biases['b2'])
layer_3 = tf.add(tf.matmul(layer_2, weights['w3']), biases['b3'])
#50% of the neuron with dropout using keep_prob parameter
layer_drop = tf.nn.dropout(layer_3, keep_prob)
output_layer = tf.matmul(layer_3, weights['out']) + biases['out']

In [36]:
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(
        labels=Y, logits=output_layer
        ))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

In [37]:
correct_pred = tf.equal(tf.argmax(output_layer, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [38]:
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

In [39]:
for i in range(n_iterations):
    batch_x, batch_y = mnist.train.next_batch(batch_size)
    sess.run(train_step, feed_dict={
        X: batch_x, Y: batch_y, keep_prob: dropout
        })

    # printing loss and accuracy (per minibatch)
    if i % 100 == 0:
        minibatch_loss, minibatch_accuracy = sess.run(
            [cross_entropy, accuracy],
            feed_dict={X: batch_x, Y: batch_y, keep_prob: 1.0}
            )
        print("Iteration",str(i),"\t| Loss =",str(minibatch_loss),"\t| Accuracy =",str(minibatch_accuracy))

Iteration 0 	| Loss = 2.9534502 	| Accuracy = 0.171875
Iteration 100 	| Loss = 0.42810452 	| Accuracy = 0.8671875
Iteration 200 	| Loss = 0.47735146 	| Accuracy = 0.875
Iteration 300 	| Loss = 0.40716785 	| Accuracy = 0.890625
Iteration 400 	| Loss = 0.3635945 	| Accuracy = 0.90625
Iteration 500 	| Loss = 0.3687181 	| Accuracy = 0.8984375
Iteration 600 	| Loss = 0.37390578 	| Accuracy = 0.890625
Iteration 700 	| Loss = 0.17071684 	| Accuracy = 0.9375
Iteration 800 	| Loss = 0.24327417 	| Accuracy = 0.921875
Iteration 900 	| Loss = 0.2234478 	| Accuracy = 0.9375


In [40]:
test_accuracy = sess.run(accuracy, feed_dict={X: mnist.test.images, Y: mnist.test.labels, keep_prob: 1.0})
print("\nAccuracy on test set:", test_accuracy)


Accuracy on test set: 0.9184
