<h3>The MNIST data</h3>

In [2]:
# Short alias for the Keras MNIST dataset loader; load_data() is called on it in the next cell.
mnist = tf.keras.datasets.mnist

In [3]:
# Fetch the train/test splits as (images, labels) pairs.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale pixel intensities by 1/255 (assumes 8-bit images, i.e. values in 0..255,
# so the result lies in [0, 1]). Division also promotes the arrays to float.
x_train = x_train / 255.0
x_test = x_test / 255.0

In [4]:
import numpy as np

In [5]:
# Inspect the raw training images: (number of images, height, width).
x_train.shape

(60000, 28, 28)

In [6]:
# Flatten every 28x28 image into a 784-element row; -1 lets the array infer
# the number of images from its total size.
x_train, x_test = (images.reshape(-1, 784) for images in (x_train, x_test))

In [7]:
import pandas as pd

# One-hot encode the integer digit labels: get_dummies creates one indicator
# column per distinct label value, and to_numpy() drops the DataFrame wrapper.
# NOTE: this cell overwrites y_train / y_test, so it must be run exactly once
# per kernel session (re-running it on the already-encoded arrays will fail).
y_train = pd.get_dummies(y_train).to_numpy()
y_test = pd.get_dummies(y_test).to_numpy()

In [8]:
# Sanity check: after one-hot encoding both label arrays should have one column per class.
y_train.shape, y_test.shape

((60000, 10), (10000, 10))

In [9]:
# Confirm the test images were flattened to one row per image.
x_test.shape

(10000, 784)

In [10]:
# Confirm the training labels are one-hot encoded (one row per image, one column per class).
y_train.shape

(60000, 10)

<h3>Initial parameters</h3>

Create general parameters for the model

In [11]:
# Geometry of a single input image, in pixels.
width, height = 28, 28

# Length of one image once it is flattened into a vector.
flat = width * height

# Number of target classes the network has to distinguish.
class_output = 10

<h3>Input and output</h3>

Create place holders for inputs and outputs

In [16]:
# Build the whole CNN inside its own tf.Graph so it can be handed to a
# tf.compat.v1.Session explicitly. Every tensor/op name defined below is
# part of the notebook's namespace and is used by later cells.
graph = tf.Graph()

with graph.as_default():

    # --- Inputs -----------------------------------------------------------
    # Flattened images and their one-hot labels; the batch dimension is open.
    x  = tf.compat.v1.placeholder(tf.float32, shape=[None, flat])
    y_ = tf.compat.v1.placeholder(tf.float32, shape=[None, class_output])

    # Restore the 2-D layout (batch, height, width, channels=1) for conv2d.
    x_image = tf.reshape(x, [-1, height, width, 1])

    # --- Convolutional layer 1: 5x5 kernels, 1 -> 32 feature maps ----------
    W_conv1 = tf.Variable(tf.compat.v1.truncated_normal([5, 5, 1, 32], stddev=0.1))
    b_conv1 = tf.Variable(tf.constant(0.1, shape=[32]))  # one bias per output map

    convolve1 = tf.nn.conv2d(x_image, W_conv1, strides=[1, 1, 1, 1], padding='SAME') + b_conv1

    h_conv1 = tf.nn.relu(convolve1)

    # 2x2 max-pool window with stride 1: with SAME padding the spatial size is
    # NOT reduced here (stays 28x28); the downsampling happens in conv2's stride.
    conv1 = tf.nn.max_pool(h_conv1, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding='SAME')

    # --- Convolutional layer 2: 5x5 kernels, 32 -> 64 feature maps ---------
    W_conv2 = tf.Variable(tf.compat.v1.truncated_normal([5, 5, 32, 64], stddev=0.1))
    b_conv2 = tf.Variable(tf.constant(0.1, shape=[64]))  # one bias per output map

    # Stride 2 halves the spatial size: 28x28 -> 14x14.
    convolve2 = tf.nn.conv2d(conv1, W_conv2, strides=[1, 2, 2, 1], padding='SAME') + b_conv2

    h_conv2 = tf.nn.relu(convolve2)

    # 2x2 max-pool with stride 2 halves it again: 14x14 -> 7x7.
    conv2 = tf.nn.max_pool(h_conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # --- Fully connected layer: 7*7*64 -> 1024 -----------------------------
    layer2_matrix = tf.reshape(conv2, [-1, 7 * 7 * 64])

    W_fc1 = tf.Variable(tf.compat.v1.truncated_normal([7 * 7 * 64, 1024], stddev=0.1))
    b_fc1 = tf.Variable(tf.constant(0.1, shape=[1024]))  # one bias per hidden unit

    fcl = tf.matmul(layer2_matrix, W_fc1) + b_fc1

    h_fc1 = tf.nn.relu(fcl)

    # --- Dropout -----------------------------------------------------------
    # keep_prob is the probability of KEEPING a unit (fed as 0.5 for training
    # and 1.0 for evaluation). tf.nn.dropout's second positional argument is
    # the DROP rate in TF 2.x, so the keep probability must be converted and
    # passed by keyword; passing keep_prob positionally would drop every unit
    # when keep_prob == 1.0.
    keep_prob = tf.compat.v1.placeholder(tf.float32)
    layer_drop = tf.nn.dropout(h_fc1, rate=1.0 - keep_prob)

    # --- Readout layer: 1024 -> one logit per class ------------------------
    W_fc2 = tf.Variable(tf.compat.v1.truncated_normal([1024, class_output], stddev=0.1))
    b_fc2 = tf.Variable(tf.constant(0.1, shape=[class_output]))  # one bias per class

    fc = tf.matmul(layer_drop, W_fc2) + b_fc2  # unnormalised logits

    y_CNN = tf.nn.softmax(fc)  # class probabilities, used for prediction

    # --- Loss and optimiser ------------------------------------------------
    # Mean cross-entropy over the batch. It is computed from the logits by the
    # fused op rather than as -sum(y_ * log(softmax(fc))): the hand-written
    # form yields NaN/inf as soon as a predicted probability underflows to 0.
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=fc))

    train_step = tf.compat.v1.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    # --- Metrics -----------------------------------------------------------
    # A prediction is correct when the most probable class matches the label.
    correct_prediction = tf.equal(tf.argmax(y_CNN, 1), tf.argmax(y_, 1))

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Must be created after all variables (including Adam's slots) exist.
    init_op = tf.compat.v1.global_variables_initializer()

In [17]:
# Open a session bound to the graph built above and initialise all of its variables.
# The session is not installed as the default session, so later cells must
# evaluate tensors through sess.run(...).
sess = tf.compat.v1.Session(graph=graph)
sess.run(init_op)

In [18]:
# Check the size of the flattened training set before slicing it into mini-batches.
x_train.shape

(60000, 784)

In [None]:
from tqdm import tqdm

# Mini-batch training: walk through the training set in consecutive slices of
# `batch_size` examples, wrapping back to the start once the end is reached.
batch_size = 5000             # examples fed per optimisation step
n_train = x_train.shape[0]    # total number of training examples

j = 0                         # start offset of the current mini-batch
for i in tqdm(range(200)):
    # Wrap as soon as the offset reaches the end of the data. The comparison
    # must be >=: with a plain > the offset j == n_train slips through and the
    # slices below are empty, so an empty batch would be fed to the network.
    if j >= n_train:
        j = 0
    tr_x = x_train[j:j + batch_size]
    tr_y = y_train[j:j + batch_size]
    j += batch_size
    if i % 100 == 0:
        # Report accuracy on the current batch with dropout disabled.
        train_accuracy = sess.run(accuracy, feed_dict={
            x: tr_x, y_: tr_y, keep_prob: 1.0})
        print("step %d, training accuracy %g"%(i, train_accuracy))
    # One optimisation step, keeping each hidden unit with probability 0.5.
    sess.run(train_step, feed_dict={x: tr_x, y_: tr_y, keep_prob: 0.5})

  0%|          | 0/200 [00:00<?, ?it/s]

step 0, training accuracy 0.0958


  6%|▌         | 11/200 [02:41<44:10, 14.02s/it] 

<a id="ref9"></a>
<h2>Evaluate the model</h2>

Print the evaluation to the user

In [None]:
# Evaluate in batches to avoid out-of-memory issues.
# The test data comes from the x_test / y_test arrays prepared earlier in the
# notebook (flattened images, one-hot labels): `mnist` here is the Keras
# dataset loader, which has no `.test.images` / `.test.next_batch` interface.
eval_batch_size = 50
n_batches = x_test.shape[0] // eval_batch_size
cumulative_accuracy = 0.0
for index in range(n_batches):
    start = index * eval_batch_size
    batch_x = x_test[start:start + eval_batch_size]
    batch_y = y_test[start:start + eval_batch_size]
    # Run through the explicit session: no default session is installed, so
    # Tensor.eval() would fail. keep_prob=1.0 disables dropout for evaluation.
    cumulative_accuracy += sess.run(
        accuracy, feed_dict={x: batch_x, y_: batch_y, keep_prob: 1.0})
# Every batch has the same size, so the mean of the per-batch accuracies is
# the accuracy over all evaluated examples.
print("test accuracy {}".format(cumulative_accuracy / n_batches))