In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# import tensorflow
import tensorflow as tf
# import MNIST (MNIST is a dataset containing correctly labeled images of handwritten numbers)
from tensorflow.examples.tutorials.mnist import input_data

In [None]:
# Load MNIST, a dataset of handwritten digits that have already been labelled correctly.
# It holds well over 30,000 images, so a lot of manual labelling effort went into it.
# one_hot=False keeps every label as a plain digit (a 3 is labelled 3), not a one-hot vector.
mnist = input_data.read_data_sets(
    "/tmp/data/",
    one_hot=False,
)
# Any warnings printed by the call above can safely be ignored.

In [None]:
# Hyperparameters for the whole notebook

# Size of each update step: larger values learn faster but train less stably.
learning_rate = 1e-3
# Number of images used per training step. This is gradient descent on small
# batches, so every step moves towards the goal using just 128 images.
batch_size = 128
# Total number of training steps to run.
n_steps = 2000

# Number of neurons in each layer
# Every image is 28 by 28 pixels, so the input layer needs 28 * 28 values.
n_inputs = 28 ** 2
n_hidden1, n_hidden2 = 300, 100
# One output per digit class (0-9).
n_output = 10

In [None]:
# Placeholders: the graph's entry points, filled in through feed_dict at run time.
# x receives a batch of flattened images, one row of n_inputs pixel values per image.
# The first dimension is None so that a batch of any size can be fed.
x = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# y receives the label for each corresponding image as an integer class id,
# i.e. if the image is a 3, y is 3.
# shape=(None,) declares a 1-D vector of any length. Leaving the shape fully
# unspecified would also accept labels of the wrong rank (e.g. one-hot rows),
# whereas this declaration makes TensorFlow reject them as soon as they are fed.
y = tf.placeholder(tf.int64, shape=(None,), name="y")

In [None]:
# Build the network: three fully connected layers stacked on the image input x.
# Both hidden layers use the ReLU activation function; background on activations:
# https://towardsdatascience.com/activation-functions-and-its-types-which-is-better-a9a5310cc8f
hidden1_layer = tf.layers.dense(
    inputs=x,
    units=n_hidden1,
    activation=tf.nn.relu,
    name="hidden1",
)
hidden2_layer = tf.layers.dense(
    inputs=hidden1_layer,
    units=n_hidden2,
    activation=tf.nn.relu,
    name="hidden2",
)
# No activation on the last layer: it emits one raw score (logit) per class.
output_layer = tf.layers.dense(
    inputs=hidden2_layer,
    units=n_output,
    name="outputs",
)
# The layers are created under fixed names, so running this cell a second time in the
# same kernel can fail with an error about variable reuse; restart the kernel if so.

In [None]:
# Training objective
# Unlike the squared error used in the 3Blue1Brown video, this network is scored with
# cross entropy error. Why cross entropy is the better choice:
# https://jamesmccaffrey.wordpress.com/2013/11/05/why-you-should-use-cross-entropy-error-instead-of-classification-error-or-mean-squared-error-for-neural-network-classifier-training/
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=output_layer,
    labels=y,
)
# Average the per-example errors into the single scalar the optimizer will minimize.
loss = tf.reduce_mean(input_tensor=cross_entropy, name="loss")

In [None]:
# The Adam optimizer does the backpropagation heavy lifting for us.
optimizer = tf.train.AdamOptimizer(learning_rate)
# Running training_op performs one optimization step that pushes the loss down.
training_op = optimizer.minimize(loss=loss)

In [None]:
# Evaluation operators, grouped under the "eval" name scope.
with tf.name_scope("eval"):
    # One boolean per image: True when the label y is the network's top-scoring class.
    correct = tf.nn.in_top_k(predictions=output_layer, targets=y, k=1)
    # Cast True/False to 1.0/0.0 and average them: the fraction of images classified correctly.
    accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))

In [None]:
# The network and the training operators are built, so now the training can start.

# A session holds all the values and the current state of the network while it runs.
with tf.Session() as sess:
    # Give every variable in the network its initial value before the first step.
    sess.run(tf.global_variables_initializer())

    # The test images and labels (which the network never trains on) are the same
    # at every step, so their feed is built once up front.
    test_feed = {x: mnist.test.images, y: mnist.test.labels}

    for step in range(n_steps):
        # Draw the next batch of training images with their labels ...
        x_batch, y_batch = mnist.train.next_batch(batch_size)
        train_feed = {x: x_batch, y: y_batch}
        # ... and run one training step on that batch.
        sess.run(training_op, feed_dict=train_feed)
        # Measure the accuracy on the test set after this step and report it.
        accuracy_val = sess.run(accuracy, feed_dict=test_feed)
        print(step, "Test accuracy:", accuracy_val)

# Watch Its Accuracy Take Off Like a Rocket!