## Imports

In [1]:
from numpy.random import seed
seed(888)
import tensorflow
tensorflow.random.set_seed(404)

In [2]:
import os
import numpy as np
# import tensorflow as tf

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

from time import strftime

Instructions for updating:
non-resource variables are not supported in the long term


## Constants

In [3]:
X_TRAIN_PATH = 'MNIST/digit_xtrain.csv'
X_TEST_PATH = 'MNIST/digit_xtest.csv'
Y_TRAIN_PATH = 'MNIST/digit_ytrain.csv'
Y_TEST_PATH = 'MNIST/digit_ytest.csv'

LOGGING_PATH = '/tensorboard_mnist_digit_logs'

NR_CLASSES = 10
VALIDATION_SIZE = 10000
IMAGE_WIDTH = 28
IMAGE_HEIGHT = 28
CHANNELS = 1
TOTAL_INPUTS = IMAGE_WIDTH * IMAGE_HEIGHT * CHANNELS

## Get the Data

In [4]:
%%time

y_train_all = np.loadtxt(Y_TRAIN_PATH, delimiter=',', dtype=int)

Wall time: 441 ms


In [5]:
y_train_all.shape

(60000,)

In [6]:
y_test = np.loadtxt(Y_TEST_PATH, delimiter=',', dtype=int)

In [7]:
%%time

x_train_all = np.loadtxt(X_TRAIN_PATH, delimiter=',', dtype=int)

Wall time: 49.2 s


In [8]:
%%time

x_test = np.loadtxt(X_TEST_PATH, delimiter=',', dtype=int)

Wall time: 11.4 s


## Explore

In [9]:
x_train_all.shape

(60000, 784)

In [10]:
y_train_all.shape

(60000,)

In [11]:
x_test.shape

(10000, 784)

In [12]:
y_train_all[:5]

array([5, 0, 4, 1, 9])

## Data Preprocessing

In [13]:
# Re-scale 

x_train_all, x_test = x_train_all / 255.0, x_test / 255.0

### Convert these values to One Hot Encoding

In [14]:
values = y_train_all[:5]
np.eye(10)[values]

array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

In [15]:
np.eye(10)

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

In [16]:
np.eye(10)[2] #extract the third row

array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0.])

In [17]:
values

array([5, 0, 4, 1, 9])

In [18]:
values[4]

9

In [19]:
y_train_all = np.eye(NR_CLASSES)[y_train_all]

In [20]:
y_train_all.shape

(60000, 10)

In [21]:
y_test = np.eye(NR_CLASSES)[y_test]
y_test.shape

(10000, 10)

### Create a validation dataset from training data

**Challenge:** Split the training dataset into a smaller traiing dataset and a validation dataset for the features and the labels. Create four arrays: x_val, y_val, x_train and y_train from x_train_all and y_train_all. Use the validation size of 10000

In [22]:
x_val = x_train_all[:VALIDATION_SIZE]
y_val = y_train_all[:VALIDATION_SIZE]

In [23]:
x_train = x_train_all[VALIDATION_SIZE:]
y_train = y_train_all[VALIDATION_SIZE:]

In [24]:
x_train.shape

(50000, 784)

In [25]:
x_val.shape

(10000, 784)

In [26]:
y_train.shape

(50000, 10)

In [27]:
y_val.shape

(10000, 10)

## Setup Tensor Graph

In [28]:
X = tf.placeholder(tf.float32, shape=[None, TOTAL_INPUTS])
Y = tf.placeholder(tf.float32, shape=[None, NR_CLASSES])

### Neural Network Architecture

##### Hyperparameters

In [29]:
nr_epochs = 5
learning_rate = 1e-4

n_hidden1 = 512
n_hidden2 = 64

In [30]:
initial_w1 = tf.truncated_normal(shape=[TOTAL_INPUTS, n_hidden1], stddev=0.1, seed=42)
w1 = tf.Variable(initial_value=initial_w1)

In [31]:
initial_b1 = tf.constant(value=0.0, shape=[n_hidden1])
b1 = tf.Variable(initial_value=initial_b1)

In [32]:
layer1_in = tf.matmul(X, w1) + b1

In [33]:
layer1_out = tf.nn.relu(layer1_in)

**Challenge:** Setup the second hidden layer. This layer has 64 neurons and needs to work off the out of the first hidden layer (see above). Then setup the output layer. Remember, the output layer will use the softmax function.

In [34]:
initial_w2 = tf.truncated_normal(shape=[n_hidden1, n_hidden2], stddev=0.1, seed=42)
w2 = tf.Variable(initial_value=initial_w2)

initial_b2 = tf.constant(value=0.0, shape=[n_hidden2])
b2 = tf.Variable(initial_value=initial_b2)

layer2_in = tf.matmul(layer1_out, w2) + b2
layer2_out = tf.nn.relu(layer2_in)

In [35]:
initial_w3 = tf.truncated_normal(shape=[n_hidden2, NR_CLASSES], stddev=0.1, seed=42)
w3 = tf.Variable(initial_value=initial_w3)

initial_b3 = tf.constant(value=0.0, shape=[NR_CLASSES])
b3 = tf.Variable(initial_value=initial_b3)

layer3_in = tf.matmul(layer2_out, w3) + b3
output = tf.nn.softmax(layer3_in)

## Tensorboard Setup

In [36]:
# Folder for tensorboard

folder_name = f'Model 1 at {strftime("%H:%M")}'
directory = os.path.join(LOGGING_PATH, folder_name)

try:
    os.makedirs(directory)
except OSError as exception:
    print(exception.strerror)
else:
    print('Successfully created directories!')

The directory name is invalid


## Loss, Optimisation, and Metrics

In [37]:
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=output))

In [38]:
optimizer = tf.train.AdamOptimizer(learning_rate)
train_step = optimizer.minimize(loss)

### Accuracy Metric

In [39]:
correct_pred = tf.equal(tf.argmax(output, axis=1), tf.argmax(Y, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [40]:
tf.summary.scalar('accuracy', accuracy)
tf.summary.scalar('cost', loss)

<tf.Tensor 'cost:0' shape=() dtype=string>

### Run Session

In [41]:
sess = tf.Session()

##### Setup Filewriter and Merge Summaries

In [42]:
merged_summary = tf.summary.merge_all()

train_writer = tf.summary.FileWriter(directory + '/train')

train_writer.add_graph(sess.graph)

validation_writer = tf.summary.FileWriter(directory + '/validation')

InvalidArgumentError: Failed to create a directory: /tensorboard_mnist_digit_logs/Model 1 at 12:32; Invalid argument

In [None]:
# Initialize all the variables
init = tf.global_variables_initializer()
sess.run(init)

In [None]:
b3.eval(sess)

In [None]:
size_of_batch = 1000
num_examples = y_train.shape[0]
nr_iterations = int(num_examples / size_of_batch)

index_in_epoch = 0

In [None]:
def next_batch(batch_size, data, labels):
    
    global num_examples
    global index_in_epoch
    
    start = index_in_epoch
    index_in_epoch += batch_size
    
    if index_in_epoch > num_examples:
        start=0
        index_in_epoch = batch_size
    
    end = index_in_epoch
    
    return data[start:end], labels[start:end]

### Training Loop

In [None]:
for epoch in range(nr_epochs):
    
    # ========== Training Dataset ===========
    for i in range(nr_iterations):
        
        batch_x, batch_y = next_batch(batch_size=size_of_batch, 
                                      data=x_train, labels=y_train)
        
        feed_dictionary = {X:batch_x, Y:batch_y}
        
        sess.run(train_step, feed_dict=feed_dictionary)
        
    s, batch_accuracy = sess.run(fetches=[merged_summary, accuracy], 
                                  feed_dict=feed_dictionary)
    
    train_writer.add_summary(s, epoch)
    
    print(f'Epoch {epoch} \t| Training Accuracy = {batch_accuracy}')
    
    # =================Validation =====================
    summary = sess.run(fetches=merged_summary, feed_dict={X:x_val, Y: y_val})
    validation_writer.add_summary(summary, epoch)
print('Done training!')

### Reset for the Next Run

In [None]:
train_writer.close()
sess,close()
tf.reset_default_graph()