**Deep Learning Notebook 3**

Dataset: MNIST digits dataset available as part of Keras

Objectives: Develop a FC NN model using tensorflow

In [2]:
%tensorflow_version 1.x

TensorFlow 1.x selected.


In [3]:
# Import the required libraries, modules

import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.python.framework import ops
from sklearn.model_selection import train_test_split
import math
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt

In [4]:
# Check the version details

print("TF Version: ", tf.__version__)

TF Version:  1.15.2


In [5]:
# Load a subset of records from the MNIST data set

mnist = keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [6]:
# Normalize X by dividing by 255

X_train = X_train.T / 255.0
X_test = X_test.T / 255.0

In [7]:
# Convert y to one-hot matrix

y_train_temp = tf.one_hot(y_train, 6, axis = 0)
y_test_temp = tf.one_hot(y_test, 6, axis = 0)

sess = tf.Session()
y_train = sess.run(y_train_temp)
y_test = sess.run(y_test_temp)
sess.close()

In [8]:
# Flatten X

X_train = X_train.reshape(X_train.T.shape[0], -1).T
X_test = X_test.reshape(X_test.T.shape[0], -1).T

In [9]:
# Verify shapes of X and y

print ("Number of training examples = " + str(X_train.shape[1]))
print ("Number of test examples = " + str(X_test.shape[1]))
print ("X_train shape: " + str(X_train.shape))
print ("y_train shape: " + str(y_train.shape))
print ("X_test shape: " + str(X_test.shape))
print ("y_test shape: " + str(y_test.shape))

Number of training examples = 60000
Number of test examples = 10000
X_train shape: (784, 60000)
y_train shape: (6, 60000)
X_test shape: (784, 10000)
y_test shape: (6, 10000)


In [10]:
# Create placeholder tensors for the session

def create_placeholders(n_x, n_y):

  X = tf.placeholder(tf.float32, [n_x, None], name = "X")
  y = tf.placeholder(tf.float32, [n_y, None], name = "y")

  return X, y

In [11]:
# Initialize weights of the NN

def initialize_parameters():

  # Input layer    : 784 input nodes
  # Hidden layer 1 : 200 neurons + Xavier initialization
  # Hidden layer 2 : 100 neurons + Xavier initialization
  # Output layer   : 6 output nodes

  W1 = tf.get_variable("W1", [200,784], initializer = tf.contrib.layers.xavier_initializer())
  b1 = tf.get_variable("b1", [200,1], initializer = tf.zeros_initializer())
  W2 = tf.get_variable("W2", [100,200], initializer = tf.contrib.layers.xavier_initializer())
  b2 = tf.get_variable("b2", [100,1], initializer = tf.zeros_initializer())
  W3 = tf.get_variable("W3", [6,100], initializer = tf.contrib.layers.xavier_initializer())
  b3 = tf.get_variable("b3", [6,1], initializer = tf.zeros_initializer())

  parameters = {"W1": W1, "b1": b1, "W2": W2, "b2": b2, "W3": W3, "b3": b3}

  return parameters

In [12]:
# Forward propagation

def forward_propagation(X, parameters):

  W1 = parameters['W1']
  b1 = parameters['b1']
  W2 = parameters['W2']
  b2 = parameters['b2']
  W3 = parameters['W3']
  b3 = parameters['b3']

  Z1 = tf.add(tf.matmul(W1, X), b1) 
  A1 = tf.nn.relu(Z1)
  tf.nn.dropout(A1, 0.8)
  Z2 = tf.add(tf.matmul(W2, A1), b2) 
  A2 = tf.nn.relu(Z2)
  tf.nn.dropout(A2, 0.8)
  Z3 = tf.add(tf.matmul(W3, A2), b3)

  return Z3

In [13]:
# Define cost function

def compute_cost(Z3, y):

  cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = tf.transpose(Z3), labels = tf.transpose(y)))
  return cost

In [14]:
# Util for extracting mini batches

def random_mini_batches(X, Y, mini_batch_size = 64):
    """
    Creates a list of random minibatches from (X, Y)
    
    Arguments:
    X -- input data, of shape (input size, number of examples)
    Y -- true "label" vector (containing 0 if cat, 1 if non-cat), of shape (1, number of examples)
    mini_batch_size - size of the mini-batches, integer
    
    Returns:
    mini_batches -- list of synchronous (mini_batch_X, mini_batch_Y)
    """
    
    m = X.shape[1]                  # number of training examples
    mini_batches = []
    
    # Step 1: Shuffle (X, Y)
    permutation = list(np.random.permutation(m))
    shuffled_X = X[:, permutation]
    shuffled_Y = Y[:, permutation].reshape((Y.shape[0],m))

    # Step 2: Partition (shuffled_X, shuffled_Y). Minus the end case.
    num_complete_minibatches = math.floor(m/mini_batch_size) # number of mini batches of size mini_batch_size in your partitionning
    for k in range(0, num_complete_minibatches):
        mini_batch_X = shuffled_X[:, k * mini_batch_size : k * mini_batch_size + mini_batch_size]
        mini_batch_Y = shuffled_Y[:, k * mini_batch_size : k * mini_batch_size + mini_batch_size]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    
    # Handling the end case (last mini-batch < mini_batch_size)
    if m % mini_batch_size != 0:
        mini_batch_X = shuffled_X[:, num_complete_minibatches * mini_batch_size : m]
        mini_batch_Y = shuffled_Y[:, num_complete_minibatches * mini_batch_size : m]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    
    return mini_batches

In [15]:
# Implement model

def model(X_train, y_train, X_test, y_test, learning_rate = 0.00001, num_epochs = 1500, minibatch_size = 2048, print_cost = True):

  # Rerun model without overriding tf variables
  ops.reset_default_graph()

  # Extract the shape of X & y
  (n_x, m) = X_train.shape
  n_y = y_train.shape[0]

  # Create placeholders
  X, y = create_placeholders(n_x, n_y)

  # Initialize parameters
  parameters = initialize_parameters()

  # Initialize cost list
  costs = []

  # Run forward propagation
  Z3 = forward_propagation(X, parameters)

  # Calculate cost
  cost = compute_cost(Z3, y)

  # Specify optimizer
  optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)

  # Initialize variables
  init = tf.global_variables_initializer()

  # Initialize & invoke TF session
  with tf.Session() as sess:
    sess.run(init)

    # For loop to iterate through the epochs
    for epoch in range(num_epochs):
      
      # Initialize epoch cost
      epoch_cost = 0.0

      # Calculate No. of minibatches
      num_minibatches = int( m / minibatch_size ) 
  
      # Evaluate minibatches
      minibatches = random_mini_batches(X_train, y_train, minibatch_size)

      # For loop to iterate through every minibatch
      for minibatch in minibatches:

        # Read the minibatch
        (minibatch_X, minibatch_Y) = minibatch

        # Evaluate the minibatch cost
        _ , minibatch_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X, y: minibatch_Y})

        # Accumulate average minibatch cost to epoch cost
        epoch_cost += minibatch_cost / minibatch_size

      # For every epoch, print the total cost
      if print_cost == True and epoch % 100 == 0:
        print ("Cost after epoch %i: %f" % (epoch, epoch_cost))

      if print_cost == True and epoch % 5 == 0:
        costs.append(epoch_cost)

    # Plot the learning curve as a function of No. of epochs
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per fives)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()

    # Save the final parameters
    parameters = sess.run(parameters)
    print ("Parameters have been trained!")

    # Evaluate model
    correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(y))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    # Print train and test accuracies
    print ("Train Accuracy:", accuracy.eval({X: X_train, y: y_train}))
    print ("Test Accuracy:", accuracy.eval({X: X_test, y: y_test}))

  return parameters

In [16]:
# Run the model for the train and test data sets

parameters = model(X_train, y_train, X_test, y_test)

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Cost after epoch 0: 0.015954
Cost after epoch 100: 0.016176


KeyboardInterrupt: ignored

In [17]:
X_train.shape

(784, 60000)