## Part II Run experiments using tf.keras model subclassing and eager

### Set up

In [0]:
!pip install -U tensorboardcolab

In [0]:
from tensorboardcolab import *

In [0]:
tbc=TensorBoardColab()

In [0]:
import tensorflow as tf
tf.enable_eager_execution()
tfe = tf.contrib.eager
import numpy as np

In [0]:
mnist = tf.keras.datasets.mnist

# Dataset will be cached locally after you run this code
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize pixel values to [0, 1]
x_train, x_test = x_train / 255.0, x_test / 255.0

# These types are required for the operation we use to compute
# loss. Omit, and you shall receive a cryptic error message.
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)

In [0]:
buffer_size = 5000
batch_size = 100

train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(buffer_size)
train_dataset = train_dataset.batch(batch_size)

In [0]:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)

In [0]:
def loss(logits, labels):
  # FIX ME
  # You will need to modify this function, of course.
  # Best bet, use tf.nn.sparse_softmax_cross_entropy_with_logits
  # though if you're interested, you can write your own.
  return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels = labels, logits = logits))

In [0]:
def compute_accuracy(logits, labels):
  # You shoud not need to modify this function
  predictions = tf.argmax(logits, axis=1)
  batch_size = int(logits.shape[0])
  return tf.reduce_sum(
      tf.cast(tf.equal(predictions, labels), dtype=tf.float32)) / batch_size

In [0]:
def train(model, images, labels):
  # You should not need to modify this function
  with tf.GradientTape() as tape:
    logits = model(images)
    loss_value = loss(logits, labels)  
  grads = tape.gradient(loss_value, model.variables)
  optimizer.apply_gradients(zip(grads, model.variables))
  return loss_value

### 3. Complete the linear model sketched in your starter code. Your finished model should be >80% accurate.

In [0]:
class Model(tf.keras.Model): # Linear Model
  def __init__(self):
    super(Model, self).__init__()
    self.flatten = tf.keras.layers.Flatten()
    # FIX ME
    # add some layers to your model
    self.dense = tf.keras.layers.Dense(10)

  def call(self, x):
    x = self.flatten(x)
    # FIX ME
    # use your layers (don't forget to add activation functions here as well
    # if you haven't specified them in your layer definintions)
    x = self.dense(x)
    return x # be sure to return logits, not softmax output

In [0]:
# The first time you run the below block it will crash
# with an error 'ValueError: No variables provided.''
# This is because the call method of your model
# is not using any trainable variables.
# (As written, the model just flattens the images.)

model = Model()

epochs = 10
step_counter = 0
my_loss = []

for epoch_n in range(epochs):
    print('Epoch #%d' % (epoch_n))
    for (batch, (images, labels)) in enumerate(train_dataset):
        loss_value = train(model, images, labels)
        my_loss.append(loss_value)
        step_counter +=1
  
        if step_counter % 100 == 0:
          print('Step #%d\tLoss: %.4f' % (step_counter, loss_value))

    test_accuracy = compute_accuracy(model(x_test), y_test)
    print('Accuracy #%.2f\n' % (test_accuracy))

### 4. Visualizating Loss for Linear Model with TensorBoard

In [0]:
writer = tf.contrib.summary.create_file_writer(logdir='./Graph', flush_millis=1000)
with writer.as_default():
    with tf.contrib.summary.always_record_summaries():
        for i in range(len(my_loss)):
            tf.contrib.summary.scalar("loss_linear_model", my_loss[i], step=i)
writer.close()

In [0]:
from google.colab import files
from IPython.display import Image

In [0]:
from io import BytesIO
uploaded = files.upload()

In [0]:
Image('loss_Linear_model.png',width=1200, height=500)

### 5. Deep Model with Subclassing and Loss Visualization with Tensorboard


In [0]:
class Deep_Model(tf.keras.Model): # Deep Model
  def __init__(self):
    super(Deep_Model, self).__init__()
    self.flatten = tf.keras.layers.Flatten()
    # FIX ME
    # add some layers to your model
    self.dense1 = tf.keras.layers.Dense(512)
    self.drop1 = tf.keras.layers.Dropout(rate = 0.5)
    self.dense2 = tf.keras.layers.Dense(512)
    self.drop2 = tf.keras.layers.Dropout(rate = 0.5)
    self.dense3 = tf.keras.layers.Dense(10)
    
  def call(self, x):
    x = self.flatten(x)
    # FIX ME
    # use your layers (don't forget to add activation functions here as well
    # if you haven't specified them in your layer definintions)
    x = self.dense1(x)
    x = tf.nn.relu(x)
    x = self.drop1(x)
    x = self.dense2(x)
    x = tf.nn.relu(x)
    x = self.drop2(x)
    x = self.dense3(x)
    return x # be sure to return logits, not softmax output

In [0]:
deep_model = Deep_Model()

epochs = 10
step_counter = 0
my_loss_deep = []

for epoch_n in range(epochs):
    print('Epoch #%d' % (epoch_n))
    for (batch, (images, labels)) in enumerate(train_dataset):
         loss_value = train(deep_model, images, labels)
         my_loss_deep.append(loss_value)
         step_counter +=1
  
         if step_counter % 100 == 0:
            print('Step #%d\tLoss: %.4f' % (step_counter, loss_value))

    test_accuracy = compute_accuracy(deep_model(x_test), y_test)
    print('Accuracy #%.2f\n' % (test_accuracy))
   

#### Visualizing loss for Deep Model

In [0]:
writer = tf.contrib.summary.create_file_writer(logdir='./Graph', flush_millis=1000)
with writer.as_default():
    with tf.contrib.summary.always_record_summaries():
        for i in range(len(my_loss_deep)):
            tf.contrib.summary.scalar("loss_deep_model", my_loss_deep[i], step=i)
writer.close()

In [0]:
from google.colab import files
from IPython.display import Image

In [0]:
from io import BytesIO
uploaded = files.upload()

In [0]:
Image('loss_Deep_model.png',width=1200, height=500)

### 6. Design and run experients to compare

#### a. High, low, and reasonable learning rate

**High Learning Rate 0.5**

In [0]:
optimizer_high = tf.train.GradientDescentOptimizer(learning_rate=0.5)

In [0]:
def train_high(model, images, labels):
  # You should not need to modify this function
  with tf.GradientTape() as tape:
    logits = model(images)
    loss_value = loss(logits, labels)  
  grads = tape.gradient(loss_value, model.variables)
  optimizer_high.apply_gradients(zip(grads, model.variables))
  return loss_value

In [0]:
deep_model = Deep_Model()

epochs = 10
step_counter = 0
my_loss_deep = []

for epoch_n in range(epochs):
    print('Epoch #%d' % (epoch_n))
    for (batch, (images, labels)) in enumerate(train_dataset):
         loss_value = train_high(deep_model, images, labels)
         my_loss_deep.append(loss_value)
         step_counter +=1
  
         if step_counter % 100 == 0:
            print('Step #%d\tLoss: %.4f' % (step_counter, loss_value))

    test_accuracy = compute_accuracy(deep_model(x_test), y_test)
    print('Accuracy #%.2f\n' % (test_accuracy))

**Visualizing Loss for Deep Model with High Learning Rate**

In [0]:
writer = tf.contrib.summary.create_file_writer(logdir='./Graph', flush_millis=1000)
with writer.as_default():
    with tf.contrib.summary.always_record_summaries():
        for i in range(len(my_loss_deep)):
            tf.contrib.summary.scalar("loss_deep_model_with_high_rate", my_loss_deep[i], step=i)
writer.close()

In [0]:
uploaded = files.upload()

In [0]:
Image('loss_deep_model_with_high_rate.png',width=1200, height=500)

**Low Learning Rate 0.01**

In [0]:
zhoptimizer_low = tf.train.GradientDescentOptimizer(learning_rate=0.01)

In [0]:
def train_low(model, images, labels):
  # You should not need to modify this function
  with tf.GradientTape() as tape:
    logits = model(images)
    loss_value = loss(logits, labels)  
  grads = tape.gradient(loss_value, model.variables)
  optimizer_low.apply_gradients(zip(grads, model.variables))
  return loss_value

In [0]:
deep_model = Deep_Model()

epochs = 10
step_counter = 0
my_loss_deep = []

for epoch_n in range(epochs):
    print('Epoch #%d' % (epoch_n))
    for (batch, (images, labels)) in enumerate(train_dataset):
         loss_value = train_low(deep_model, images, labels)
         my_loss_deep.append(loss_value)
         step_counter +=1
  
         if step_counter % 100 == 0:
            print('Step #%d\tLoss: %.4f' % (step_counter, loss_value))

    test_accuracy = compute_accuracy(deep_model(x_test), y_test)
    print('Accuracy #%.2f\n' % (test_accuracy))

**Visualizing Loss for Deep Model with Low Learning Rate**

In [0]:
writer = tf.contrib.summary.create_file_writer(logdir='./Graph', flush_millis=1000)
with writer.as_default():
    with tf.contrib.summary.always_record_summaries():
        for i in range(len(my_loss_deep)):
            tf.contrib.summary.scalar("loss_deep_model_with_low_rate", my_loss_deep[i], step=i)
writer.close()

In [0]:
from google.colab import files
from IPython.display import Image
from io import BytesIO
uploaded = files.upload()

In [0]:
Image('loss_deep_model_with_low_rate.png',width=1200, height=500)

**Reasonable Learning Rate 0.2**

In [0]:
optimizer_reasonable = tf.train.GradientDescentOptimizer(learning_rate=0.2)

In [0]:
def train_reasonable(model, images, labels):
  # You should not need to modify this function
  with tf.GradientTape() as tape:
    logits = model(images)
    loss_value = loss(logits, labels)  
  grads = tape.gradient(loss_value, model.variables)
  optimizer_reasonable.apply_gradients(zip(grads, model.variables))
  return loss_value

In [0]:
deep_model = Deep_Model()

epochs = 10
step_counter = 0
my_loss_deep = []

for epoch_n in range(epochs):
    print('Epoch #%d' % (epoch_n))
    for (batch, (images, labels)) in enumerate(train_dataset):
         loss_value = train_reasonable(deep_model, images, labels)
         my_loss_deep.append(loss_value)
         step_counter +=1
  
         if step_counter % 100 == 0:
            print('Step #%d\tLoss: %.4f' % (step_counter, loss_value))

    test_accuracy = compute_accuracy(deep_model(x_test), y_test)
    print('Accuracy #%.2f\n' % (test_accuracy))

**Visualizing Loss for Deep Model with Reasonable Learning Rate**

In [0]:
writer = tf.contrib.summary.create_file_writer(logdir='./Graph', flush_millis=1000)
with writer.as_default():
    with tf.contrib.summary.always_record_summaries():
        for i in range(len(my_loss_deep)):
            tf.contrib.summary.scalar("loss_deep_model_with_reasonable_rate", my_loss_deep[i], step=i)
writer.close()

In [0]:
from google.colab import files
from IPython.display import Image
from io import BytesIO
uploaded = files.upload()

In [0]:
Image('loss_deep_model_with_reasonable_rate.png',width=1200, height=500)

#### b. Different activation functions

**Sigmoid Activation Function**


In [0]:
class Deep_Model_sigmoid(tf.keras.Model): # Deep Model
  def __init__(self):
    super(Deep_Model_sigmoid, self).__init__()
    self.flatten = tf.keras.layers.Flatten()
    # FIX ME
    # add some layers to your model
    self.dense1 = tf.keras.layers.Dense(512)
    self.drop1 = tf.keras.layers.Dropout(rate = 0.5)
    self.dense2 = tf.keras.layers.Dense(512)
    self.drop2 = tf.keras.layers.Dropout(rate = 0.5)
    self.dense3 = tf.keras.layers.Dense(10)
    
  def call(self, x):
    x = self.flatten(x)
    # FIX ME
    # use your layers (don't forget to add activation functions here as well
    # if you haven't specified them in your layer definintions)
    x = self.dense1(x)
    x = tf.nn.sigmoid(x)
    x = self.drop1(x)
    x = self.dense2(x)
    x = tf.nn.sigmoid(x)
    x = self.drop2(x)
    x = self.dense3(x)
    return x # be sure to return logits, not softmax output

In [0]:
deep_model_sigmoid = Deep_Model_sigmoid()

epochs = 10
step_counter = 0
my_loss_deep = []

for epoch_n in range(epochs):
    print('Epoch #%d' % (epoch_n))
    for (batch, (images, labels)) in enumerate(train_dataset):
         loss_value = train(deep_model_sigmoid, images, labels)
         my_loss_deep.append(loss_value)
         step_counter +=1
  
         if step_counter % 100 == 0:
            print('Step #%d\tLoss: %.4f' % (step_counter, loss_value))

    test_accuracy = compute_accuracy(deep_model_sigmoid(x_test), y_test)
    print('Accuracy #%.2f\n' % (test_accuracy))
   

**Visualize Loss for Deep Model with Sigmoid Activation Function**

In [0]:
writer = tf.contrib.summary.create_file_writer(logdir='./Graph', flush_millis=1000)
with writer.as_default():
    with tf.contrib.summary.always_record_summaries():
        for i in range(len(my_loss_deep)):
            tf.contrib.summary.scalar("loss_deep_model_with_sigmoid", my_loss_deep[i], step=i)
writer.close()

In [0]:
from google.colab import files
from IPython.display import Image
from io import BytesIO
uploaded = files.upload()

In [0]:
Image('loss_deep_model_with_sigmoid.png',width=1200, height=500)

**Tanh Activation Function**

In [0]:
class Deep_Model_tanh(tf.keras.Model): # Deep Model
  def __init__(self):
    super(Deep_Model_tanh, self).__init__()
    self.flatten = tf.keras.layers.Flatten()
    # FIX ME
    # add some layers to your model
    self.dense1 = tf.keras.layers.Dense(512)
    self.drop1 = tf.keras.layers.Dropout(rate = 0.5)
    self.dense2 = tf.keras.layers.Dense(512)
    self.drop2 = tf.keras.layers.Dropout(rate = 0.5)
    self.dense3 = tf.keras.layers.Dense(10)
    
  def call(self, x):
    x = self.flatten(x)
    # FIX ME
    # use your layers (don't forget to add activation functions here as well
    # if you haven't specified them in your layer definintions)
    x = self.dense1(x)
    x = tf.nn.sigmoid(x)
    x = self.drop1(x)
    x = self.dense2(x)
    x = tf.nn.sigmoid(x)
    x = self.drop2(x)
    x = self.dense3(x)
    return x # be sure to return logits, not softmax output

In [0]:
deep_model_tanh = Deep_Model_tanh()

epochs = 10
step_counter = 0
my_loss_deep = []

for epoch_n in range(epochs):
    print('Epoch #%d' % (epoch_n))
    for (batch, (images, labels)) in enumerate(train_dataset):
         loss_value = train(deep_model_tanh, images, labels)
         my_loss_deep.append(loss_value)
         step_counter +=1
  
         if step_counter % 100 == 0:
            print('Step #%d\tLoss: %.4f' % (step_counter, loss_value))

    test_accuracy = compute_accuracy(deep_model_tanh(x_test), y_test)
    print('Accuracy #%.2f\n' % (test_accuracy))
   

****Visualize Loss for Deep Model with Tanh Activation Function****

In [0]:
writer = tf.contrib.summary.create_file_writer(logdir='./Graph', flush_millis=1000)
with writer.as_default():
    with tf.contrib.summary.always_record_summaries():
        for i in range(len(my_loss_deep)):
            tf.contrib.summary.scalar("loss_deep_model_with_tanh", my_loss_deep[i], step=i)
writer.close()

In [0]:
from google.colab import files
from IPython.display import Image
from io import BytesIO
uploaded = files.upload()

In [0]:
Image('loss_deep_model_with_tanh.png',width=1200, height=500)

#### c. Different gradient descent optimizers

**AdamOptimizer**

In [0]:
Adamoptimizer = tf.train.AdamOptimizer(learning_rate=0.1)

In [0]:
def train(model, images, labels):
  # You should not need to modify this function
  with tf.GradientTape() as tape:
    logits = model(images)
    loss_value = loss(logits, labels)  
  grads = tape.gradient(loss_value, model.variables)
  Adamoptimizer.apply_gradients(zip(grads, model.variables))
  return loss_value

In [0]:
class Deep_Model(tf.keras.Model): # Deep Model
  def __init__(self):
    super(Deep_Model, self).__init__()
    self.flatten = tf.keras.layers.Flatten()
    # FIX ME
    # add some layers to your model
    self.dense1 = tf.keras.layers.Dense(512)
    self.drop1 = tf.keras.layers.Dropout(rate = 0.5)
    self.dense2 = tf.keras.layers.Dense(512)
    self.drop2 = tf.keras.layers.Dropout(rate = 0.5)
    self.dense3 = tf.keras.layers.Dense(10)
    
  def call(self, x):
    x = self.flatten(x)
    # FIX ME
    # use your layers (don't forget to add activation functions here as well
    # if you haven't specified them in your layer definintions)
    x = self.dense1(x)
    x = tf.nn.relu(x)
    x = self.drop1(x)
    x = self.dense2(x)
    x = tf.nn.relu(x)
    x = self.drop2(x)
    x = self.dense3(x)
    return x # be sure to return logits, not softmax output

In [0]:
deep_model = Deep_Model()

epochs = 10
step_counter = 0
my_loss_deep = []

for epoch_n in range(epochs):
    print('Epoch #%d' % (epoch_n))
    for (batch, (images, labels)) in enumerate(train_dataset):
         loss_value = train(deep_model, images, labels)
         my_loss_deep.append(loss_value)
         step_counter +=1
  
         if step_counter % 100 == 0:
            print('Step #%d\tLoss: %.4f' % (step_counter, loss_value))

    test_accuracy = compute_accuracy(deep_model(x_test), y_test)
    print('Accuracy #%.2f\n' % (test_accuracy))

**Visualizing Loss for Deep Model with AdamOptimizer**

In [0]:
writer = tf.contrib.summary.create_file_writer(logdir='./Graph', flush_millis=1000)
with writer.as_default():
    with tf.contrib.summary.always_record_summaries():
        for i in range(len(my_loss_deep)):
            tf.contrib.summary.scalar("loss_deep_model_with_AdamOptimizer", my_loss_deep[i], step=i)
writer.close()

In [0]:
from google.colab import files
from IPython.display import Image
from io import BytesIO
uploaded = files.upload()

In [0]:
Image('loss_deep_model_with_adamoptimizer.png',width=1200, height=500)

**AdagradOptimizer**

In [0]:
AdagradOptimizer = tf.train.AdagradOptimizer(learning_rate=0.1)

In [0]:
def train(model, images, labels):
  # You should not need to modify this function
  with tf.GradientTape() as tape:
    logits = model(images)
    loss_value = loss(logits, labels)  
  grads = tape.gradient(loss_value, model.variables)
  AdagradOptimizer.apply_gradients(zip(grads, model.variables))
  return loss_value

In [0]:
class Deep_Model(tf.keras.Model): # Deep Model
  def __init__(self):
    super(Deep_Model, self).__init__()
    self.flatten = tf.keras.layers.Flatten()
    # FIX ME
    # add some layers to your model
    self.dense1 = tf.keras.layers.Dense(512)
    self.drop1 = tf.keras.layers.Dropout(rate = 0.5)
    self.dense2 = tf.keras.layers.Dense(512)
    self.drop2 = tf.keras.layers.Dropout(rate = 0.5)
    self.dense3 = tf.keras.layers.Dense(10)
    
  def call(self, x):
    x = self.flatten(x)
    # FIX ME
    # use your layers (don't forget to add activation functions here as well
    # if you haven't specified them in your layer definintions)
    x = self.dense1(x)
    x = tf.nn.relu(x)
    x = self.drop1(x)
    x = self.dense2(x)
    x = tf.nn.relu(x)
    x = self.drop2(x)
    x = self.dense3(x)
    return x # be sure to return logits, not softmax output

In [0]:
deep_model = Deep_Model()

epochs = 10
step_counter = 0
my_loss_deep = []

for epoch_n in range(epochs):
    print('Epoch #%d' % (epoch_n))
    for (batch, (images, labels)) in enumerate(train_dataset):
         loss_value = train(deep_model, images, labels)
         my_loss_deep.append(loss_value)
         step_counter +=1
  
         if step_counter % 100 == 0:
            print('Step #%d\tLoss: %.4f' % (step_counter, loss_value))

    test_accuracy = compute_accuracy(deep_model(x_test), y_test)
    print('Accuracy #%.2f\n' % (test_accuracy))

**Visualizing Loss for Deep Model with MomentumOptimizer**

In [0]:
writer = tf.contrib.summary.create_file_writer(logdir='./Graph', flush_millis=1000)
with writer.as_default():
    with tf.contrib.summary.always_record_summaries():
        for i in range(len(my_loss_deep)):
            tf.contrib.summary.scalar("loss_deep_model_with_AdagradOptimizer", my_loss_deep[i], step=i)
writer.close()

In [0]:
from google.colab import files
from IPython.display import Image
from io import BytesIO
uploaded = files.upload()

In [0]:
Image('loss_deep_model_with_AdagradOptimizer.png',width=1200, height=500)

#### d. Different weight initialization strategies

**With Zeros Initializer**

In [0]:
class Deep_Model(tf.keras.Model): # Deep Model
  def __init__(self):
    super(Deep_Model, self).__init__()
    self.flatten = tf.keras.layers.Flatten()
    # FIX ME
    # add some layers to your model
    self.dense1 = tf.keras.layers.Dense(512, kernel_initializer = "Zeros")
    self.drop1 = tf.keras.layers.Dropout(rate = 0.5)
    self.dense2 = tf.keras.layers.Dense(512, kernel_initializer = "Zeros")
    self.drop2 = tf.keras.layers.Dropout(rate = 0.5)
    self.dense3 = tf.keras.layers.Dense(10)
    
  def call(self, x):
    x = self.flatten(x)
    # FIX ME
    # use your layers (don't forget to add activation functions here as well
    # if you haven't specified them in your layer definintions)
    x = self.dense1(x)
    x = tf.nn.relu(x)
    x = self.drop1(x)
    x = self.dense2(x)
    x = tf.nn.relu(x)
    x = self.drop2(x)
    x = self.dense3(x)
    return x # be sure to return logits, not softmax output

In [0]:
deep_model = Deep_Model()

epochs = 10
step_counter = 0
my_loss_deep = []

for epoch_n in range(epochs):
    print('Epoch #%d' % (epoch_n))
    for (batch, (images, labels)) in enumerate(train_dataset):
         loss_value = train(deep_model, images, labels)
         my_loss_deep.append(loss_value)
         step_counter +=1
  
         if step_counter % 100 == 0:
            print('Step #%d\tLoss: %.4f' % (step_counter, loss_value))

    test_accuracy = compute_accuracy(deep_model(x_test), y_test)
    print('Accuracy #%.2f\n' % (test_accuracy))

**Visualizing Loss for Deep Model with kernel.initialier.Zeros**

In [0]:
writer = tf.contrib.summary.create_file_writer(logdir='./Graph', flush_millis=1000)
with writer.as_default():
    with tf.contrib.summary.always_record_summaries():
        for i in range(len(my_loss_deep)):
            tf.contrib.summary.scalar("loss_deep_model_with_kernel.initialier.Zeros", my_loss_deep[i], step=i)
writer.close()

In [0]:
from google.colab import files
from IPython.display import Image
from io import BytesIO
uploaded = files.upload()

In [0]:
Image('loss_deep_model_with_kernel.initializer.Zeros.png',width=1200, height=500)

**RandomNormal Initializer**

In [0]:
class Deep_Model(tf.keras.Model): # Deep Model
  def __init__(self):
    super(Deep_Model, self).__init__()
    self.flatten = tf.keras.layers.Flatten()
    # FIX ME
    # add some layers to your model
    self.dense1 = tf.keras.layers.Dense(512, kernel_initializer = "RandomNormal")
    self.drop1 = tf.keras.layers.Dropout(rate = 0.5)
    self.dense2 = tf.keras.layers.Dense(512, kernel_initializer = "RandomNormal")
    self.drop2 = tf.keras.layers.Dropout(rate = 0.5)
    self.dense3 = tf.keras.layers.Dense(10)
    
  def call(self, x):
    x = self.flatten(x)
    # FIX ME
    # use your layers (don't forget to add activation functions here as well
    # if you haven't specified them in your layer definintions)
    x = self.dense1(x)
    x = tf.nn.relu(x)
    x = self.drop1(x)
    x = self.dense2(x)
    x = tf.nn.relu(x)
    x = self.drop2(x)
    x = self.dense3(x)
    return x # be sure to return logits, not softmax output

In [0]:
deep_model = Deep_Model()

epochs = 10
step_counter = 0
my_loss_deep = []

for epoch_n in range(epochs):
    print('Epoch #%d' % (epoch_n))
    for (batch, (images, labels)) in enumerate(train_dataset):
         loss_value = train(deep_model, images, labels)
         my_loss_deep.append(loss_value)
         step_counter +=1
  
         if step_counter % 100 == 0:
            print('Step #%d\tLoss: %.4f' % (step_counter, loss_value))

    test_accuracy = compute_accuracy(deep_model(x_test), y_test)
    print('Accuracy #%.2f\n' % (test_accuracy))

**Visualizing Loss for Deep Model with kernel.initialier.RandomNormal**

In [0]:
writer = tf.contrib.summary.create_file_writer(logdir='./Graph', flush_millis=1000)
with writer.as_default():
    with tf.contrib.summary.always_record_summaries():
        for i in range(len(my_loss_deep)):
            tf.contrib.summary.scalar("loss_deep_model_with_kernel.initialier.RandomNormal", my_loss_deep[i], step=i)
writer.close()

In [0]:
from google.colab import files
from IPython.display import Image
from io import BytesIO
uploaded = files.upload()

In [0]:
Image('loss_deep_model_with_kernel.initializer.RandomNormal.png',width=1200, height=500)

## Write Up



*   With a high learning rate 0.5, accuracy has been very high accuracy 0.98. For low learning rate 0.01, the accuracy can also reach 0.98. However, with a reasonable learning rate, the accuracy is only about 0.92. It does not imply that the higher or the lower the learning rate, the better the accuracy.

*   The accuracy for using Sigmoind and Tanh activation functions are about the same, which is around 0.93. No obvious difference for the activation functions I chose for this case. 

*  The accuracy for choosing AdamOptimizer is only 0.38, which is very low and should not be adapted in this model, while the AdaradOptimizer has a high accuracy of 0.98.

*   By initializing weights to be Zeros, the accuracy is super low (0.11), while using RandomNormal weights initialization, the accuracy can reach 0.98. In this case, we would definitely choose RandomNormal weights initialization strategy. 

