In [None]:
import os
import numpy as np
import time
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib
import math

In [None]:
# Sanity-check the runtime before building the model.
# The eager-mode query's return value is discarded; only the last expression
# (the list of physical GPU devices TensorFlow can see) is shown as the cell output.
tf.executing_eagerly()
tf.config.list_physical_devices('GPU')

[]

In [None]:
# Seed NumPy and TensorFlow's global generators from one shared value.
seed = 2222
np.random.seed(seed)
tf.random.set_seed(seed)

# Layer widths: flattened 28x28 image -> two hidden layers -> 10 outputs.
size_input, size_hidden_1, size_hidden_2, size_output = 28 * 28, 256, 128, 10

# Optimisation / regularisation settings (read as globals by the model and training cells).
batch_size, lr, dropout_p = 30, 0.1, 0.0
L1, L2 = 0, 3e-5

# Load Fashion-MNIST, divide pixel values by 255 and flatten every image to one row.
fashion_mnist = tf.keras.datasets.fashion_mnist
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
x_train = tf.reshape(x_train / 255.0, [x_train.shape[0], -1])
x_test = tf.reshape(x_test / 255.0, [x_test.shape[0], -1])

# Keep the first 55000 training rows for training and hold out the last 5000 for validation.
x_train, x_valid = tf.split(x_train, [55000, 5000], axis=0)
y_train, y_valid = tf.split(y_train, [55000, 5000], axis=0)

# One shape per line for each split, followed by the first twenty test labels.
print(
    *(split.shape for split in (x_train, y_train, x_test, y_test, x_valid, y_valid)),
    sep='\n'
)
print(y_test[0:20])

(55000, 784)
(55000,)
(10000, 784)
(10000,)
(5000, 784)
(5000,)
[9 2 1 1 6 1 4 6 5 7 4 5 7 3 4 1 2 4 8 0]


In [None]:
class MLP(tf.keras.Model):
  """
  Multi-layer perceptron with two ReLU hidden layers and a linear (logit) output,
  built from hand-managed tf.Variables and trained with plain SGD.

  The class reads the module-level names `seed`, `L1`, `L2`, `lr` and `dropout_p`,
  so the configuration cell defining them must have been run before instantiation.
  """

  def __init__(self, size_input, size_hidden_1, size_hidden_2, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden_1: int, size of the first hidden layer
    size_hidden_2: int, size of the second hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    super(MLP, self).__init__()

    self.size_input, self.size_hidden_1, self.size_hidden_2, self.size_output, self.device =\
    size_input, size_hidden_1, size_hidden_2, size_output, device

    self.initial=tf.keras.initializers.he_normal(seed=seed)

    # NOTE(review): the biases below are drawn from the same He-normal initializer as
    # the weights; zero-initialised biases are the more common choice - confirm this is intended.

    # Weights and biases: input layer -> first hidden layer
    self.W1 = tf.Variable(self.initial([self.size_input, self.size_hidden_1]))
    self.b1 = tf.Variable(self.initial([1, self.size_hidden_1]))
    # Weights and biases: first hidden layer -> second hidden layer
    self.W2 = tf.Variable(self.initial([self.size_hidden_1, self.size_hidden_2]))
    self.b2 = tf.Variable(self.initial([1, self.size_hidden_2]))
    # Weights and biases: second hidden layer -> output layer
    self.W3 = tf.Variable(self.initial([self.size_hidden_2, self.size_output]))
    self.b3 = tf.Variable(self.initial([1, self.size_output]))

    # Variables updated during backpropagation
    self.MLP_variables = [self.W1, self.b1, self.W2, self.b2, self.W3, self.b3]

    # The network returns raw logits, hence from_logits=True.
    self.loss_object =tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    self.reg_12=tf.keras.regularizers.L1L2(l1=L1, l2=L2)

    # Build the optimizer once so that any per-variable optimizer state survives
    # across batches (re-creating it on every step would discard that state if
    # SGD is ever swapped for a stateful optimizer such as Adam).
    self.sgd_optimizer = tf.keras.optimizers.SGD(learning_rate=lr)

  def forward(self, training, X):
    """
    forward pass
    training: 1 to run the training graph (with dropout), 0 to run the inference graph
    X: Tensor, inputs
    Returns the logits, which are also stored on self.y.
    Raises ValueError if `training` is neither 0 nor 1.
    """
    if training==1:
      compute = self.compute_output
    elif training==0:
      compute = self.compute_output_test
    else:
      # Previously an unexpected flag silently returned the result of an earlier call.
      raise ValueError(f'training must be 1 (train) or 0 (inference), got {training!r}')

    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = compute(X)
    else:
      self.y = compute(X)

    return self.y

  def loss(self, y_pred, y_true):
    '''
    Training objective: sparse categorical cross-entropy plus the L1/L2 penalty on W1 and W2.
    y_pred - Tensor of shape (batch_size, size_output) holding logits
    y_true - Tensor of shape (batch_size,) holding integer class labels
    '''
    # NOTE(review): W3 is not included in the regularisation term - confirm this is intentional.
    return self.loss_object(y_true, y_pred)+self.reg_12(self.W1)+self.reg_12(self.W2)

  def loss2(self, y_pred, y_true):
    '''
    Sparse categorical cross-entropy only (no regularisation term).
    y_pred - Tensor of shape (batch_size, size_output) holding logits
    y_true - Tensor of shape (batch_size,) holding integer class labels
    '''
    return self.loss_object(y_true, y_pred)

  def backward(self, X_train, y_train):
    """
    backward pass: one SGD step on the regularised loss for the given batch
    """
    with tf.GradientTape() as tape:
      predicted = self.forward(1,X_train)
      current_loss = self.loss(predicted, y_train)
    grads = tape.gradient(current_loss, self.MLP_variables)
    self.sgd_optimizer.apply_gradients(zip(grads, self.MLP_variables))

  def _compute_logits(self, X, apply_dropout):
    """
    Shared forward computation for the training and inference paths.
    X: Tensor, inputs (cast to float32 here)
    apply_dropout: bool, whether to apply dropout with rate `dropout_p` after each hidden layer
    Returns the output-layer logits; no softmax is applied because the loss uses from_logits=True.
    """
    X_tf = tf.cast(X, dtype=tf.float32)
    # First hidden layer
    hidden_1 = tf.nn.relu(tf.matmul(X_tf, self.W1) + self.b1)
    if apply_dropout:
      hidden_1 = tf.nn.dropout(hidden_1, rate = dropout_p, seed = seed)
    # Second hidden layer
    hidden_2 = tf.nn.relu(tf.matmul(hidden_1, self.W2) + self.b2)
    if apply_dropout:
      hidden_2 = tf.nn.dropout(hidden_2, rate = dropout_p, seed = seed)
    # Output layer (logits)
    return tf.matmul(hidden_2, self.W3) + self.b3

  def compute_output(self, X):
    """
    Custom method to obtain output tensor during the training forward pass (dropout applied)
    """
    return self._compute_logits(X, apply_dropout=True)

  def compute_output_test(self, X):
    """
    Custom method to obtain output tensor during the inference forward pass (no dropout)
    """
    return self._compute_logits(X, apply_dropout=False)
  

In [None]:
# Scratch check of tf.reshape: the two trailing axes of a (2, 2, 2) array are
# flattened into one, giving shape (2, 1, 4). The cell then displays the
# original array's shape, which np/tf reshape does not modify.
t=np.array([[[1,1],[2,2]],[[3,2],[2,3]]])
print(tf.reshape(t,[2,1,-1]))
t.shape

tf.Tensor(
[[[1 1 2 2]]

 [[3 2 2 3]]], shape=(2, 1, 4), dtype=int32)


(2, 2, 2)

In [None]:
# Number of passes the training loop makes over the training dataset
NUM_EPOCHS = 20

In [7]:
def _reset_metrics(*metrics):
  """Clear the accumulated state of every given Keras metric."""
  for metric in metrics:
    # Prefer `reset_state`; fall back to the older `reset_states` spelling when
    # that is the only one the installed Keras provides.
    reset = getattr(metric, 'reset_state', None) or metric.reset_states
    reset()


def _evaluate(model, dataset, loss_metric, accuracy_metric):
  """
  Run `model` in inference mode over every batch of `dataset`, accumulating the
  unregularised loss (`loss2`) and the accuracy into the given metrics.
  """
  for inputs, outputs in dataset:
    preds = model.forward(0, inputs)
    loss_metric(model.loss2(preds, outputs))
    accuracy_metric(outputs, preds)


# Freshly constructed metrics start empty, so no reset is needed before first use.
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

val_loss = tf.keras.metrics.Mean(name='val_loss')
val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

mlp_on_default = MLP(size_input, size_hidden_1, size_hidden_2, size_output)

valid_ds = tf.data.Dataset.from_tensor_slices((x_valid, y_valid)).batch(batch_size)
# NOTE(review): the shuffle buffer (10000) is smaller than the training set, so
# shuffling is only approximate - confirm this is acceptable.
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000, seed=seed).batch(batch_size)

# Baseline: validation metrics of the untrained network.
_evaluate(mlp_on_default, valid_ds, val_loss, val_accuracy)

print(
  f'Epoch {0}, '
  f'Val Loss: {val_loss.result()}, '
  f'Val Accuracy: {val_accuracy.result() * 100}'
)

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  _reset_metrics(train_loss, train_accuracy, val_loss, val_accuracy)

  for inputs, outputs in train_ds:
    mlp_on_default.backward(inputs, outputs)
    # Training metrics are measured after the update, on the inference path,
    # and use the regularised loss (`loss`), unlike the validation metrics.
    preds = mlp_on_default.forward(0,inputs)
    train_loss(mlp_on_default.loss(preds,outputs))
    train_accuracy(outputs, preds)

  _evaluate(mlp_on_default, valid_ds, val_loss, val_accuracy)

  print(
    f'Epoch {epoch + 1}, '
    f'Loss: {train_loss.result()}, '
    f'Accuracy: {train_accuracy.result() * 100}, '
    f'Val loss: {val_loss.result()}, '
    f'Val Accuracy: {val_accuracy.result() * 100}'
  )

time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))

Epoch 0, Val Loss: 3.3782835006713867, Val Accuracy: 10.760000228881836
Epoch 1, Loss: 0.5836119651794434, Accuracy: 82.02000427246094, Val loss: 0.587325394153595, Val Accuracy: 77.56000518798828
Epoch 2, Loss: 0.41816744208335876, Accuracy: 86.66363525390625, Val loss: 0.442064493894577, Val Accuracy: 84.18000030517578
Epoch 3, Loss: 0.38377049565315247, Accuracy: 88.0345458984375, Val loss: 0.47013506293296814, Val Accuracy: 83.08000183105469
Epoch 4, Loss: 0.36135172843933105, Accuracy: 88.80545806884766, Val loss: 0.39085689187049866, Val Accuracy: 85.79999542236328
Epoch 5, Loss: 0.344319224357605, Accuracy: 89.24909210205078, Val loss: 0.3883298337459564, Val Accuracy: 85.47999572753906
Epoch 6, Loss: 0.33124828338623047, Accuracy: 89.73636627197266, Val loss: 0.3974660634994507, Val Accuracy: 85.5199966430664
Epoch 7, Loss: 0.32009562849998474, Accuracy: 90.11272430419922, Val loss: 0.3855273723602295, Val Accuracy: 85.68000030517578
Epoch 8, Loss: 0.30936217308044434, Accuracy

In [None]:
# Evaluate the trained model on the held-out test set (inference path, unregularised loss).
# The metrics are constructed right here and therefore start empty, so the
# explicit resets that used to follow were redundant and have been dropped.
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)

for inputs, outputs in test_ds:
  preds = mlp_on_default.forward(0,inputs)
  test_loss(mlp_on_default.loss2(preds,outputs))
  test_accuracy(outputs, preds)

print(
  f'Epoch {0}, '
  f'Test Loss: {test_loss.result()}, '
  f'Test Accuracy: {test_accuracy.result() * 100}'
)