In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from keras.datasets import fashion_mnist

In [4]:
# Load Fashion-MNIST through Keras. load_data() returns a (train, test) pair of
# (images, labels) tuples; the first call downloads the archives (see the log below).
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [5]:
# Hold out the leading 10% of the training samples as a validation set.
# NOTE: this cell rebinds x_train / y_train, so run it exactly once per data load.
val_size = int(0.1 * len(x_train))
x_val, x_train = x_train[:val_size], x_train[val_size:]
y_val, y_train = y_train[:val_size], y_train[val_size:]

# Flatten every image into one row vector and divide by 255.0
# (assumes 8-bit pixel intensities, which puts the features in [0, 1]).
x_train, x_val, x_test = (
    images.reshape(images.shape[0], -1) / 255.0
    for images in (x_train, x_val, x_test)
)

In [37]:
class NN:
  """Fully connected softmax classifier trained with full-batch gradient descent.

  Layout conventions used throughout the class:
    * ``weights[i]`` has shape ``(fan_in, fan_out)`` for layer ``i``.
    * ``biases[i]`` has shape ``(fan_out, 1)`` and is broadcast over the batch.
    * Activations are stored column-wise, i.e. ``(features, n_samples)``.

  Parameters
  ----------
  input_shape : int
    Number of input features per sample.
  output_shape : int
    Number of classes (size of the softmax output layer).
  n_hidden_layers : int
    Number of hidden layers (0 gives a plain softmax regression).
  h_per_layer : int
    Number of units in every hidden layer.
  activation_func : {"relu", "tanh", "sigmoid"}
    Non-linearity applied after every hidden layer.
  loss_func : {"cross_entropy_loss", "squared_error"}
    Training objective, always applied to the softmax output.
  init_type : {"random", "xavier"}
    Parameter initialisation scheme, see ``weight_init``.

  Raises
  ------
  ValueError
    If ``activation_func``, ``loss_func`` or ``init_type`` is not recognised.
  """

  _ACTIVATIONS = ("relu", "tanh", "sigmoid")
  _LOSSES = ("cross_entropy_loss", "squared_error")
  _INIT_TYPES = ("random", "xavier")
  _EPS = 1e-12  # lower bound applied to probabilities before taking the log

  def __init__(self,input_shape,output_shape,n_hidden_layers,h_per_layer,activation_func="relu",loss_func="cross_entropy_loss",init_type="random"):
    if activation_func not in self._ACTIVATIONS:
      raise ValueError("unknown activation_func %r, expected one of %r" % (activation_func, self._ACTIVATIONS))
    if loss_func not in self._LOSSES:
      raise ValueError("unknown loss_func %r, expected one of %r" % (loss_func, self._LOSSES))
    self.input_shape = input_shape
    self.output_shape = output_shape
    self.n_h = n_hidden_layers
    self.k = h_per_layer
    self.weights = []
    self.biases = []
    self.grad_weights = []
    self.grad_biases = []
    self.activation_func = activation_func
    self.loss_func = loss_func
    self.init_type = init_type
    # Parameters are created exactly once, here; forward() must never re-draw them.
    self.weight_init(init_type)

  def activation(self,x):
    """Apply the configured hidden-layer non-linearity element-wise."""
    if self.activation_func == "relu":
      return np.maximum(0,x)
    elif self.activation_func == "tanh":
      return np.tanh(x)
    elif self.activation_func == "sigmoid":
      # exp(-log(1 + exp(-x))) equals the logistic function but cannot overflow.
      return np.exp(-np.logaddexp(0.0,-np.asarray(x,dtype=float)))
    raise ValueError("unknown activation_func %r" % (self.activation_func,))

  def activation_grad(self,x):
    """Derivative of ``activation`` with respect to its pre-activation input ``x``."""
    if self.activation_func == "relu":
      return (np.asarray(x) > 0).astype(float)
    elif self.activation_func == "tanh":
      return 1-np.tanh(x)**2
    elif self.activation_func == "sigmoid":
      s = np.exp(-np.logaddexp(0.0,-np.asarray(x,dtype=float)))
      return s*(1-s)
    raise ValueError("unknown activation_func %r" % (self.activation_func,))

  def softmax(self,x):
    """Softmax over axis 0 (one column per sample).

    The column-wise maximum is subtracted first; this leaves the result
    unchanged mathematically but keeps ``exp`` from overflowing to inf/NaN.
    """
    x = np.asarray(x,dtype=float)
    shifted = np.exp(x - np.max(x,axis=0,keepdims=True))
    return shifted/np.sum(shifted,axis=0,keepdims=True)

  def transpose(self,w):
    """Return the transpose of ``w`` as a nested Python list.

    Kept only for backward compatibility. The network itself uses ndarray
    ``.T`` views, because converting full data matrices to lists is very slow.
    """
    return np.array(w).T.tolist()

  def one_hot(self,y):
    """One-hot encode integer labels ``y`` into an ``(n_samples, output_shape)`` array."""
    return np.eye(self.output_shape)[y]

  def loss(self,y_hat,y_true):
    """Total (summed, not averaged) loss between predictions and one-hot targets.

    ``y_hat`` and ``y_true`` must have the same shape. For cross entropy the
    predictions are floored at a tiny epsilon so that a zero probability does
    not produce ``log(0)``.
    """
    y_hat = np.asarray(y_hat,dtype=float)
    y_true = np.asarray(y_true,dtype=float)
    if self.loss_func == "cross_entropy_loss":
      return -np.sum(y_true*np.log(np.maximum(y_hat,self._EPS)))
    elif self.loss_func == "squared_error":
      return np.sum((y_hat-y_true)**2)
    raise ValueError("unknown loss_func %r" % (self.loss_func,))

  def accuracy(self,y_hat,y_true):
    """Fraction of predicted labels ``y_hat`` that equal the true labels ``y_true``."""
    return np.sum(np.asarray(y_hat)==np.asarray(y_true))/len(y_true)

  def weight_init(self,init_type="random",n=1):
    """(Re)create all weights and biases, discarding any existing parameters.

    Parameters
    ----------
    init_type : {"random", "xavier"}
      "random" draws weights and biases from a standard normal distribution.
      "xavier" draws weights uniformly from +/- sqrt(6 / (fan_in + fan_out))
      and sets the biases to zero.
    n : int
      Number of bias columns. Retained for backward compatibility; leave it
      at 1 so that a single bias vector is shared by every sample.

    Returns
    -------
    (list of ndarray, list of ndarray)
      The new ``self.weights`` and ``self.biases``.

    Draws come from NumPy's global RNG, so ``np.random.seed`` makes them
    reproducible.
    """
    if init_type not in self._INIT_TYPES:
      raise ValueError("unknown init_type %r, expected one of %r" % (init_type, self._INIT_TYPES))
    sizes = [self.input_shape] + [self.k]*self.n_h + [self.output_shape]
    # Start from empty lists so repeated calls replace the layers instead of stacking more.
    self.weights = []
    self.biases = []
    for fan_in,fan_out in zip(sizes[:-1],sizes[1:]):
      if init_type == "xavier":
        limit = np.sqrt(6.0/(fan_in+fan_out))
        self.weights.append(np.random.uniform(-limit,limit,(fan_in,fan_out)))
        self.biases.append(np.zeros((fan_out,n)))
      else:
        self.weights.append(np.random.randn(fan_in,fan_out))
        self.biases.append(np.random.randn(fan_out,n))
    # Zero gradients make an optimizer step before the first backward() a no-op.
    self.grad_weights = [np.zeros_like(w) for w in self.weights]
    self.grad_biases = [np.zeros_like(b) for b in self.biases]
    return self.weights,self.biases

  def basic_optimizer(self,lr):
    """One vanilla gradient-descent step: ``param <- param - lr * grad`` for every layer."""
    self.weights = [w - lr*gw for w,gw in zip(self.weights,self.grad_weights)]
    self.biases = [b - lr*gb for b,gb in zip(self.biases,self.grad_biases)]

  def forward(self,x,y=None):
    """Run the network on a batch.

    Parameters
    ----------
    x : array-like, shape (n_samples, input_shape)
      Input batch; a single 1-D sample is also accepted.
    y : ignored
      Accepted only for backward compatibility with older call sites.

    Returns
    -------
    a_list : list
      Pre-activations per layer, each ``(units, n_samples)``. ``a_list[0]``
      is an unused placeholder (0) because the input layer has none.
    h_list : list
      Activations per layer; ``h_list[0]`` is the transposed input and
      ``h_list[-1]`` holds the softmax class probabilities.
    """
    h = np.atleast_2d(np.asarray(x,dtype=float)).T
    a_list = [0]
    h_list = [h]
    last = len(self.weights)-1
    for i,(w,b) in enumerate(zip(self.weights,self.biases)):
      a = np.dot(w.T,h)+b
      # Hidden layers use the configured activation, the final layer uses softmax.
      h = self.softmax(a) if i == last else self.activation(a)
      a_list.append(a)
      h_list.append(h)
    return a_list,h_list

  def backward(self,a_list,h_list,y):
    """Back-propagate and store gradients of the mean per-sample loss.

    Parameters
    ----------
    a_list, h_list : list
      The values returned by ``forward`` for the same batch.
    y : array-like of int, shape (n_samples,)
      True class labels.

    Fills ``self.grad_weights`` and ``self.grad_biases`` with arrays shaped
    like the corresponding parameters (bias gradients are ``(units, 1)``).
    """
    y_hat = h_list[-1]
    n_samples = y_hat.shape[1]
    target = self.one_hot(np.atleast_1d(y)).T # (classes, n_samples), same layout as y_hat
    if self.loss_func == "cross_entropy_loss":
      # Softmax combined with cross entropy simplifies to (prediction - target).
      delta = y_hat - target
    else:
      # Squared error: chain dL/dy_hat through the softmax Jacobian.
      g = 2*(y_hat - target)
      delta = y_hat*(g - np.sum(g*y_hat,axis=0,keepdims=True))
    delta = delta/n_samples # average over the batch
    grad_w = [None]*len(self.weights)
    grad_b = [None]*len(self.biases)
    for k in range(len(self.weights)-1,-1,-1):
      grad_w[k] = np.dot(h_list[k],delta.T)
      grad_b[k] = np.sum(delta,axis=1,keepdims=True)
      if k > 0:
        # Propagate the error signal to the previous layer's pre-activation.
        delta = np.dot(self.weights[k],delta)*self.activation_grad(a_list[k])
    self.grad_weights = grad_w
    self.grad_biases = grad_b

  def predict(self,x):
    """Return the most probable class index for every sample in ``x``."""
    _,h_list = self.forward(x)
    return np.argmax(h_list[-1],axis=0)

  def _metrics(self,y_hat,y):
    """Mean per-sample loss and accuracy for softmax outputs ``y_hat`` of shape (classes, n)."""
    y = np.atleast_1d(y)
    mean_loss = self.loss(y_hat,self.one_hot(y).T)/len(y)
    acc = self.accuracy(np.argmax(y_hat,axis=0),y)
    return mean_loss,acc

  def train(self,x_train,y_train,x_val,y_val,epochs,lr=0.01):
    """Train with full-batch gradient descent and print metrics once per epoch.

    Parameters
    ----------
    x_train, y_train : training inputs ``(n, input_shape)`` and integer labels ``(n,)``.
    x_val, y_val : validation inputs and labels, same conventions.
    epochs : int
      Number of full passes (one parameter update each).
    lr : float
      Learning rate; defaults to the previously hard-coded 0.01.

    Training metrics are measured on the forward pass made before the epoch's
    update; validation metrics are measured after it. Losses are reported as
    the mean per sample.
    """
    for i in range(epochs):
      a_list_train,h_list_train = self.forward(x_train,y_train) # forward pass
      train_loss,train_acc = self._metrics(h_list_train[-1],y_train)
      self.backward(a_list_train,h_list_train,y_train) # backward pass
      self.basic_optimizer(lr) # updating weights
      _,h_list_val = self.forward(x_val,y_val)
      val_loss,val_acc = self._metrics(h_list_val[-1],y_val)
      print("Epoch: ",i+1,"Train Loss: ",train_loss,"Train Accuracy: ",train_acc,"Val Loss: ",val_loss,"Val Accuracy: ",val_acc)

In [None]:
# --- Architecture -----------------------------------------------------------
num_hid_layers = 3                  # number of hidden layers
hid_layer_size = 64                 # units in each hidden layer
activation = "relu"                 # options: "sigmoid", "tanh", "relu"
weight_init = "xavier"              # options: "random", "xavier" (passed as init_type)

# --- Optimisation -----------------------------------------------------------
loss_func = "cross_entropy_loss"    # options: "cross_entropy_loss", "squared_error"
epochs = 5                          # options: 5, 10
# NOTE: learning_rate is not passed to nn.train below, so it currently has no effect.
learning_rate = 1e-3                # options: 1e-3, 1e-4
# TODO: hyperparameters that are planned but not wired up yet:
#   l2_reg = 0.0005    (L2 regularization strength)
#   batch_size = 32    (options: 16, 32, 64)
#   optimizer = "sgd"  (options: "sgd", "momentum", "nesterov", "rmsprop", "adam", "nadam")

# Build the network for flattened 28x28 inputs (784 features) and 10 classes, then train it.
nn = NN(
    input_shape=784,
    output_shape=10,
    n_hidden_layers=num_hid_layers,
    h_per_layer=hid_layer_size,
    activation_func=activation,
    init_type=weight_init,
    loss_func=loss_func,
)
nn.train(x_train, y_train, x_val, y_val, epochs=epochs)

In [14]:
# Sanity check: after the split and flattening, x_train should be (n_samples, 784).
x_train.shape

(54000, 784)