In [1]:
import wandb
from keras.datasets import fashion_mnist
import numpy as np
import cli_args
from sklearn.preprocessing import StandardScaler
from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split

In [2]:
# wandb.init (
# # set the wand project where this run will be logged
# project="CS6910_Assignment_1",
# )
# # Loading the fashion-MNIST dataset
# (x_train, y_train), (_test, _test) = fashion_mnist.load_data ()
# #class names for fashion-MIST
# class_names = ['T-shirt',
# 'Trouser',
# 'Pullover',
# 'Dress',
# 'Sandal', 'Shirt',
# 'Sneaker',
# 'Bag',
# 'Coat',
# 'Ankle boot']
# # creating 2x5 grid
# img={}
# for i in range(10):
#     # to find first image in the training set with class label i
#     idx = np.where (y_train == i)[0][0]
#     # Plot the image
#     img[class_names[i]] = wandb.Image(x_train[idx], caption=class_names[i])
# wandb.log(img)
# # [optional] finish the wand run, necessary in notebooks
# wandb.finish()

In [3]:
arguments = cli_args.argumentsIntake()

usage: ipykernel_launcher.py [-h] [-wp WANDB_PROJECT] [-we WANDB_ENTITY]
                             [-d DATASET] [-e EPOCHS] [-b BATCH_SIZE]
                             [-l LOSS] [-o OPTIMIZER] [-lr LEARNING_RATE]
                             [-m MOMENTUM] [-beta BETA] [-beta1 BETA1]
                             [-beta2 BETA2] [-eps EPSILON] [-w_d WEIGHT_DECAY]
                             [-w_i WEIGHT_INIT] [-nhl NUM_LAYERS]
                             [-sz HIDDEN_SIZE [HIDDEN_SIZE ...]]
                             [-a ACTIVATION] [-oa OUTPUT_ACTIVATION]
ipykernel_launcher.py: error: unrecognized arguments: -f /Users/vanditshah/Library/Jupyter/runtime/kernel-8ffb2f94-ed55-4ded-845d-51c819fa1f72.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [4]:
class ActiviationFunction:
    def __init__(self, name):
        self.act_name = name
    
    @property
    def setname(self, new_name):
        self.act_name = new_name

    def activate(self, data, backprop = False):
        if self.act_name == "identity" and backprop:
            pass
        elif self.act_name == "identity" and not backprop:
            return self.identity(data)
        elif self.act_name == "sigmoid" and backprop:
            return self.backprop_sigmoid(data)
        elif self.act_name == "sigmoid" and not backprop:
            return self.sigmoid(data)
        elif self.act_name == "tanh" and backprop:
            return self.backprop_tanh(data)
        elif self.act_name == "tanh" and not backprop:
            return self.tanh(data)
        elif self.act_name == "ReLU" and backprop:
            pass
        elif self.act_name == "ReLU" and not backprop:
            return self.tanh(data)

    
    def identity(self, data):
        return data

    def sigmoid(self, data):
        return 1/(1 + np.exp(-data))
    
    def backprop_sigmoid(self, data):
        temp = self.sigmoid(data)
        return (1 - temp)*temp
    
    def relu(self, data):
        return np.max(0, data)
    
    def tanh(self, data):
        # return (np.exp(data) - np.exp(-data))/(np.exp(data) + np.exp(-data))
        return np.tanh(data)
    
    def backprop_tanh(self, data):
        return 1 - np.square(np.tanh(data))
    



In [5]:
class LossFunction:
    def __init__(self, name):
        self.name = name
    
    def getLoss(self, y, yhat):
        if self.name == "cross_entropy":
            return self.cross_entropy_loss(y, yhat)
        elif self.name == "mean_squared_error":
            return self.mean_squared_error_loss(y, yhat)
    
    def getGradient(self, y, yhat):
        if self.name == "cross_entropy":
            return self.backprop_cross_entropy(y, yhat)
        elif self.name == "mean_squared_error":
            return self.backprop_mean_squared_loss(y, yhat)
    
    def cross_entropy_loss(self, y, yhat):
        probability_predicted = yhat[np.argmax(y)]
        if probability_predicted <= 0:
            probability_predicted += 10**(-9)
        return -np.log(probability_predicted)
    
    def mean_squared_error_loss(self,y, yhat):
        return (1/2)*np.sum(np.square(yhat - y))
    
    def backprop_cross_entropy(self, y, yhat):
        return yhat - y
    
    def backprop_mean_squared_loss(self, y, yhat):
        return (yhat - y) * yhat * (1 - yhat)

In [6]:
from tqdm.notebook import tqdm

In [7]:
class NeuralNetwork:
    def __init__(self, arguments) -> None:
        self.args_backup = arguments # is saved for later variable purposes
        self.n_hidden_layers = arguments.num_layers
        self.neurons_h_layers = arguments.hidden_size
        self.h_activation_func_name = arguments.activation
        self.output_activation_func_name = arguments.output_activation
        self.weight_decay = arguments.weight_decay
        self.epsilon = arguments.epsilon
        self.learning_rate = arguments.learning_rate
        self.optimizer_name = arguments.optimizer
        self.loss_func_name = arguments.loss
        self.batch_size = arguments.batch_size
        self.epochs = arguments.epochs
        self.dataset_name = arguments.dataset

        self.load_dataset()

        self.layers = [self.x_train.shape[1]] + self.neurons_h_layers + [self.y_train[0].shape[0]]
        self.n_layers = self.n_hidden_layers + 2

        self.activation = ActiviationFunction(arguments.activation)
        self.outputActivation = ActiviationFunction(arguments.output_activation)
        self.loss = LossFunction(arguments.loss)

    def load_dataset(self):
        preprocessor = StandardScaler()
        if self.dataset_name == "fashion_mnist":
            from keras.datasets import fashion_mnist
            (self.x_train, self.y_train), (self.x_test, self.y_test) = fashion_mnist.load_data()

            self.x_train = self.x_train.astype('float64')
            self.y_train = self.y_train.astype('float64')
            self.x_test = self.x_test.astype('float64')
            self.y_test = self.y_test.astype('float64')

            self.x_train = self.x_train.reshape(self.x_train.shape[0],self.x_train.shape[1]*self.x_train.shape[2])
            self.x_train = preprocessor.fit_transform(self.x_train)
            self.y_train = self.y_train.reshape(self.y_train.shape[0],1)
            self.y_train = to_categorical(self.y_train)

            self.x_test = self.x_test.reshape(self.x_test.shape[0],self.x_test.shape[1]*self.x_test.shape[2])
            self.x_test = preprocessor.fit_transform(self.x_test)
            self.y_test = self.y_test.reshape(self.y_test.shape[0],1)
            self.y_test = to_categorical(self.y_test)

            self.x_train, self.x_val, self.y_train, self.y_val = train_test_split(self.x_train, self.y_train, test_size=0.10, random_state=42)
            
        else:
            pass
    
    def init_parameters(self, debug = False):
        if self.args_backup.weight_init == "random":
            self.randomInit(debug)
        elif self.args_backup.weight_init == "xavier":
            self.xavierInit(debug)
    
    def randomInit(self, _print=False):
        self.parameters = {}
        constant = 0.04
        for i in range(1, self.n_layers):
            self.parameters[f"W{i}"] = np.random.randn(self.layers[i], self.layers[i-1])*constant
            self.parameters[f"b{i}"] = np.zeros((self.layers[i], 1))
            if(_print):
                print(f'W{i} -> {self.parameters["W" + str(i)].shape}')
                print(f'b{i} -> {self.parameters["b" + str(i)].shape}')
    
    def xavierInit(self, _print=False):
        self.parameters = {}
        for i in range(1, self.n_layers):
            self.parameters[f"W{i}"] = np.random.randn(self.layer_sizes[i], self.layer_sizes[i - 1]) * np.sqrt(2/ (self.layer_sizes[i - 1] + self.layer_sizes[i]))
            self.parameters[f"b{i}"] = np.zeros((self.layers[i], 1))
            if(_print):
                print(f'W{i} -> {self.parameters["W" + str(i)].shape}')
                print(f'b{i} -> {self.parameters["b" + str(i)].shape}')

    
    def gradsInit(self, debug = False):
        temp_gradients = {}
        for i in range(1, self.n_layers):
            temp_gradients[f"W{i}"] = np.zeros((self.layers[i], self.layers[i - 1]))
            temp_gradients[f"b{i}"] = np.zeros((self.layers[i], 1))
            if debug:
                print(f'W{i} -> {temp_gradients["W" + str(i)].shape}')
                print(f'b{i} -> {temp_gradients["b" + str(i)].shape}')
        return temp_gradients

    
    def forward_propagation(self, data):
        self.a = {}  # preactivation
        self.h = {}  # activation
        self.h["h0"] = data.T
        
        for i in range(1, self.n_layers - 1):
            self.a[f"a{i}"] = np.matmul(self.parameters[f"W{i}"].T,self.h[f"h{i-1}"]) + self.parameters[f"b{i}"]
            self.h[f"h{i}"] = self.activation.activate(self.a[f"a{i}"])
        
        # for output layer
        self.a[f"a{self.n_layers-1}"] = np.matmul(self.parameters[f"W{self.n_layers-1}"].T,self.h[f"h{self.n_layers-2}"]) + self.parameters[f"b{self.n_layers-1}"]
        self.h[f"h{self.n_layers - 1}"] = self.outputActivation.activate(self.a[f"a{i}"])
    

    def back_propagation(self, data):
        gradients = {}
        gradients[f"a{self.n_layers - 1}"] = self.loss.getGradient(data.T, self.h[f"h{self.n_layers - 1}"])
        for i in range(self.n_layers - 1, 1, -1):
            gradients[f"W{i}"] = np.outer(gradients[f'a{i}'], self.h[f'h{i-1}'])
            gradients[[f"b{i}"]] = gradients[f"a{i}"]
            gradients[f"h{i-1}"] = np.dot(self.parameters[f"W{i}"].T, gradients[f"a{i}"])
            gradients[f"a{i-1}"] = gradients[f"h{i-1}"] * self.activation.activate(self.a[f"a{i-1}"], backprop=True)
        
        gradients[f"W{1}"] = np.outer(gradients[f'a{1}'], self.h[f'h{1-1}'])
        gradients[[f"b{1}"]] = gradients[f"a{1}"]
        gradients[f"h{1-1}"] = np.dot(self.parameters[f"W{1}"].T, gradients[f"a{1}"])

        return gradients
    
    def generateMetrics(self):
        pass

    def stochastic_gradient_descent(self):
        for i in tqdm(range(self.epochs), desc="Epochs"):
            for id, x, y in tqdm(enumerate(zip(self.x_train, self.y_train)), desc = "Optimizer Algorithm"):
                self.forward_propagation(x)
                gradients = self.back_propagation(y)
                for key in self.parameters.keys():
                    self.parameters[key] = self.parameters[key] - self.learning_rate * gradients[key]
            
            # TO DO: Print Accuracy for each epoch
    
    def moment_based_gradient_descent(self):
        self.momentum = self.args_backup.momentum
        

In [1]:
from tqdm.notebook import tqdm
from time import sleep

for i in tqdm(range(4), desc = "Epochs \n"):
    for j in tqdm(range(500), desc = "Gradient Descent", leave = False):
        sleep(0.01)

Epochs 
:   0%|          | 0/4 [00:00<?, ?it/s]

Gradient Descent:   0%|          | 0/500 [00:00<?, ?it/s]

Gradient Descent:   0%|          | 0/500 [00:00<?, ?it/s]

KeyboardInterrupt: 