<a href="https://colab.research.google.com/github/Ruheena-S/cs6910_assignment1/blob/main/Assignment_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Initialising Wandb

!pip install wandb
import wandb
from wandb.keras import WandbCallback
wandb.init(project='Fmnist', entity='iitkgpch')




[34m[1mwandb[0m: Currently logged in as: [33m21ch60r74[0m ([33miitkgpch[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [2]:
#Importing other required modules

from keras.datasets import fashion_mnist
import numpy as np

In [3]:
#Load data into Train and test dataset

((x_train,y_train),(x_test,y_test)) = fashion_mnist.load_data()      

In [4]:
#displaying one image from each set

imgs=[]
classes = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat','Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
for i in range(0,len(set(y_train))):
  class_label=list(y_train).index(i)     #first appearance of each class label
  imgs.append(wandb.Image(x_train[class_label],caption = classes[i]))
          
wandb.log({"examples": imgs})  #logs images to the wandb panel

In [5]:
"""
A class to create an object of one layer along with parameters to specifiy:
  input dimensions,
  number of nodes,
  activation function,
  optimizer algorithm,
  weight initialisation type
"""
class Layer:
    
    def __init__(self, inputDimension, noOfNodes, activation='',optimizer='standard',wtype = 'random'):

        # Optimization Alogorithm for gradient Descent
        if optimizer == 'standard':
            self.optimizer = self.standard
        elif optimizer == 'momentumGD':
            self.optimizer = self.momentumGD
        elif optimizer == 'rmsprop':
            self.optimizer = self.rmsprop 
        elif optimizer == 'adam':
            self.optimizer = self.adam
        elif optimizer == 'sgd':
            self.optimizer = self.sgd
        elif optimizer == 'nadam':
            self.optimizer = self.nadam
        elif optimizer == 'nesterov':
            self.optimizer = self.nesterov


        # Activation Function for each layer
        if activation == 'sigmoid':
            self.activation = activation
            self.activationForward = self.sigmoid
            self.activationBackward = self.sigmoidGrad
        elif activation == 'relu':
            self.activation = activation
            self.activationForward = self.relu
            self.activationBackward = self.relugrad

        elif activation == 'tanh':
            self.activation = activation
            self.activationForward = self.tanh
            self.activationBackward = self.tanhgrad

        else:
            self.activation = 'softmax'
            self.activationForward = self.softmax
            self.activationBackward = self.softmaxGrad


        #Initialsing weights and Momentum weights and velocity weights for optimization Algorithms

        self.weights, self.bias = self.initialize(inputDimension, noOfNodes,activation,wtype = wtype)
        self.prevMW, self.prevMb = np.zeros([noOfNodes,inputDimension]),np.zeros([noOfNodes, 1])
        self.prevVW, self.prevVb = np.zeros([noOfNodes,inputDimension]),np.zeros([noOfNodes, 1])
        self.opt = optimizer

        
    
    # Initialize the layer with some random weights and bias
    def initialize(self, noOfInputFeatures, noOfNodes,activation,wtype):
        np.random.seed(1)

        #random Initalisation using Normal Distribution
        if wtype == 'random':                                        
          weights = np.random.normal(0.0,0.5,size=(noOfNodes, noOfInputFeatures))
          bias = np.ones([noOfNodes, 1])
        
        #Xavier Initalisation
        else:
           weights = np.random.uniform(-(1/np.sqrt(noOfNodes)), (1/np.sqrt(noOfNodes)), size=(noOfNodes, noOfInputFeatures))
           bias = np.ones([noOfNodes, 1])
        return weights, bias

    # Sigmoid activation function
    def sigmoid(self, Z):
        A = 1 / (1 + np.exp(-Z))
        return A

    # derivative of sigmoid function with chain rule applied
    def sigmoidGrad(self, dA):
        s = 1 / (1 + np.exp(-self.prevZ))
        dZ = dA * s * (1 - s)
        return dZ

    #Tanh Activation Function
    def tanh(self,Z):
        return np.tanh(Z)

    # derivative of Tanh function with chain rule applied
    def tanhgrad(self,dA):
        s = self.tanh(self.prevZ)
        return dA*(1-(s**2))

    #Relu Activation function
    def relu(self,Z):
        A = np.maximum(0,Z)
        return A

    # derivative of ReLu function with chain rule applied
    def relugrad(self,dA):
        s = np.maximum(0,self.prevZ)
        t = 1. * dA * (s>0) 
        return t

   

    #SoftMax Activation Function
    def softmax(self,Z):
      expZ = np.exp(Z - np.max(Z))
      A = expZ / expZ.sum(axis=0, keepdims=True)
      return A

    
    #Output Layer Gradient
    def softmaxGrad(self,dA):
      return dA

    
    #Predicting class labels for test or validation data (diiferent from forward because forward stores previous layer inputs)
    def predict(self,A):
        Z = np.dot(self.weights,A) +self.bias
        A = self.activationForward(Z)
        return A

    # Take's the input vector A and does the forward pass using weights and bias
    def forward(self, A):

      if self.opt != 'nesterov':
        Z = np.dot(self.weights,A) +self.bias
      else:
        Z = np.dot(self.weights-0.9*self.prevVW, A) + (self.bias-0.9*self.prevVb)
      self.prevZ = Z
      self.prevA = A
      A = self.activationForward(Z)
      return A

      
    
    # Take's the input vector derivative of A (dA) and does the backward pass
    def backward(self, dA):
        dZ = self.activationBackward(dA)
        m = self.prevA.shape[1]
        self.dW = 1 / m * np.dot(dZ, self.prevA.T) 
        self.db = 1 / m * np.sum(dZ, axis=1, keepdims=True)
        prevdA = np.dot(self.weights.T, dA)
        return prevdA

    
    # Update's the weights and bias with the passed learning_rate
    def standard(self, learning_rate,l2_lambda =0,batch_size =32,t=0):
        self.weights = self.weights - learning_rate * self.dW-learning_rate*(l2_lambda/batch_size)*self.weights
        self.bias = self.bias - learning_rate * self.db-(l2_lambda/batch_size )*self.bias
    
    #Momentum Based Gradient Descent Algorithm
    def momentumGD(self, learning_rate,t,l2_lambda=0,batch_size =32, moving_rate=0.9):
        self.prevMW = moving_rate * self.prevMW + learning_rate * self.dW+(l2_lambda/batch_size)*self.weights
        self.prevMb = moving_rate * self.prevMb + learning_rate * self.db+(l2_lambda/batch_size)*self.bias
        self.weights = self.weights - self.prevMW
        self.bias = self.bias - self.prevMb
        
    #RMSprop Algorithm for gradient descent
    def rmsprop(self, learning_rate,t,l2_lambda=0,batch_size =32, moving_rate = 0.9):
        self.prevVW = moving_rate * self.prevVW + (1-moving_rate) * (self.dW**2)
        self.prevVb = moving_rate * self.prevVb + (1-moving_rate) * (self.db**2)
        for i in self.prevVW:
          i[i<0] = 1e-9
        self.prevVb[self.prevVb<0] = 1e-9 
        self.weights = self.weights - (learning_rate/np.sqrt(self.prevVW+(1e-8))) * self.dW
        self.bias = self.bias - (learning_rate/np.sqrt(self.prevVb+(1e-8))) * self.db
    
    #SGD algorithm for updating weights 
    def sgd(self, dA,learning_rate = 0.001,t = 0,l2_lambda=0,batch_size = 32):
        dZ = self.activationBackward(dA)
        prevdA = np.dot(self.weights.T, dZ)
        m = self.prevA.shape[1]
        for i in range(m):
          self.dW = 1 / m * np.dot(dZ[:,i:i+1], self.prevA[:,i:i+1].T)   #arr[:,2]
          self.db = 1 / m * (dZ[:,i:i+1])
          self.weights = self.weights - (learning_rate * self.dW)-learning_rate*(l2_lambda/batch_size)*self.weights
          self.bias = self.bias - (learning_rate * self.db)-(l2_lambda/batch_size )*self.bias
        
        return prevdA

    #Nesterov Algorithm for Updating weights 
    def nesterov(self,learning_rate,moving_rate = 0.9,l2_lambda =0,batch_size =32,t = 0):
        
        self.prevVW = moving_rate * self.prevVW + learning_rate * self.dW
        self.weights = self.weights - self.prevVW

        self.bl = self.bias - moving_rate * self.prevVb
        self.prevVb = moving_rate * self.prevVb

    #Adam Algorithm for Updating weights
    def adam(self,learning_rate , beta1 = 0.9, beta2 = 0.999,l2_lambda =0,batch_size =32,t=0):
        self.prevMW = beta1 * self.prevMW + (1-beta1)*self.dW
        self.prevMb = beta1 * self.prevMb + (1-beta1)*self.db

        self.prevVW = beta2 * self.prevVW + (1-beta2)*np.square(self.dW)
        self.prevVb = beta2 * self.prevVb + (1-beta2)*np.square(self.db)

        self.prevMWHat = self.prevMW/(1-beta1)
        self.prevMbHat = self.prevMb/(1-beta1)

        self.prevVWHat = self.prevVW/(1-beta2)
        self.prevVbHat = self.prevVb/(1-beta2)

        self.weights = self.weights - learning_rate * np.divide(self.prevMWHat,np.sqrt(self.prevVWHat+(1e-8)))
        self.bias = self.bias - learning_rate * np.divide(self.prevMbHat,np.sqrt(self.prevVbHat+(1e-8)))

    
    #Nadam Algorithm for Updating weights    
    def nadam(self,learning_rate ,t, beta1 = 0.9, beta2 = 0.999,l2_lambda =0,batch_size =32):
        self.prevMW = beta1 * self.prevMW + (1-beta1)*self.dW
        self.prevMb = beta1 * self.prevMb + (1-beta1)*self.db

        self.prevVW = beta2 * self.prevVW + (1-beta2)*np.square(self.dW)
        self.prevVb = beta2 * self.prevVb + (1-beta2)*np.square(self.db)

        self.prevMWHat = (beta1 * self.prevMW / (1-beta1)) + self.dW
        self.prevMbHat = (beta1 * self.prevMb / (1-beta1)) + self.db

        self.prevVWHat = (beta2 * self.prevVW) / (1-beta2)
        self.prevVbHat = (beta2 * self.prevVb) / (1-beta2)

        self.weights = self.weights - learning_rate * np.divide(self.prevMWHat,np.sqrt(self.prevVWHat+(1e-8)))
        self.bias = self.bias - learning_rate * np.divide(self.prevMbHat,np.sqrt(self.prevVbHat+(1e-8)))
        

#This is the code for updating each layer. now to simulate each layer we define another class called as NeuralNetworkv

In [6]:
"""
Neural Network which consists of all layers and some helper functions
"""

class NeuralNetwork:
    
    def __init__(self, layers_size,epochs=5,learning_rate=0.001, l2_lambda = 0,optimizer = 'standard', activation = 'sigmoid',wtype = 'random', loss='cross_entropy'):
        self.layers=[]
        self.layers_size = layers_size
        self.optimizer = optimizer
        self.activation = activation
        self.wtype = wtype
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.l2_lambda = l2_lambda
        if loss=='cross_entropy':
            self.lossFunction = self.cross_entropy
            self.lossBackward = self.cross_entropy_Grad
        elif loss == 'mean_square':
            self.lossFunction = self.meanSquareError
            self.lossBackward = self.meanSquareErrorGrad
        else:
            print('Invalid loss function')
        self.loss=loss

    # add layer to the NN with input dimensions, nodes & activation function
    def addLayer(self, inputDimension=None, noOfNodes=1, activation='',wtype = 'random'):
        if (inputDimension is None):
            if (len(self.layers) == 0):
                print('Invalid number of layers')
            inputDimension = self.layers[-1].outputDimension()
        layer = Layer(inputDimension, noOfNodes, activation,optimizer = self.optimizer, wtype=wtype)
        self.layers.append(layer)

    # MeanSquareError function to calculate loss
    def meanSquareError(self, Y, A):
        loss = np.square(Y - A)
        m = Y.shape[1]
        cost = 1 / m * np.sum(loss)
        return np.squeeze(cost)
    
    # Compute's mean square grad error, Y is true value & A is predicted value
    def meanSquareErrorGrad(self, Y, A):
        dA = -2 * (Y - A)
        return dA


    # Compute's cross entropy error where Y is true value and A is predicted value
    def cross_entropy(self, Y, A):
        m = Y.shape[1]
        cost = -(1/m)*np.sum(Y*np.log(A))
        return np.squeeze(cost)

    
    # Compute's cross entropy grad error, Y is true value & A is predicted value
    def cross_entropy_Grad(self, Y, A):
        dA = A-Y
        return dA


    # Wrapper function to get the cost or loss value of the predicted values
    def cost(self, Y, A):
        return self.lossFunction(Y, A)


    # Forward pass the input vector X through all layers
    def forward(self, X):
        x = np.copy(X)
        for layer in self.layers:
            x = layer.forward(x)
        return x
            

    # Backward pass the true values and predicted values in reverse direction
    def backward(self, Y, A):
        dA = self.lossBackward(Y, A)
        #print(dA.shape)
        if self.optimizer != 'sgd':
            for layer in reversed(self.layers):
                dA = layer.backward(dA)
        else:
            for layer in reversed(self.layers):
                dA = layer.sgd(dA,learning_rate = self.learning_rate)

    
    # Update weights and compute's gradient descent of all layers
    def update(self, learning_rate=0.01,l2_lambda =0,batch_size=32,t=0):
        for layer in self.layers:
            layer.optimizer(learning_rate,l2_lambda = l2_lambda,batch_size = batch_size,t=0)


    # Training the train data using above functions and calculating loss for test and validation data 
    def fit(self,x_train,y_train,x_test,y_test,batch_size = 32):

        from sklearn.model_selection import train_test_split

        x,x_val,y,y_val = train_test_split(x_train,y_train,train_size = 0.9, test_size = 0.1, random_state=10) #Splitting the data for training and validation

        if self.activation=='relu': 
          self.wtype = 'xavier'

        
        #adding Layers with required activation function
        for i in range(1,len(self.layers_size)-1):
          self.addLayer(inputDimension=self.layers_size[i-1], noOfNodes=self.layers_size[i], activation=self.activation, wtype = self.wtype) 

        
        # Output layer with softmax activation
        self.addLayer(inputDimension=self.layers_size[-2], noOfNodes=self.layers_size[-1], activation='softmax', wtype = self.wtype)

        #generating a One-hot encoder vector for y which is y_hot
        y_hot = np.zeros([len(set(y)),len(y)])
        for i in range(y_hot.shape[1]):
          y_hot[y[i]][i] = 1


        #Training the data 
        for i in range(self.epochs):
          
          #avoiding gradient vanishing for Relu
          if self.activation =='relu' and self.optimizer == 'momentumGD' or self.optimizer == 'rmsprop' or self.optimizer == 'nesterov':
            self.learning_rate/=15


          #Training the data for each batch
          for j in range(0,x.shape[0],batch_size):
            xb = x[j:j+batch_size]
            yb = y[j:j+batch_size]
            y_hotb = y_hot[:,j:j+batch_size]
            xb = xb.reshape(xb.shape[0],xb.shape[1]*xb.shape[2]).T
            xb = xb-np.min(xb)/np.max(xb)-np.min(xb)


            #Neural Network Running
            if self.optimizer == 'sgd':
              A = self.forward(xb)
              self.backward(y_hotb,A)
            else:
              A = self.forward(xb)
              self.backward(y_hotb,A)
              self.update(learning_rate=self.learning_rate,l2_lambda = self.l2_lambda,batch_size=batch_size,t= i+1)

          #Predicting the Loss, Accuracy and Predicted labels for Validation data and Test data
          val_loss,val_acc,_=self.predict(x_val,y_val)
          loss,acc,y_pred= self.predict(x_test,y_test)
          
          #Displaying The Loss and Accuracy for Validation data and Test data
          print("After ",i+1,"iterations:")
          print("validation loss;",val_loss,"validation accuracy:",val_acc)
          print("test_loss:",loss,"test accuracy:",acc)

          #Logging the loss and accuracy values for each epoch in wandb panel
          wandb.log({"val_loss":val_loss,"val_accuracy":val_acc,"loss":loss,"accuracy":acc,"epoch":i})

        return y_pred #returning the probabilistic distributions of each class


    #Predicting the Loss and Accuracy for given x,y using current weights in NN
    def predict(self,x,y):
        A = x.reshape(x.shape[0],x.shape[1]*x.shape[2]).T
        y_hot = np.zeros([len(set(y)),len(y)])

        for i in range(y_hot.shape[1]):
          y_hot[y[i]][i] = 1

        for layer in self.layers:
            A = layer.predict(A)
        cross_entropy = -(y_hot * np.log(A)).mean() * y_hot.shape[0]
        y_pred = np.argmax(A,axis = 0)
        acc = (y==y_pred).mean()

        return cross_entropy,acc,A

      
        


In [7]:
#Sweep Configuration with Bayesian Strategy

sweep_config = {
    'name':"my-sweep",
    'method': 'bayes',
    'metric': {
      'name': 'accuracy',
      'goal': 'maximize'   
    },
    
    'parameters': {
        'epochs': {
            'values': [5, 10] #number of epochs
        },
        'number_hidden': {
            'values': [3, 4, 5] #number of hidden layers
        },
        'hidden_inputsize': {
            'values':[32, 64, 128] #size of every hidden layer
        },
        'weight_decay': {
            'values':[0, 0.0005,  0.5] #L2 regularisation
        },
        'learning_rate': {
            'values': [1e-3, 1e-4] 
        },
        'optimizer': {
            'values': ['momentumGD', 'nesterov', 'rmsprop', 'adam', 'nadam','sgd']
        },
        'batch_size' : {
            'values':[16, 32, 64]
        },
        'weight_init': {
            'values':['random','xavier']
        },
        'activation': {
            'values': ['sigmoid','tanh','relu']
        }
        
        }
}

#Generating Sweep id
sweep_id = wandb.sweep(sweep_config, entity="iitkgpch", project="Fmnist")  

Create sweep with ID: yimxqnlv
Sweep URL: https://wandb.ai/iitkgpch/Fmnist/sweeps/yimxqnlv


In [None]:
#defining train function for wandb agent to run it's configurations (directly from documentation)

def train():
  with wandb.init() as run:
    
    config = wandb.config 
    
    wandb.run.name = "hl_" + str(config.hidden_inputsize)+"_bs_"+str(config.batch_size)+"_ac_"+ config.activation
    np.random.seed(1)
    model = NeuralNetwork(layers_size = [784]+[config.hidden_inputsize]*config.number_hidden+[10],epochs = config["epochs"],learning_rate = config.learning_rate,l2_lambda = config.weight_decay,loss='cross_entropy',activation = config.activation, optimizer = config.optimizer, wtype=config.weight_init )
    y_pred = model.fit(x_train,y_train,x_test,y_test,batch_size=config.batch_size)

#running the sweep
wandb.agent('yimxqnlv',train,entity="iitkgpch", project="Fmnist")


[34m[1mwandb[0m: Agent Starting Run: snxud6h7 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_inputsize: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_hidden: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random
Exception in thread ChkStopThr:
Traceback (most recent call last):
  File "C:\Users\vijay\Anaconda3\lib\threading.py", line 973, in _bootstrap_inner
    self.run()
  File "C:\Users\vijay\Anaconda3\lib\threading.py", line 910, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\vijay\Anaconda3\lib\site-packages\wandb\sdk\wandb_run.py", line 203, in check_status
Exception in thread NetStatThr    :
Traceback (most recent call last):
  File "C:\Users\vijay\Anaconda3\lib\threading.py", line 973, in _bootstrap_inner
status_response = self._interface.communicate_stop_statu

After  1 iterations:
validation loss; 2.669528653699111 validation accuracy: 0.3175
test_loss: 2.6338572029151033 test accuracy: 0.3203
After  2 iterations:
validation loss; 2.5960649690246083 validation accuracy: 0.32966666666666666
test_loss: 2.562609295688211 test accuracy: 0.3337
After  3 iterations:
validation loss; 2.5915836603949827 validation accuracy: 0.3308333333333333
test_loss: 2.5588737353139734 test accuracy: 0.3337
After  4 iterations:
validation loss; 2.59128522512093 validation accuracy: 0.3308333333333333
test_loss: 2.5586164288561535 test accuracy: 0.334
After  5 iterations:
validation loss; 2.5912654971829525 validation accuracy: 0.3308333333333333
test_loss: 2.5585990998030916 test accuracy: 0.334
After  6 iterations:
validation loss; 2.591264182642814 validation accuracy: 0.3308333333333333
test_loss: 2.5585979438732864 test accuracy: 0.334
After  7 iterations:
validation loss; 2.591264095009691 validation accuracy: 0.3308333333333333
test_loss: 2.5585978668084146

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁█████████
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▁▁▁▁▁▁▁▁▁
val_accuracy,▁▇████████
val_loss,█▁▁▁▁▁▁▁▁▁

0,1
accuracy,0.334
epoch,9.0
loss,2.5586
val_accuracy,0.33083
val_loss,2.59126


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: syxxvanb with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_inputsize: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_hidden: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


  A = 1 / (1 + np.exp(-Z))
  s = 1 / (1 + np.exp(-self.prevZ))


After  1 iterations:
validation loss; 0.8775090713746748 validation accuracy: 0.6558333333333334
test_loss: 0.8791611120528099 test accuracy: 0.6524
After  2 iterations:
validation loss; 0.7372179492635738 validation accuracy: 0.7165
test_loss: 0.738448112300008 test accuracy: 0.7165
After  3 iterations:
validation loss; 0.6934725833776673 validation accuracy: 0.7311666666666666
test_loss: 0.7003730741635372 test accuracy: 0.7346
After  4 iterations:
validation loss; 0.6585700788892156 validation accuracy: 0.7486666666666667
test_loss: 0.6648315858414233 test accuracy: 0.7431
After  5 iterations:
validation loss; 0.6609154372595782 validation accuracy: 0.7436666666666667
test_loss: 0.6675515938369692 test accuracy: 0.7419
After  6 iterations:
validation loss; 0.6586786604636803 validation accuracy: 0.7468333333333333
test_loss: 0.6651630632187464 test accuracy: 0.7463
After  7 iterations:
validation loss; 0.6278095480333672 validation accuracy: 0.7525
test_loss: 0.6321940512814761 test

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▅▆▆▆▆▇▇██
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▅▄▃▃▃▂▂▁▁
val_accuracy,▁▅▆▇▆▇▇▇██
val_loss,█▅▄▃▃▃▂▂▁▁

0,1
accuracy,0.7727
epoch,9.0
loss,0.5932
val_accuracy,0.7715
val_loss,0.58352


[34m[1mwandb[0m: Agent Starting Run: hkx1a61g with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_inputsize: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_hidden: 4
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


After  1 iterations:
validation loss; 1.2898332264713495 validation accuracy: 0.684
test_loss: 1.2914160766077134 test accuracy: 0.6827
After  2 iterations:
validation loss; 1.231108653053654 validation accuracy: 0.6905
test_loss: 1.2269955426533266 test accuracy: 0.6902
After  3 iterations:
validation loss; 1.2275247080831715 validation accuracy: 0.6903333333333334
test_loss: 1.2234545207283172 test accuracy: 0.6894
After  4 iterations:
validation loss; 1.2273609554447427 validation accuracy: 0.6898333333333333
test_loss: 1.2232498523441235 test accuracy: 0.6899
After  5 iterations:
validation loss; 1.2273500709721876 validation accuracy: 0.6898333333333333
test_loss: 1.223236720022294 test accuracy: 0.6899
After  6 iterations:
validation loss; 1.2273493437909568 validation accuracy: 0.6898333333333333
test_loss: 1.223235845997184 test accuracy: 0.6899
After  7 iterations:
validation loss; 1.22734929530971 validation accuracy: 0.6898333333333333
test_loss: 1.2232357877349647 test accu

In [None]:
#configuration for which Best accuracy is possible (to plot Confusion matrix)
best_config={
    "activation":"relu",
    "batch_size":64,
    "epochs":10,
    "hidden_inputsize":128,
    "learning_rate":1e-03,
    "number_hidden":4,
    "optimizer":"nadam",
    "weight_decay":0.0005,
    "weight_init":"xavier"
}

#generating y prediction to plot confusion matrix
np.random.seed(1)
wandb.init(config = best_config,project = "Fmnist", entity = "iitkgpch")
config = wandb.config
model = NeuralNetwork(layers_size = [784]+[config.hidden_inputsize]*config.number_hidden+[10],epochs = config["epochs"],learning_rate = config.learning_rate,l2_lambda = config.weight_decay,loss='mean_square',activation = config.activation, optimizer = config.optimizer, wtype=config.weight_init )
y_pred = model.fit(x_train,y_train,batch_size=config.batch_size)



VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

After  1 iterations:
validation loss; 0.5392592430586817 validation accuracy: 0.822
test_loss: 0.5642389220738478 test accuracy: 0.814
After  2 iterations:
validation loss; 0.4685713036207118 validation accuracy: 0.848
test_loss: 0.4802781311013714 test accuracy: 0.838
After  3 iterations:
validation loss; 0.458238520467594 validation accuracy: 0.8441666666666666
test_loss: 0.46469568634378366 test accuracy: 0.8458
After  4 iterations:
validation loss; 0.42587087680561125 validation accuracy: 0.8623333333333333
test_loss: 0.4369534224731964 test accuracy: 0.8551
After  5 iterations:
validation loss; 0.4110575861857041 validation accuracy: 0.867
test_loss: 0.4147320587775296 test accuracy: 0.8609
After  6 iterations:
validation loss; 0.4262195697492748 validation accuracy: 0.8645
test_loss: 0.4198452056317647 test accuracy: 0.8642
After  7 iterations:
validation loss; 0.42147501086239464 validation accuracy: 0.8666666666666667
test_loss: 0.4325251989715934 test accuracy: 0.8634
After  8

In [None]:
#plotting Confusion matrix

classes = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat','Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
wandb.log({"conf_mat" : wandb.plot.confusion_matrix(probs=y_pred.T,
                        y_true=y_test,class_names=classes)})

In [None]:
#importing MNIST data to test 3 best possible configurations
from keras.datasets import mnist
((x_train,y_train),(x_test,y_test)) = mnist.load_data()

In [None]:
#initalising Sweep for MNIST data (only 3)
sweep_config = {
    'name':"my-sweep-mnist",
    'method': 'bayes',
    'metric': {
      'name': 'accuracy',
      'goal': 'maximize'   
    },
    
    'parameters': {
        'epochs': {
            'values': [10] #number of epochs
        },
        'number_hidden': {
            'values': [5] #number of hidden layers
        },
        'hidden_inputsize': {
            'values':[64, 128] #size of every hidden layer
        },
        'weight_decay': {
            'values':[0] #L2 regularisation
        },
        'learning_rate': {
            'values': [1e-3] 
        },
        'optimizer': {
            'values': ['adam', 'nadam']
        },
        'batch_size' : {
            'values':[64]
        },
        'weight_init': {
            'values':['xavier']
        },
        'activation': {
            'values': ['relu']
        }
        
        }
}

sweep_id = wandb.sweep(sweep_config, entity="iitkgpch", project="Fmnist")



Create sweep with ID: kpz1umf6
Sweep URL: https://wandb.ai/cs21m053-cs21m071/Assignment%201/sweeps/kpz1umf6


In [None]:
#Writing train function to plot MNIST data accuracies

def train():
  with wandb.init() as run:
    
    config = wandb.config 
    
    wandb.run.name = "hl_" + str(config.hidden_inputsize)+"_bs_"+str(config.batch_size)+"_ac_"+ config.activation
    np.random.seed(1)
    model = NeuralNetwork(layers_size = [784]+[config.hidden_inputsize]*config.number_hidden+[10],epochs = config["epochs"],learning_rate = config.learning_rate,l2_lambda = config.weight_decay,loss='cross_entropy',activation = config.activation, optimizer = config.optimizer, wtype=config.weight_init )
    y_pred = model.fit(x_train,y_train,x_test,y_test,batch_size=config.batch_size)

#Plotting Accuracies for MNIST dataset
wandb.agent(sweep_id,train,entity="iitkgpch", project="Fmnist")

[34m[1mwandb[0m: Agent Starting Run: dgiips3d with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_inputsize: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_hidden: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


After  1 iterations:
validation loss; 0.23740288037763987 validation accuracy: 0.9333333333333333
test_loss: 0.23618098276362565 test accuracy: 0.9331
After  2 iterations:
validation loss; 0.22794069753683807 validation accuracy: 0.9378333333333333
test_loss: 0.22073847113615128 test accuracy: 0.9408
After  3 iterations:
validation loss; 0.1853807347632036 validation accuracy: 0.9508333333333333
test_loss: 0.17693320803346496 test accuracy: 0.952
After  4 iterations:
validation loss; 0.16705336406806354 validation accuracy: 0.9555
test_loss: 0.1556653105576729 test accuracy: 0.9575
After  5 iterations:
validation loss; 0.15992111014597926 validation accuracy: 0.9581666666666667
test_loss: 0.15309428812159254 test accuracy: 0.9586
After  6 iterations:
validation loss; 0.1516566149934731 validation accuracy: 0.9581666666666667
test_loss: 0.1444359982720714 test accuracy: 0.9623
After  7 iterations:
validation loss; 0.14979406264997117 validation accuracy: 0.9618333333333333
test_loss: 0.

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▃▆▇▇██▇█▆
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▇▃▂▂▁▁▂▂▅
val_accuracy,▁▂▅▆▇▇█▇▇▇
val_loss,█▇▄▂▂▁▁▂▃▄

0,1
accuracy,0.9551
epoch,9.0
loss,0.19943
val_accuracy,0.95683
val_loss,0.19229


[34m[1mwandb[0m: Agent Starting Run: tkt1873h with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_inputsize: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_hidden: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


After  1 iterations:
validation loss; 0.19002230285563307 validation accuracy: 0.9443333333333334
test_loss: 0.19605326307557833 test accuracy: 0.9448
After  2 iterations:
validation loss; 0.15863588540689377 validation accuracy: 0.9546666666666667
test_loss: 0.15706268151242556 test accuracy: 0.9567
After  3 iterations:
validation loss; 0.13732162622634536 validation accuracy: 0.9625
test_loss: 0.12996948821416446 test accuracy: 0.9654
After  4 iterations:
validation loss; 0.13277095797617594 validation accuracy: 0.9655
test_loss: 0.12699583131478842 test accuracy: 0.9671
After  5 iterations:
validation loss; 0.14906416340901651 validation accuracy: 0.9646666666666667
test_loss: 0.13454119622315946 test accuracy: 0.966
After  6 iterations:
validation loss; 0.15059113489527484 validation accuracy: 0.9676666666666667
test_loss: 0.1352147658434312 test accuracy: 0.9694
After  7 iterations:
validation loss; 0.16756078999036525 validation accuracy: 0.968
test_loss: 0.15565867689367546 test

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▄▇▇▇█▇█▇▇
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▄▁▁▂▂▄▄▆▆
val_accuracy,▁▄▆▇▇██▇▇█
val_loss,█▄▂▁▃▃▅▆█▇

0,1
accuracy,0.9676
epoch,9.0
loss,0.17414
val_accuracy,0.96767
val_loss,0.18176


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pevw0rkm with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_inputsize: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_hidden: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


After  1 iterations:
validation loss; 0.19002230285563307 validation accuracy: 0.9443333333333334
test_loss: 0.19605326307557833 test accuracy: 0.9448
After  2 iterations:
validation loss; 0.15863588540689377 validation accuracy: 0.9546666666666667
test_loss: 0.15706268151242556 test accuracy: 0.9567
After  3 iterations:
validation loss; 0.13732162622634536 validation accuracy: 0.9625
test_loss: 0.12996948821416446 test accuracy: 0.9654
After  4 iterations:
validation loss; 0.13277095797617594 validation accuracy: 0.9655
test_loss: 0.12699583131478842 test accuracy: 0.9671
After  5 iterations:
validation loss; 0.14906416340901651 validation accuracy: 0.9646666666666667
test_loss: 0.13454119622315946 test accuracy: 0.966
After  6 iterations:
validation loss; 0.15059113489527484 validation accuracy: 0.9676666666666667
test_loss: 0.1352147658434312 test accuracy: 0.9694
After  7 iterations:
validation loss; 0.16756078999036525 validation accuracy: 0.968
test_loss: 0.15565867689367546 test

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▄▇▇▇█▇█▇▇
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▄▁▁▂▂▄▄▆▆
val_accuracy,▁▄▆▇▇██▇▇█
val_loss,█▄▂▁▃▃▅▆█▇

0,1
accuracy,0.9676
epoch,9.0
loss,0.17414
val_accuracy,0.96767
val_loss,0.18176


[34m[1mwandb[0m: Agent Starting Run: 722sipd4 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_inputsize: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_hidden: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


After  1 iterations:
validation loss; 0.20608849596185302 validation accuracy: 0.9418333333333333
test_loss: 0.22175812807617448 test accuracy: 0.9401
After  2 iterations:
validation loss; 0.15823545444515552 validation accuracy: 0.9573333333333334
test_loss: 0.1753582083128471 test accuracy: 0.9512
After  3 iterations:
validation loss; 0.14342805315026905 validation accuracy: 0.9625
test_loss: 0.15131704646329158 test accuracy: 0.96
After  4 iterations:
validation loss; 0.13549020018086175 validation accuracy: 0.9653333333333334
test_loss: 0.13746949277195425 test accuracy: 0.9653
After  5 iterations:
validation loss; 0.14321058215890112 validation accuracy: 0.9673333333333334
test_loss: 0.14129508118700776 test accuracy: 0.965
After  6 iterations:
validation loss; 0.1472329105308683 validation accuracy: 0.9683333333333334
test_loss: 0.16135315651404009 test accuracy: 0.9661
After  7 iterations:
validation loss; 0.16628226808844046 validation accuracy: 0.9666666666666667
test_loss: 0.

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▄▆▇▇▇▇███
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▄▂▁▁▃▃▃▃▄
val_accuracy,▁▅▆▇▇▇▇███
val_loss,█▃▂▁▂▂▄▃▃▅

0,1
accuracy,0.9693
epoch,9.0
loss,0.17401
val_accuracy,0.96883
val_loss,0.17297


[34m[1mwandb[0m: Agent Starting Run: sox3olqd with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_inputsize: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_hidden: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


After  1 iterations:
validation loss; 0.24055181688780175 validation accuracy: 0.9353333333333333
test_loss: 0.23498564012932985 test accuracy: 0.9346
After  2 iterations:
validation loss; 0.19038784110630605 validation accuracy: 0.9473333333333334
test_loss: 0.1916571337660786 test accuracy: 0.9456
After  3 iterations:
validation loss; 0.1707222315406849 validation accuracy: 0.9536666666666667
test_loss: 0.16651937350661583 test accuracy: 0.9528
After  4 iterations:
validation loss; 0.16811862205998754 validation accuracy: 0.9563333333333334
test_loss: 0.1603281112262683 test accuracy: 0.9549
After  5 iterations:
validation loss; 0.1594790390703752 validation accuracy: 0.9575
test_loss: 0.1547321876424242 test accuracy: 0.9579
After  6 iterations:
validation loss; 0.17305968628904503 validation accuracy: 0.9543333333333334
test_loss: 0.16472202615630868 test accuracy: 0.9573
After  7 iterations:
validation loss; 0.17211436513515724 validation accuracy: 0.9556666666666667
test_loss: 0.

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▄▅▆▇▇▇███
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▄▂▁▁▂▁▂▂▂
val_accuracy,▁▄▆▇▇▆▇▇██
val_loss,█▄▂▂▁▂▂▃▃▃

0,1
accuracy,0.963
epoch,9.0
loss,0.16705
val_accuracy,0.95967
val_loss,0.1836


[34m[1mwandb[0m: Agent Starting Run: ihfzse46 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_inputsize: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_hidden: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


After  1 iterations:
validation loss; 0.19002230285563307 validation accuracy: 0.9443333333333334
test_loss: 0.19605326307557833 test accuracy: 0.9448
After  2 iterations:
validation loss; 0.15863588540689377 validation accuracy: 0.9546666666666667
test_loss: 0.15706268151242556 test accuracy: 0.9567
After  3 iterations:
validation loss; 0.13732162622634536 validation accuracy: 0.9625
test_loss: 0.12996948821416446 test accuracy: 0.9654
After  4 iterations:
validation loss; 0.13277095797617594 validation accuracy: 0.9655
test_loss: 0.12699583131478842 test accuracy: 0.9671
After  5 iterations:
validation loss; 0.14906416340901651 validation accuracy: 0.9646666666666667
test_loss: 0.13454119622315946 test accuracy: 0.966
After  6 iterations:
validation loss; 0.15059113489527484 validation accuracy: 0.9676666666666667
test_loss: 0.1352147658434312 test accuracy: 0.9694
After  7 iterations:
validation loss; 0.16756078999036525 validation accuracy: 0.968
test_loss: 0.15565867689367546 test

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▄▇▇▇█▇█▇▇
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▄▁▁▂▂▄▄▆▆
val_accuracy,▁▄▆▇▇██▇▇█
val_loss,█▄▂▁▃▃▅▆█▇

0,1
accuracy,0.9676
epoch,9.0
loss,0.17414
val_accuracy,0.96767
val_loss,0.18176


[34m[1mwandb[0m: Agent Starting Run: mgr1fm1s with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_inputsize: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_hidden: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


After  1 iterations:
validation loss; 0.19002230285563307 validation accuracy: 0.9443333333333334
test_loss: 0.19605326307557833 test accuracy: 0.9448
After  2 iterations:
validation loss; 0.15863588540689377 validation accuracy: 0.9546666666666667
test_loss: 0.15706268151242556 test accuracy: 0.9567
After  3 iterations:
validation loss; 0.13732162622634536 validation accuracy: 0.9625
test_loss: 0.12996948821416446 test accuracy: 0.9654
After  4 iterations:
validation loss; 0.13277095797617594 validation accuracy: 0.9655
test_loss: 0.12699583131478842 test accuracy: 0.9671


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.
