<a href="https://colab.research.google.com/github/vasid99/cs6910-dl/blob/main/Assignment01/Assignment01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install wandb

In [3]:
# imports
import numpy as np

In [21]:
# function constants
ACTIVATION_SIGMOID   = 0
ACTIVATION_SOFTMAX   = 1
ACTIVATION_THRESHOLD = 2
ACTIVATION_RELU      = 3

LOSS_SQERROR         = 0
LOSS_CROSSENTROPY    = 1

GDOPT_NONE           = 0
GDOPT_MOMENTUM       = 1
GDOPT_NESTEROV       = 2
GDOPT_ADAGRAD        = 3
GDOPT_RMSPROP        = 4
GDOPT_ADAM           = 5
GDOPT_NADAM          = 6

# math functions class
class neuralNetworkMathFunctions:
  """
  Helper class to handle mathematical operations of neural network passes, specifically activations and loss calculation
  """
  def __init__(self):
    """
    Initialize math helper class for a neural network. Hyperparameter object can be the same as given to neural network
    """
    # Hyperparameters are initialized separately by the parent neural network

  def setHyperparameters(self,hp):
    """
    Set mathematical hyperparameters of neural network
    """
    self.activations = hp["activations"]
    self.lossFn = hp["lossFn"]

  def activation(self,layerNum,x):
    """
    Compute and return activation values for a given layer and its sum values
    """
    layerNum -= 1 # index adjustment
    if self.activations[layerNum]==ACTIVATION_SIGMOID:
      return 1/(1+np.exp(-x))
    elif self.activations[layerNum]==ACTIVATION_SOFTMAX:
      z = np.exp(x)
      return z/np.sum(z)
    elif self.activations[layerNum]==ACTIVATION_THRESHOLD:
      return (x>=0)+0
    elif self.activations[layerNum]==ACTIVATION_RELU:
      return np.maximum(x,0)
  
  def activationDerivative(self,layerNum,**kwargs):
    """
    Compute and return activation derivative values for a given layer and its sum or output values depending on the given argument
    """
    assert ( len(kwargs.keys())==1 and np.any([_ in kwargs.keys() for _ in ["x","y"]]) ), "activationDerivative argument malformed. \
    Use activationDerivative(layerNum,x=x_val) or activationDerivative(layerNum,y=y_val)"
    layerNum -= 1 # index adjustment
    
    if "y" in kwargs.keys():
      y = kwargs["y"]
      if self.activations[layerNum]==ACTIVATION_SIGMOID:
        return y*(1-y)
      elif self.activations[layerNum]==ACTIVATION_SOFTMAX:
        return y*(1-y)
      elif self.activations[layerNum]==ACTIVATION_THRESHOLD:
        return y*(1-y)
      elif self.activations[layerNum]==ACTIVATION_RELU:
        return (y>=0)+0
    else:
      x = kwargs["x"]
      if self.activations[layerNum]==ACTIVATION_SIGMOID:
        return np.exp(-x)/(1+np.exp(-x))**2
      elif self.activations[layerNum]==ACTIVATION_SOFTMAX:
        z = np.exp(x)
        s = np.sum(z)
        return z*(s-z)/(s**2)
      elif self.activations[layerNum]==ACTIVATION_THRESHOLD:
        return np.exp(-x)/(1+np.exp(-x))**2
      elif self.activations[layerNum]==ACTIVATION_RELU:
        return (x>=0)+0
  
  def lossOutputDerivative(self,outputData,targetData):
    """
    Compute and return loss derivatives for given output and target data
    """
    if self.lossFn==LOSS_SQERROR:
      return outputData-targetData
    elif self.lossFn==LOSS_CROSSENTROPY:
      return targetData * np.log2(outputData) ## fix function

class neuralNetwork:
  """
  Class for a neural network made up of multiple layers of perceptrons
  """
  def __init__(self,hyperparams):
    """
    Initialize parameters and hyperparameters of neural network
    """
    # create empty math functions object
    self.fns = neuralNetworkMathFunctions()

    # assign basic hyperparameters to neural network and math functions object
    self.hyperparams = {}
    self.setHyperparameters(hyperparams)

    # initialize the weight and bias matrices of the NN
    self.initModel(hyperparams)
  
  def setHyperparameters(self,hp):
    """
    Set hyperparameters of neural network
    """
    # change values of only the hyperparameters specified in the input variable
    self.hyperparams.update(hp)
    
    # use member variables for commonly used hyperparameters
    self.layerSizes       = self.hyperparams["layerSizes"]
    self.batchSize        = self.hyperparams["batchSize"]
    self.learningRate     = self.hyperparams["learningRate"]
    self.epochs           = self.hyperparams["epochs"]
    self.numLayers        = len(self.layerSizes) - 1
    
    # set math functions object hyperparameters
    assert len(self.hyperparams["activations"])==self.numLayers, "number of layers (%d) and number of activations (%d) don't match"%(self.numLayers,len(hp["activations"]))
    self.fns.setHyperparameters(self.hyperparams)

  def initModel(self,hyperparams):
    """
    Initialize parameters (weight and bias matrices) of neural network
    """
    # checking bounds arg
    bounds = (0,1)
    if "initWeightBounds" in hyperparams.keys():
      assert len(hyperparams["initWeightBounds"])==2, "bounds arg has to be a list/tuple of 2 numbers"
      bounds = hyperparams["initWeightBounds"]

    # create list of weight matrices and bias vectors
    # the goal is to make the indexing same as that in lecture derivation, hence the dummy values
    self.wmat = [np.array([1],ndmin=2)]
    self.bias = [np.array([1],ndmin=2)]
    
    # create random initial parameters and append them to the above initialized lists
    for i in range(1,self.numLayers+1):
      self.wmat.append((bounds[1]-bounds[0])*np.random.rand(self.layerSizes[i],self.layerSizes[i-1])+bounds[0])
      self.bias.append((bounds[1]-bounds[0])*np.random.rand(self.layerSizes[i],1)+bounds[0])
  
  def forwardPass(self, inputData):
    """
    Compute output activations of all layers of neural network
    Data can also be given as sets of datapoints (dimensions being layer dimension x dataset size)
    """
    #                                              # --- PSEUDOCODE ---
    h     = inputData                              # h[0] = x
    hData = [h]                                    #
    datasetSize = np.shape(inputData)[1]           #
    #                                              #
    for i in range(1,self.numLayers+1):            # for i from 1 to L:
      a   = self.wmat[i] @ h + self.bias[i]        #     a[i] = w[i] @ h[i-1] + b[i]
      h   = self.fns.activation(i,a)               #     h[i] = f(a[i])
      hData.append(h)
    
    return hData
  
  def backwardPass(self, layerwiseOutputData, targetData):
    """
    Compute weight and bias gradients for all layers of neural network
    Data can also be given as sets of datapoints (dimensions being layer dimension x dataset size)
    """
    #                                                                                        # --- PSEUDOCODE ---
    lossData    = self.fns.lossOutputDerivative(layerwiseOutputData[-1], targetData)         # loss_derivative = d(loss)/dh[L]
    Delta       = lossData                                                                   # Delta[L] = loss_derivative
    datasetSize = np.shape(targetData)[1]                                                    #
    biasInputs  = np.array(np.ones(datasetSize),ndmin=2).T                                   #
    gradW       = []                                                                         #
    gradB       = []                                                                         #
    #                                                                                        #
    for iFwd in range(self.numLayers):                                                       # for i from L to 1:
      i            = self.numLayers - iFwd                                                   #     // index correction
      stocBiasCorr = self.fns.activationDerivative(i,y=layerwiseOutputData[i]) * Delta       #     stochastic_bias_corrections = f'(a[i]) * Delta[i]
      gW           = stocBiasCorr @ layerwiseOutputData[i-1].T                               #     grad(W[i]) = stochastic_bias_corrections x (h[i-1]).T
      gB           = stocBiasCorr @ biasInputs                                               #     grad(b[i]) = sum(stochastic_bias_corrections)
      Delta        = self.wmat[i].T @ stocBiasCorr                                           #     Delta[i-1] = W[i] x stochastic_bias_corrections
      
      gradW.append(gW)
      gradB.append(gB)
    
    # dummy element and order handling
    gradW.append(np.array([0],ndmin=2))
    gradW.reverse()
    gradB.append(np.array([0],ndmin=2))
    gradB.reverse()
    
    return (gradW,gradB)
  
  def infer(self,inputData,**kwargs):
    """
    Perform inference on input dataset using the neural network
    Unless colwiseData=True is given as an argument, data will be interpreted as being dataset size x layer dimension
    """
    # resolving input dimensions
    inputData  = np.array(inputData,ndmin=2)
    if "colwiseData" in kwargs and kwargs["colwiseData"]==True:
      pass
    else:
      inputData  = inputData.T
    assert np.shape(inputData)[0]==self.layerSizes[0], "size of input datapoint differs from size of input vector given as hyperparameter"
    
    # perform forward pass and return last-layer outputs
    return self.forwardPass(inputData)[-1]

  def train(self, inputData, targetData, **kwargs):
    """
    Train the network on the given input and target datasets
    Unless colwiseData=True is given as an argument, data will be interpreted as being dataset size x layer dimension
    """
    # resolving input and target dimensions
    inputData  = np.array(inputData,ndmin=2)
    targetData = np.array(targetData,ndmin=2)
    if "colwiseData" in kwargs and kwargs["colwiseData"]==True:
      pass
    else:
      inputData  = inputData.T
      targetData = targetData.T
    assert np.shape(inputData)[1]==np.shape(targetData)[1], "input and target datasets have different dataset sizes"
    assert np.shape(inputData)[0]==self.layerSizes[0], "size of input datapoint differs from size of input vector given as hyperparameter"
    assert np.shape(targetData)[0]==self.layerSizes[-1], "size of target datapoint differs from size of target vector given as hyperparameter"
    datasetSize = np.shape(targetData)[1]

    # calculate batch parameters
    batchSize = datasetSize if self.batchSize==-1 else self.batchSize
    numBatches = int(np.ceil(datasetSize / batchSize))

    # run training loop
    for epoch in range(self.epochs):
      for batchIndex in range(numBatches):
        # create data batches
        startIndex  = batchSize * batchIndex
        endIndex    = min(startIndex + batchSize, datasetSize)
        inputBatch  = inputData[:,startIndex:endIndex]
        targetBatch = targetData[:,startIndex:endIndex]
        
        # perform forward and backward passes to compute gradients
        layerwiseOutputData = self.forwardPass(inputBatch)
        (gradW, gradB)      = self.backwardPass(layerwiseOutputData,targetBatch)
        
        # perform parameter update
        for i in range(1,self.numLayers+1):
          self.wmat[i] += -self.learningRate * gradW[i]
          self.bias[i] += -self.learningRate * gradB[i]

In [22]:
hyp = {
    "layerSizes": [2,4,1],
    "batchSize": 1,
    "learningRate": 1,
    "epochs": 500,
    "activations": [ACTIVATION_SIGMOID, ACTIVATION_SIGMOID],
    "lossFn": LOSS_SQERROR,
    "initWeightBounds": (-0.1,0.1)
}

x = neuralNetwork(hyp)
inp = np.array([[1,0.5],[-0.5,0.25],[1,2]])
tar = np.array([[0.5],[0.75],[0.67]])
print("Target data:")
print(tar.T)
print("Output before training:")
print(x.infer(inp))
print("Performing training now")
x.train(inp,tar)
print("Output after training for %d epochs with learning rate of %.2f:"%(x.epochs,x.learningRate))
print(x.infer(inp))

Target data:
[[0.5  0.75 0.67]]
Output before training:
[[0.46542051 0.46491969 0.46388329]]
Performing training now
Output after training for 500 epochs with learning rate of 1.00:
[[0.52145658 0.74345998 0.66017379]]


In [23]:
x.setHyperparameters({
    "learningRate":0.2,
    "epochs":5000,
    "activations":[ACTIVATION_RELU, ACTIVATION_RELU]
    # square error loss will work as Hamming distance in this case
})
inp = np.array([[0,0],[0,1],[1,0],[1,1],[0,0],[0,1],[1,0],[1,1]])
tar = np.array([[0],[0],[0],[1],[0],[0],[0],[1]])
print("Target data:")
print(tar.T)
print("Output before training:")
print(x.infer(inp))
print("Performing training now")
x.train(inp,tar)
print("Output after training for %d epochs with learning rate of %.2f:"%(x.epochs,x.learningRate))
print(x.infer(inp))

Target data:
[[0 0 0 1 0 0 0 1]]
Output before training:
[[0.         0.59381962 0.         0.         0.         0.59381962
  0.         0.        ]]
Performing training now
Output after training for 5000 epochs with learning rate of 0.20:
[[0.00000000e+00 0.00000000e+00 1.11022302e-16 1.00000000e+00
  0.00000000e+00 0.00000000e+00 1.11022302e-16 1.00000000e+00]]
