In [2]:
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 29 22:27:15 2021

@author: priyojitDas
"""

import numpy as np
import sys
import matplotlib.pyplot as plt
"""
For this entire file there are a few string constants:
activation:
'linear' - linear (identity)
'logistic' - logistic (sigmoid)
loss:
'sse' - sum of square errors
'bce' - binary cross entropy
"""

np.random.seed(12345)

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``, element-wise.

    The value is computed from ``exp(-|x|)`` so that large-magnitude negative
    inputs do not overflow (the direct formula evaluates ``exp(-x)``, which
    overflows and emits a RuntimeWarning for very negative ``x``).

    Parameters:
        x: scalar or array-like of real numbers.

    Returns:
        A numpy scalar for scalar input, otherwise an array shaped like ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in (0, 1], so it can never overflow.
    z = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);   x < 0: e^x / (1 + e^x) -- the same value rewritten.
    out = np.where(x >= 0, 1. / (1 + z), z / (1 + z))
    # Indexing with () turns a 0-d result back into a scalar and leaves arrays as they are.
    return out[()]

# A class which represents a single neuron
class Neuron:
    """A single neuron: a weight vector with a trailing bias weight plus an activation.

    ``calculate`` stores the input and output of the forward pass,
    ``calcpartialderivative`` accumulates the gradients of every sample it is
    called for, and ``updateweight`` applies the accumulated gradients and
    resets them.
    """
    # Class-level defaults; instances overwrite them in __init__ and in the methods below.
    activation_f = None # activation type of the neuron - 'linear' or 'logistic' (sigmoid); str
    l_rate = None # learning rate; float
    W = None # weight vector of the neuron; numpy array, the last element is the bias weight
    n_input = None # input of the last calculate() call, kept for backpropagation
    n_output = None # output of the last calculate() call, kept for backpropagation
    dLdW = 0 # accumulated dLoss/dW
    dLdb = 0 # accumulated dLoss/db

    def __init__(self, activation, lr, weights):
        """Create a neuron.

        Parameters:
            activation: 'linear' or 'logistic'.
            lr: learning rate used by ``updateweight``.
            weights: numpy array of input weights followed by the bias weight.
                It is stored by reference and later updated in place.
        """
        self.activation_f = activation
        self.l_rate = lr
        self.W = weights
        # Start every instance with empty gradient accumulators.
        self.dLdW = 0
        self.dLdb = 0

    def activate(self,net):
        """Return the activation of ``net``.

        Raises:
            ValueError: if the neuron's activation type is not supported.
        """
        if self.activation_f == 'linear':
            # linear activation is the identity
            return net
        if self.activation_f == 'logistic':
            # logistic activation is the sigmoid function
            return 1./(1+np.exp(-net))
        raise ValueError("unsupported activation %r (expected 'linear' or 'logistic')" % (self.activation_f,))

    def calculate(self,input):
        """Compute the neuron output for ``input`` and remember both for backpropagation."""
        self.n_input = input
        # W[:-1] are the input weights, W[-1] is the bias weight
        self.n_output = self.activate(np.sum(self.W[:-1]*input) + self.W[-1])
        return self.n_output

    def activationderivative(self):
        """Return the derivative of the activation with respect to the net input.

        For 'logistic' it is evaluated from the output stored by the last
        ``calculate`` call.

        Raises:
            ValueError: if the neuron's activation type is not supported.
        """
        if self.activation_f == 'linear':
            return 1
        if self.activation_f == 'logistic':
            return self.n_output*(1-self.n_output)
        raise ValueError("unsupported activation %r (expected 'linear' or 'logistic')" % (self.activation_f,))

    def calcpartialderivative(self, wtimesdelta):
        """Accumulate this neuron's gradients and return ``w * delta`` for the previous layer.

        Parameters:
            wtimesdelta: the summed w*delta (or loss derivative) arriving at this neuron.

        Returns:
            The input weights (bias excluded) multiplied by this neuron's delta.
        """
        delta = wtimesdelta * self.activationderivative() # calculate delta
        self.dLdW += self.n_input * delta  # adds up dLdW of all samples until the next update
        self.dLdb += delta # adds up dLdb of all samples until the next update
        return self.W[:-1] * delta # return w*delta

    def updateweight(self):
        """Apply the accumulated gradients to the weights (in place) and reset the accumulators."""
        self.W[:-1] -= self.l_rate * self.dLdW # update neuron weights
        self.W[-1] -= self.l_rate * self.dLdb # update bias weight
        self.dLdW = 0 # set dLdW and dLdb to 0 after weight update
        self.dLdb = 0
        
#A fully connected layer        
class FullyConnected:
    """A fully connected layer: a list of Neuron objects sharing one activation and learning rate.

    The layer does not know its input size, so the weight matrix must be
    supplied by the caller (one row per neuron, bias weight last in each row).
    """
    n_neuron = None # number of neurons in the layer; integer
    activation_l = None # activation function type for the neurons of the layer; str
    l_rate = None # learning rate
    W = None # weight matrix; one row per neuron, each row holds the input weights followed by the bias weight
    neurons_ = None # the neurons; list of length n_neuron, each item is a Neuron object

    def __init__(self, numOfNeurons, activation, lr, weights=None):
        """Create the layer and its neurons.

        Parameters:
            numOfNeurons: number of neurons in the layer.
            activation: activation type handed to every neuron.
            lr: learning rate handed to every neuron.
            weights: 2D weight matrix with at least ``numOfNeurons`` rows; row i
                is passed to neuron i.

        Raises:
            TypeError: if ``weights`` is omitted.
            IndexError: if ``weights`` has fewer rows than ``numOfNeurons``.
        """
        if weights is None:
            # Without an input size the layer cannot create weights on its own.
            raise TypeError("FullyConnected requires a 'weights' matrix with one row per neuron")
        if len(weights) < numOfNeurons:
            raise IndexError("'weights' has %d row(s) but the layer needs %d (one per neuron)" % (len(weights), numOfNeurons))

        self.n_neuron = numOfNeurons
        self.activation_l = activation
        self.l_rate = lr
        self.W = weights

        # creates 'n_neuron' number of neurons and stores them in a list
        self.neurons_ = [Neuron(self.activation_l,self.l_rate,self.W[i]) for i in range(self.n_neuron)]

    def calculate(self, input):
        """Feed ``input`` to every neuron and return their outputs as a numpy array."""
        return np.array([neuron.calculate(input) for neuron in self.neurons_])

    def calcwdeltas(self, wtimesdelta):
        """Backpropagate through the layer.

        Neuron i receives ``wtimesdelta[i]`` and accumulates its own gradients.
        The weights are NOT updated here; that happens when ``updateweight`` is
        called on the neurons.

        Returns:
            The sum over all neurons of their w*delta, to be passed to the
            previous layer.
        """
        # sum() starts from 0, matching an explicit accumulator initialised to 0
        return sum(neuron.calcpartialderivative(wtimesdelta[i]) for i,neuron in enumerate(self.neurons_))
           
        
#An entire neural network        
class NeuralNetwork:
    """A feed-forward neural network built from FullyConnected layers.

    Supported losses: 'sse' (sum of squared errors) and 'bce' (binary cross
    entropy). ``train`` runs batch gradient descent: the gradients of all
    samples are accumulated and the weights are updated once per iteration.
    """
    n_layer = None # number of layers in the neural network (hidden + output); integer
    n_layer_neurons = None # number of neurons in each layer; list
    n_dim = None # dimension of the inputs or number of input nodes; integer
    activation_t = None # activation function for each of the fully connected layers; list
    loss_t = None # loss function type: 'sse' or 'bce'; str
    l_rate = None # learning rate; float
    W = None # per-layer weight matrices; list of 2D numpy arrays
    layers_ = None # the neural network layers; list of length n_layer, each item is a FullyConnected object
    _EPS = 1e-12 # clipping margin that keeps the 'bce' loss and its derivative finite

    def __init__(self,numOfLayers,numOfNeurons, inputSize, activation, loss, lr, weights=None):
        """Build the network.

        Parameters:
            numOfLayers: number of layers (hidden + output).
            numOfNeurons: sequence with the number of neurons of each layer.
            inputSize: number of input values.
            activation: sequence with the activation type of each layer.
            loss: 'sse' or 'bce'.
            lr: learning rate.
            weights: optional sequence of per-layer weight matrices, each of
                shape (neurons in layer, neurons in previous layer + 1), the
                extra column being the bias weight. Drawn at random when omitted.
        """
        self.n_layer = numOfLayers
        self.n_layer_neurons = numOfNeurons
        self.n_dim = inputSize
        self.activation_t = activation
        self.loss_t = loss
        self.l_rate = lr

        if weights is None:
            # one random matrix per layer; list() also accepts tuples/arrays of layer sizes
            sizes = [self.n_dim] + list(self.n_layer_neurons)
            self.W = [np.random.normal(0,1*sizes[i]**-0.5,[sizes[i+1],sizes[i]+1]) for i in range(len(sizes)-1)]
        else:
            # The neurons update their weights in place. Work on float copies so
            # the caller's array is not modified and integer weights can be trained.
            self.W = [np.array(layer_w, dtype=float) for layer_w in weights]

        # creates 'n_layer' number of fully connected layers and stores them in a list
        self.layers_ = [FullyConnected(self.n_layer_neurons[i],self.activation_t[i],self.l_rate,self.W[i]) for i in range(self.n_layer)]

    def calculate(self,input):
        """Run a forward pass and return the output of the last layer."""
        for layer in self.layers_:
            input = layer.calculate(input)
        return input

    def calculateloss(self,yp,y):
        """Return the scalar loss of prediction ``yp`` against ground truth ``y``.

        Raises:
            ValueError: if the configured loss type is not supported.
        """
        if self.loss_t == 'sse':
            return 0.5 * np.sum((yp-y) ** 2)
        if self.loss_t == 'bce':
            # keep log() away from 0 when a prediction saturates to exactly 0 or 1
            yp = np.clip(yp, self._EPS, 1 - self._EPS)
            # summed over the outputs so both losses return a scalar
            return -np.sum(y*np.log(yp)+(1-y)*np.log(1-yp))
        raise ValueError("unsupported loss %r (expected 'sse' or 'bce')" % (self.loss_t,))

    def lossderiv(self,yp,y):
        """Return the derivative of the loss with respect to the prediction ``yp``.

        Raises:
            ValueError: if the configured loss type is not supported.
        """
        if self.loss_t == 'sse':
            return (yp - y)
        if self.loss_t == 'bce':
            # clip only the denominator: it is exactly 0 when yp is exactly 0 or 1
            yp_c = np.clip(yp, self._EPS, 1 - self._EPS)
            return (yp - y) / (yp_c - yp_c * yp_c)
        raise ValueError("unsupported loss %r (expected 'sse' or 'bce')" % (self.loss_t,))

    def train(self,x,y,iterations,printWeight=False):
        """Train with batch gradient descent and print the final predictions.

        In every iteration each sample is passed forward, its loss derivative
        is backpropagated (accumulating gradients in the neurons), and the
        weights are updated once after all samples were processed.

        Parameters:
            x: samples, one per row.
            y: ground truth outputs, one row per sample.
            iterations: number of passes over all samples.
            printWeight: when True, print every neuron's weights after training
                (last layer first).

        Returns:
            List with the mean loss per sample of every iteration.
        """
        x = np.asarray(x)
        y = np.asarray(y)
        error_l = [] # neural network training error for each iteration
        for it in range(iterations):
            error = 0
            for i in range(x.shape[0]): # pass through each sample
                yp  = self.calculate(x[i]) # forward pass, predicted output in yp
                error += self.calculateloss(yp,y[i]) # add the loss of each sample
                wdelta = self.lossderiv(yp,y[i]) # derivative of the loss
                for layer in self.layers_[::-1]: # backpropagation, last layer first
                    wdelta = layer.calcwdeltas(wdelta)
            for layer in self.layers_[::-1]: # apply the gradients accumulated over all samples
                for neuron in layer.neurons_:
                    neuron.updateweight()
            error_l.append(error/x.shape[0])
        if printWeight:
            for layer in self.layers_[::-1]: # print the trained weights, last layer first
                for neuron in layer.neurons_:
                    print(neuron.W)
        print("Final Output:")
        for i in range(x.shape[0]):
            print("Input:",x[i],"Output:",self.calculate(x[i]))
        return error_l

def _sweep_learning_rates(x, y, lrates, iterations, weights=None, **network_args):
    """Train one fresh network per learning rate and plot the mean loss per epoch.

    Parameters:
        x, y: training samples and ground truth outputs.
        lrates: learning rates to try, one network per value.
        iterations: number of training iterations for every network.
        weights: optional initial weights shared by all runs (None = random).
        network_args: remaining NeuralNetwork constructor arguments.
    """
    for lrate in lrates:
        print("*" * 50)
        print("Learning Rate: %f" % (lrate))
        # Training updates the weight arrays in place, so every run gets its
        # own copy and starts from the same initial values.
        start_w = None if weights is None else np.array(weights, dtype=float)
        nn = NeuralNetwork(lr=lrate, weights=start_w, **network_args)
        t_error = nn.train(x,y,iterations=iterations)
        plt.plot(t_error,label='Learing Rate: %f' % (lrate))
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()


if __name__=="__main__":
    if (len(sys.argv)<2):
        print('a good place to test different parts of your code')

    elif (sys.argv[1]=='example'):
        print('run example from class (single step)')
        w = np.array([[[.15,.2,.35],[.25,.3,.35]],[[.4,.45,.6],[.5,.55,.6]]])
        x = np.array([[0.05,0.1]])
        y = np.array([[0.01,0.99]])
        example_net = dict(numOfLayers=2,numOfNeurons=[2,2],inputSize=2,activation=['logistic','logistic'],loss='sse')

        # Single training step, printing the updated weights. A copy of w is
        # passed so the 100-iteration run below starts from the original
        # weights rather than from the ones this step has already updated.
        nn = NeuralNetwork(lr=0.5,weights=w.copy(),**example_net)
        t_error = nn.train(x,y,iterations=1,printWeight=True)

        _sweep_learning_rates(x, y, [0.5], 100, weights=w, **example_net)

    elif(sys.argv[1]=='and'):
        print('learn and')
        x = np.array([[0,0],[0,1],[1,0],[1,1]])
        y = np.array([[0],[0],[0],[1]])

        _sweep_learning_rates(x, y, [0.5,0.1,0.05,0.01,0.005], 10000,
                              numOfLayers=1,numOfNeurons=[1],inputSize=2,activation=['logistic'],loss='bce')

    elif(sys.argv[1]=='xor'):
        print('learn xor')
        x = np.array([[0,0],[0,1],[1,0],[1,1]])
        y = np.array([[0],[1],[1],[0]])

        print('Single preceptron ......')
        _sweep_learning_rates(x, y, [1.0,0.5,0.05,0.005], 100000,
                              numOfLayers=1,numOfNeurons=[1],inputSize=2,activation=['logistic'],loss='bce')

        print('\n\nANN with two layers (one hidden & one output) ......')
        _sweep_learning_rates(x, y, [1.0,0.5,0.05,0.005], 100000,
                              numOfLayers=2,numOfNeurons=[2,1],inputSize=2,activation=['logistic','logistic'],loss='bce')

        # A deeper variant was tried here once: numOfLayers=3, numOfNeurons=[4,2,1],
        # three 'logistic' layers, loss='bce', lr=0.005, 100000 iterations.



In [3]:
# Single-step worked example from class, run directly in the notebook.
print('run example from class (single step)')

# One weight matrix per layer; each row is one neuron: input weights, then the bias weight.
w = np.array([
    [[.15, .2, .35], [.25, .3, .35]],
    [[.4, .45, .6], [.5, .55, .6]],
])
x = np.array([[0.05, 0.1]])    # one training sample
y = np.array([[0.01, 0.99]])   # its ground truth output

nn = NeuralNetwork(
    numOfLayers=2,
    numOfNeurons=[2, 2],
    inputSize=2,
    activation=['logistic', 'logistic'],
    loss='sse',
    lr=0.5,
    weights=w,
)
# One iteration only; printWeight=True prints every neuron's weights after the update.
t_error = nn.train(x, y, iterations=1, printWeight=True)

run example from class (single step)
[0.35891648 0.40866619 0.53075072]
[0.51130127 0.56137012 0.61904912]
[0.14978072 0.19956143 0.34561432]
[0.24975114 0.29950229 0.34502287]
Final Output:
Input: [0.05 0.1 ] Output: [0.72844176 0.77837692]


In [75]:
import numpy as np
# Scratch arrays for trying out reshaping.
a = np.arange(1, 4)                       # [1, 2, 3]
_grid = np.arange(1, 10).reshape(3, 3)    # [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
b = np.tile(_grid, (2, 1, 1))             # the same 3x3 grid stacked twice -> shape (2, 3, 3)
c = b.reshape(-1).copy()                  # independent 1-D copy of b

In [85]:
# Flatten each slice of b to 9 values, then print every position as a column vector.
for column in b.reshape(-1, 9).T:
    print(column[:, np.newaxis])

[[1]
 [1]]
[[2]
 [2]]
[[3]
 [3]]
[[4]
 [4]]
[[5]
 [5]]
[[6]
 [6]]
[[7]
 [7]]
[[8]
 [8]]
[[9]
 [9]]
