In [1]:
import numpy as np

In [2]:
class NeuralNetwork:
    """Fully connected ReLU network with three hidden layers and a scalar output.

    Architecture: input (d) -> d1 -> d2 -> d3 -> 1, with ReLU after each
    hidden layer and a linear output unit. Training minimises the mean
    squared error with plain minibatch SGD.

    Attributes:
        W1, W2, W3, W4: weight matrices of shapes (d1, d), (d2, d1),
            (d3, d2) and (1, d3).
        b1, b2, b3: bias column vectors of shapes (d1, 1), (d2, 1), (d3, 1).
        b4: scalar bias of the output unit.
    """

    def __init__(self, d, d1, d2, d3):
        """Create the network with parameters drawn uniformly from [-1, 1).

        Args:
            d: number of neurons in the input vector/layer.
            d1: number of neurons in the first hidden layer.
            d2: number of neurons in the second hidden layer.
            d3: number of neurons in the third hidden layer.
        """
        # hyperparameters
        self.d = d
        self.d1 = d1
        self.d2 = d2
        self.d3 = d3
        # weights (drawn in the same order as before so seeded runs match)
        self.W1 = np.random.uniform(-1, 1, (self.d1, self.d))
        self.W2 = np.random.uniform(-1, 1, (self.d2, self.d1))
        self.W3 = np.random.uniform(-1, 1, (self.d3, self.d2))
        self.W4 = np.random.uniform(-1, 1, (1, self.d3))
        # biases (column vectors so they broadcast over the minibatch axis)
        self.b1 = np.random.uniform(-1, 1, (self.d1, 1))
        self.b2 = np.random.uniform(-1, 1, (self.d2, 1))
        self.b3 = np.random.uniform(-1, 1, (self.d3, 1))
        self.b4 = np.random.uniform(-1, 1)

    def forward_pass(self, minibatch):
        """Propagate a minibatch through the network.

        Args:
            minibatch: array of shape (m, d), one point per row. A single
                point may be given as (1, d) or as a flat (d,) vector.

        Returns:
            A tuple ``(minibatch_values, net_layers, layers)`` where
            ``minibatch_values`` has shape (1, m) and holds the network output
            for every point, ``net_layers`` is the list of the three hidden
            pre-activations and ``layers`` the list of the three hidden
            ReLU activations; entry k has shape (d_k, m).
        """
        # Promote a flat (d,) point to (1, d); otherwise the bias column
        # would broadcast it into a meaningless (d1, d1) matrix.
        minibatch = np.atleast_2d(minibatch)

        net_layer1 = self.W1 @ minibatch.T + self.b1
        layer1 = np.maximum(net_layer1, 0)
        net_layer2 = self.W2 @ layer1 + self.b2
        layer2 = np.maximum(net_layer2, 0)
        net_layer3 = self.W3 @ layer2 + self.b3
        layer3 = np.maximum(net_layer3, 0)
        minibatch_values = self.W4 @ layer3 + self.b4

        net_layers = [net_layer1, net_layer2, net_layer3]
        layers = [layer1, layer2, layer3]

        return minibatch_values, net_layers, layers

    def backpropagation(self, minibatch, targets):
        """Compute the gradient of the mean squared error over a minibatch.

        The loss is ``mean((f(x_i) - y_i) ** 2)`` over the m minibatch points.

        Args:
            minibatch: array of shape (m, d), one point per row.
            targets: the m target values; any shape holding m elements
                (e.g. (m,), (m, 1) or (1, m)) is accepted.

        Returns:
            ``(dW4, dW3, dW2, dW1, db4, db3, db2, db1)``. Every ``dWk`` has the
            shape of ``Wk``; ``db1``..``db3`` are column vectors shaped like
            ``b1``..``b3`` and ``db4`` is a scalar.
        """
        minibatch = np.atleast_2d(minibatch)
        size = minibatch.shape[0]
        # Align targets with the (1, m) network output. Without this a column
        # vector of targets would broadcast the error into an (m, m) matrix.
        targets = np.asarray(targets).reshape(1, -1)

        minibatch_values, net_layers, layers = self.forward_pass(minibatch)

        # Each delta_k holds dLoss/dnet_k (times m) with shape (units_k, m).
        delta4 = 2 * (minibatch_values - targets)
        dW4 = delta4 @ layers[2].T / size
        db4 = np.sum(delta4) / size

        # ReLU derivative is taken as 1 where the pre-activation is >= 0.
        delta3 = (self.W4.T @ delta4) * (net_layers[2] >= 0)
        dW3 = delta3 @ layers[1].T / size
        # Sum over the sample axis only: one bias gradient per neuron.
        db3 = delta3.sum(axis=1, keepdims=True) / size

        delta2 = (self.W3.T @ delta3) * (net_layers[1] >= 0)
        dW2 = delta2 @ layers[0].T / size
        db2 = delta2.sum(axis=1, keepdims=True) / size

        delta1 = (self.W2.T @ delta2) * (net_layers[0] >= 0)
        dW1 = delta1 @ minibatch / size
        db1 = delta1.sum(axis=1, keepdims=True) / size

        return dW4, dW3, dW2, dW1, db4, db3, db2, db1

    def draw_random_minibatch(self, dataset, minibatch_size):
        """Sample ``minibatch_size`` distinct rows of ``dataset`` at random.

        Args:
            dataset: array whose first axis indexes the points.
            minibatch_size: number of rows to draw (without replacement).

        Returns:
            ``(j, random_minibatch)``: the drawn row indices and the
            corresponding rows ``dataset[j]``.

        Raises:
            ValueError: raised by ``np.random.choice`` when
                ``minibatch_size`` exceeds the number of rows.
        """
        j = np.random.choice(dataset.shape[0], minibatch_size, replace=False)
        random_minibatch = dataset[j]
        return j, random_minibatch

    def sgd(self, x_train, y_train, learning_rate, minibatch_size, iterations):
        """Train the network in place with minibatch stochastic gradient descent.

        Args:
            x_train: training points, shape (n, d).
            y_train: training targets, n values indexed along the first axis.
            learning_rate: step size applied to every gradient.
            minibatch_size: number of points drawn (without replacement)
                for each step.
            iterations: number of gradient steps to perform.
        """
        x_train = np.asarray(x_train)
        y_train = np.asarray(y_train)

        for _ in range(iterations):
            j, B = self.draw_random_minibatch(x_train, minibatch_size)

            # backpropagation runs its own forward pass, so none is needed here.
            dW4, dW3, dW2, dW1, db4, db3, db2, db1 = self.backpropagation(
                B, y_train[j]
            )

            self.W1 = self.W1 - learning_rate * dW1
            self.W2 = self.W2 - learning_rate * dW2
            self.W3 = self.W3 - learning_rate * dW3
            self.W4 = self.W4 - learning_rate * dW4
            self.b4 = self.b4 - learning_rate * db4
            self.b3 = self.b3 - learning_rate * db3
            self.b2 = self.b2 - learning_rate * db2
            self.b1 = self.b1 - learning_rate * db1