In [1]:
import numpy as np

In [3]:
class DnnOneClass:
    """Small fully connected network for binary classification, written with NumPy.

    Hidden layers use ReLU and the output layer uses sigmoid. Throughout the
    class, ``layer_dims`` lists the unit count of every layer *including* the
    input layer, so with ``L = len(layer_dims)`` the trainable layers are
    numbered ``1 .. L-1`` and their parameters are stored under the keys
    ``'W1', 'b1', ..., 'W{L-1}', 'b{L-1}'``.

    A training step is: ``forward_pass`` -> ``cost`` -> ``backward_pass`` ->
    ``update_parameters``. Intermediate results are also kept on the instance
    (``self.cache``, ``self.grads``, ``self.parameters``, ...).
    """

    def create_dataset(self, col_num):
        """Create a noisy XOR dataset with ``col_num`` examples (one per column).

        Returns:
            (X_noise, Y): ``X_noise`` has shape ``(2, col_num)`` and holds the
            binary inputs plus Gaussian noise; ``Y`` has shape ``(1, col_num)``
            and is 1 where exactly one of the two inputs is 1, else 0.
        """
        # Binary inputs: two features per example.
        self.X = np.random.randint(2, size=(2, col_num))

        # Label is 1 only when the column sums to exactly 1 (i.e. XOR of the rows).
        self.Y = np.sum(self.X, axis=0, keepdims=True)
        self.Y[self.Y != 1] = 0

        # Perturb the inputs with Gaussian noise of standard deviation 1/20 = 0.05.
        self.X_noise = np.random.randn(2, col_num)
        self.X_noise = self.X + (self.X_noise / 20)

        return self.X_noise, self.Y

    def initialize_parameters(self, layer_dims):
        """Initialize weights ``W`` and biases ``b`` for every layer.

        Hidden-layer weights are scaled by ``sqrt(2 / fan_in)`` (the ReLU
        heuristic) and output-layer weights by ``sqrt(1 / fan_in)`` (the
        sigmoid heuristic). Biases start at zero.

        Args:
            layer_dims: unit counts per layer, input layer first.

        Returns:
            dict mapping ``'W{i}'`` / ``'b{i}'`` to arrays of shape
            ``(layer_dims[i], layer_dims[i-1])`` / ``(layer_dims[i], 1)``.

        Raises:
            ValueError: if ``layer_dims`` has fewer than two entries.
        """
        if len(layer_dims) < 2:
            raise ValueError("layer_dims must contain at least an input and an output layer")

        self.parameters = {}
        self.L = len(layer_dims)

        # Hidden layers 1 .. L-2: ReLU-specific weight scaling.
        for i in range(1, self.L - 1):
            scale = self.relu(None, heuristic=layer_dims[i - 1])
            self.parameters[f'W{i}'] = np.random.randn(layer_dims[i], layer_dims[i - 1]) * scale
            self.parameters[f'b{i}'] = np.zeros((layer_dims[i], 1))

        # Output layer L-1: sigmoid-specific weight scaling.
        last = self.L - 1
        scale = self.sigmoid(None, heuristic=layer_dims[last - 1])
        self.parameters[f'W{last}'] = np.random.randn(layer_dims[last], layer_dims[last - 1]) * scale
        self.parameters[f'b{last}'] = np.zeros((layer_dims[last], 1))

        return self.parameters

    def sigmoid(self, F, derivative=False, heuristic=False):
        """Sigmoid activation, its derivative, or its weight-init scale.

        Args:
            F: pre-activation ``Z`` for the plain call; the activation ``A``
                when ``derivative`` is true; ignored when ``heuristic`` is set.
            derivative: if true, return ``F * (1 - F)``.
            heuristic: if truthy, it is the fan-in ``n`` and the method returns
                ``sqrt(1 / n)``.
        """
        if derivative:
            return F * (1 - F)  # F = A
        elif heuristic:
            return np.sqrt(1 / heuristic)
        else:
            return 1 / (1 + np.exp(-F))  # F = Z

    def relu(self, F, derivative=False, heuristic=False):
        """ReLU activation, its derivative, or its weight-init scale.

        Args:
            F: pre-activation ``Z``; ignored when ``heuristic`` is set.
            derivative: if true, return 1 where ``F > 0`` and 0 elsewhere.
            heuristic: if truthy, it is the fan-in ``n`` and the method returns
                ``sqrt(2 / n)``.
        """
        if derivative:
            return 1 * (F > 0)  # F = Z
        elif heuristic:
            return np.sqrt(2 / heuristic)
        else:
            return F * (F > 0)  # F = Z

    def forward_pass(self, X, parameters, layer_nums):
        """Run forward propagation and cache every ``Z`` and ``A``.

        Args:
            X: input array with one example per column.
            parameters: dict as produced by ``initialize_parameters``.
            layer_nums: unit counts per layer, input layer first (only its
                length is used).

        Returns:
            dict with ``'A0'`` (the input) and ``'Z{i}'`` / ``'A{i}'`` for each
            layer ``i``; ``'A{L-1}'`` is the network output.
        """
        self.cache = {}
        self.cache['A0'] = X
        self.L = len(layer_nums)

        # Hidden layers: linear step followed by ReLU.
        for i in range(1, self.L - 1):
            self.cache[f'Z{i}'] = np.dot(parameters[f'W{i}'], self.cache[f'A{i-1}']) + parameters[f'b{i}']
            self.cache[f'A{i}'] = self.relu(self.cache[f'Z{i}'])

        # Output layer: linear step followed by sigmoid.
        last = self.L - 1
        self.cache[f'Z{last}'] = np.dot(parameters[f'W{last}'], self.cache[f'A{last-1}']) + parameters[f'b{last}']
        self.cache[f'A{last}'] = self.sigmoid(self.cache[f'Z{last}'])

        return self.cache

    def cost(self, A, Y):
        """Return the mean binary cross-entropy between predictions and labels.

        Args:
            A: predicted probabilities, shape ``(1, m)``.
            Y: 0/1 labels, shape ``(1, m)``.
        """
        self.m = Y.shape[1]

        # Keep probabilities strictly inside (0, 1) so log() never sees 0,
        # which would otherwise turn the cost into inf or nan.
        eps = 1e-15
        A = np.clip(np.asarray(A, dtype=float), eps, 1 - eps)

        self.J = -np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A)) / self.m

        return self.J

    def backward_pass(self, cache, parameters, Y, layer_dims):
        """Run backpropagation and return the gradients of the cost.

        Args:
            cache: dict returned by ``forward_pass``.
            parameters: dict of current weights and biases.
            Y: 0/1 labels, shape ``(1, m)``.
            layer_dims: unit counts per layer, input layer first (only its
                length is used).

        Returns:
            dict mapping ``'dW{i}'`` / ``'db{i}'`` to arrays shaped like the
            corresponding ``'W{i}'`` / ``'b{i}'``.
        """
        self.grads = {}
        self.m = Y.shape[1]
        self.L = len(layer_dims)
        last = self.L - 1

        # Output layer: for sigmoid + cross-entropy, dZ simplifies to (A - Y),
        # so the sigmoid derivative does not need to be evaluated.
        self.dZ = cache[f'A{last}'] - Y
        self.grads[f'dW{last}'] = np.dot(self.dZ, cache[f'A{last-1}'].T) / self.m
        self.grads[f'db{last}'] = np.sum(self.dZ, axis=1, keepdims=True) / self.m

        # Hidden layers, walking backwards: propagate through W, then through ReLU.
        for i in range(self.L - 2, 0, -1):
            self.dA = np.dot(parameters[f'W{i+1}'].T, self.dZ)
            self.dZ = self.dA * self.relu(cache[f'Z{i}'], derivative=True)
            self.grads[f'dW{i}'] = np.dot(self.dZ, cache[f'A{i-1}'].T) / self.m
            self.grads[f'db{i}'] = np.sum(self.dZ, axis=1, keepdims=True) / self.m

        return self.grads

    def update_parameters(self, parameters, grads, learning_rate, layer_dims):
        """Apply one gradient-descent step to every ``W`` and ``b``.

        The arrays inside ``parameters`` are modified in place.

        Args:
            parameters: dict of current weights and biases.
            grads: dict returned by ``backward_pass``.
            learning_rate: step size.
            layer_dims: unit counts per layer, input layer first (only its
                length is used).

        Returns:
            The updated ``parameters`` dict (also stored on ``self.parameters``).
        """
        for i in range(1, len(layer_dims)):
            parameters[f'W{i}'] -= learning_rate * grads[f'dW{i}']
            parameters[f'b{i}'] -= learning_rate * grads[f'db{i}']

        # Keep the instance state in sync with what was just updated.
        self.parameters = parameters
        self.grads = grads

        return self.parameters

In [5]:
# Create the network object. DnnOneClass defines no __init__, so the instance
# carries no state until its methods (create_dataset, initialize_parameters, ...) are called.
dnn = DnnOneClass()
