In [5]:
import numpy as np

class Optimizer:
    """Base class for parameter-update rules.

    Only stores the step size; concrete optimizers are expected to
    subclass this and override :meth:`update`.
    """

    def __init__(self, learning_rate=0.01):
        """Remember the step size under ``self.learning_rate``."""
        self.learning_rate = learning_rate

    def update(self, params, grads):
        """Apply one update step; not implemented in the base class.

        Raises:
            NotImplementedError: always, until a subclass overrides it.
        """
        raise NotImplementedError()

In [None]:

'''
ANN code from the last chapter
adding ________
'''

class MyNeuralNetwork:
    """Fully connected feed-forward network with ReLU hidden layers.

    The output layer uses either a sigmoid or a softmax activation and the
    matching cross-entropy cost. Data is laid out column-wise: the input
    passed to :meth:`forward` must have shape ``(layer_size[0], m)`` and the
    labels passed to :meth:`backward` / :meth:`compute_cost` must have shape
    ``(layer_size[-1], m)``, where ``m`` is the number of samples.

    Attributes:
        num_layer: number of layers, including the input layer.
        layer_size: number of units in every layer (input layer first).
        output_activation: ``"sigmoid"`` or ``"softmax"``.
        L2: whether the L2 penalty is added to the cost and the gradients.
        lambd: L2 regularization strength (stored as a float).
        weights: one ``(layer_size[i+1], layer_size[i])`` matrix per layer.
        bias: one ``(layer_size[i+1], 1)`` column vector per layer.
    """

    _OUTPUT_ACTIVATIONS = ("sigmoid", "softmax")

    def __init__(self, layer_size: list, L2: bool = False, output_activation: str = "sigmoid", lambd: float = 0.1):
        """Create the network and initialize its parameters.

        Args:
            layer_size: units per layer, input layer first.
            L2: enable L2 regularization.
            output_activation: ``"sigmoid"`` or ``"softmax"``.
            lambd: L2 strength; numeric strings such as ``"0.1"`` are
                accepted and converted to float.

        Raises:
            ValueError: if ``output_activation`` is not supported.
        """
        # Fail early: an unknown activation would otherwise leave the output
        # layer without any activation in forward() and crash in backward().
        if output_activation not in self._OUTPUT_ACTIVATIONS:
            raise ValueError(
                f"output_activation must be one of {self._OUTPUT_ACTIVATIONS}, "
                f"got {output_activation!r}"
            )

        self.num_layer = len(layer_size)
        self.layer_size = layer_size
        self.output_activation = output_activation
        self.L2 = L2
        # Coerce so the arithmetic in compute_cost()/backward() cannot hit a str.
        self.lambd = float(lambd)

        self.weights = []
        self.bias = []

        # Small random weights, zero biases.
        # NOTE(review): np.random.rand draws from [0, 1), so every initial
        # weight is non-negative; np.random.randn is the more common choice.
        # Left unchanged to preserve existing behaviour.
        for n_in, n_out in zip(layer_size[:-1], layer_size[1:]):
            self.weights.append(np.random.rand(n_out, n_in) * 0.01)
            self.bias.append(np.zeros((n_out, 1)))

    # forward propagation
    def forward(self, a0):
        """Run a forward pass and cache the intermediate values.

        Args:
            a0: input activations, shape ``(layer_size[0], m)``.

        Returns:
            The activation of the output layer.

        Side effects:
            Stores the pre-activations in ``self.z_value`` and the
            activations (starting with ``a0``) in ``self.a_value`` so that
            :meth:`backward` can use them.
        """
        a = a0
        self.z_value = []
        self.a_value = [a0]
        last = len(self.weights) - 1

        for idx, (w, b) in enumerate(zip(self.weights, self.bias)):
            z = np.dot(w, a) + b
            if idx != last:
                a = self.ReLU(z)  # hidden layers use ReLU
            elif self.output_activation == "sigmoid":
                a = self.sigmoid(z)
            else:
                a = self.softmax(z)
            self.z_value.append(z)
            self.a_value.append(a)

        return a

    # backward propagation
    def backward(self, y):
        """Compute the gradients of the cost for the last forward pass.

        Must be called after :meth:`forward`, which fills the caches used
        here.

        Args:
            y: labels, shape ``(layer_size[-1], m)``.

        Returns:
            ``(dw_value, db_value)``: two lists with one gradient per layer,
            in the same order as ``self.weights`` / ``self.bias``. When
            ``self.L2`` is set, the weight gradients include the derivative
            of the L2 penalty used in :meth:`compute_cost`.
        """
        m = y.shape[1]
        y_hat = self.a_value[-1]
        L = self.num_layer - 1
        dw_value = [0] * L
        db_value = [0] * L

        # For sigmoid and softmax paired with their cross-entropy cost,
        # dCost/dz at the output layer reduces to (y_hat - y).
        dz = y_hat - y

        for i in reversed(range(L)):
            if i < L - 1:
                # Propagate through layer i+1's weights, then through ReLU.
                da = np.dot(self.weights[i + 1].T, dz)
                dz = self.ReLU_backward(da, self.z_value[i])
            # a_value[0] is the input, so a_value[i] feeds layer i.
            a_prev = self.a_value[i]
            dw_value[i] = (1 / m) * np.dot(dz, a_prev.T)
            if self.L2:
                # d/dW of lambd/(2m) * sum(W**2)
                dw_value[i] = dw_value[i] + (self.lambd / m) * self.weights[i]
            db_value[i] = (1 / m) * np.sum(dz, axis=1, keepdims=True)

        return dw_value, db_value

    # update w and b
    def update_params(self, dw_value, db_value, alpha=0.01):
        """Take one gradient-descent step of size ``alpha``, in place."""
        for w, b, dw, db in zip(self.weights, self.bias, dw_value, db_value):
            w -= alpha * dw
            b -= alpha * db

    # activation derivatives
    def sigmoid_backward(self, da, z):
        """Return ``da`` multiplied by the sigmoid derivative at ``z``."""
        s = self.sigmoid(z)
        return da * s * (1 - s)

    def ReLU_backward(self, da, z):
        """Return a copy of ``da`` zeroed wherever ``z <= 0``."""
        dz = np.array(da, copy=True)
        dz[z <= 0] = 0
        return dz

    # cost function
    def compute_cost(self, y_hat, y):
        """Return the mean cross-entropy cost, plus the L2 penalty if enabled.

        Args:
            y_hat: network output, same shape as ``y``.
            y: labels, shape ``(layer_size[-1], m)``.
        """
        eps = 1e-8  # keeps log() away from zero
        m = y.shape[1]
        if self.output_activation == "sigmoid":
            cost = -(1 / m) * np.sum(y * np.log(y_hat + eps) + (1 - y) * np.log(1 - y_hat + eps))
        else:
            cost = -(1 / m) * np.sum(y * np.log(y_hat + eps))

        # L2 regularization
        if self.L2:
            norm = sum(np.sum(w ** 2) for w in self.weights)
            cost += self.lambd / (2 * m) * norm
        return cost

    # activation functions
    def ReLU(self, x):
        """Element-wise ``max(0, x)``."""
        return np.maximum(0, x)

    def sigmoid(self, x):
        """Element-wise logistic function."""
        return 1 / (1 + np.exp(-x))

    def softmax(self, z):
        """Softmax over axis 0, so each column (sample) sums to 1.

        The per-column maximum is subtracted before exponentiating; this
        leaves the result unchanged but prevents overflow for large logits.
        """
        shifted = z - np.max(z, axis=0, keepdims=True)
        t = np.exp(shifted)
        return t / np.sum(t, axis=0, keepdims=True)
        
        
    