# L-layer network. 

* We will classify cat pictures with an L-layer neural network.
* First, we will write some helper functions that will be used to build the complete model.
* Broadly, we will follow the steps below: 
    - Initialize parameters for all layers 
    - Forward propagation for Linear -> ReLu layers, i.e. from layer 1 to layer L-1
    - Forward propagation for Linear -> Sigmoid layers, i.e. for layer L.
    - Compute cost function(cross entropy loss)
    - Backward propagation for Linear -> Sigmoid layer L.
    - Backward propagation for Linear -> ReLu layers, i.e. from L-1 to 1.
    - Update the parameters at each step of this process

 * We will be using the following functions for this work:
     - initialize_parameters
     - L_model_forward 
     - Compute_cost 
     - L_model_backward 
     - Update_parameters 
     - sigmoid 
     - relu
     - sigmoid_backward
     - relu_backward

In [1]:
# imports 
import numpy as np
import matplotlib.pyplot as plt
import h5py

In [2]:
# Activation functions for different layers 

## Relu and its back prop implementation 

def relu(Z):
    """
    Apply the rectified linear unit (ReLU) activation element-wise.

    Argument:
    Z : numpy array of pre-activation values

    Returns:
    A : array with the same shape as Z in which every negative entry is replaced by 0
    cache : Z itself (not a copy), kept so back propagation can reuse it
    """
    activation_cache = Z
    rectified = np.maximum(0, Z)

    # sanity check: the activation must not change the shape of its input
    assert(rectified.shape == Z.shape)

    return rectified, activation_cache

def relu_backward(dA, cache):
    """
    Back propagation through a ReLU activation.

    Arguments:
    dA : gradient with respect to the ReLU output, same shape as the cached Z
    cache : the pre-activation values Z that were stored by the forward pass

    Returns:
    dZ : gradient with respect to Z; equal to dA where Z > 0 and 0 wherever Z <= 0
    """

    pre_activation = cache

    # positions where the unit was inactive in the forward pass (Z <= 0)
    inactive = pre_activation <= 0

    # work on a copy so the caller's dA is left untouched
    grad = np.array(dA, copy=True)
    grad[inactive] = 0

    assert(grad.shape == pre_activation.shape)

    return grad

# sigmoid and its backprop implementation

def sigmoid(Z):
    """
    Apply the sigmoid function element-wise, without overflowing for large |Z|.

    Argument:
    Z : weighted input array before activation

    Returns:
    A : activation values after application of sigmoid, same shape as Z
    cache : Z itself, cached for use during back propagation
    """

    # exp(-|Z|) is at most 1, so it cannot overflow, whereas exp(-Z) does
    # for very negative Z.
    decay = np.exp(-np.abs(Z))

    # 1 / (1 + exp(-Z)) rewritten per sign of Z:
    #   Z >= 0 : 1 / (1 + exp(-|Z|))
    #   Z <  0 : exp(-|Z|) / (1 + exp(-|Z|))
    A = np.where(np.asarray(Z) >= 0, 1 / (1 + decay), decay / (1 + decay))
    cache = Z

    return A, cache

def sigmoid_backward(dA, cache):
    """
    Back propagation through a sigmoid activation.

    Arguments:
    dA : gradient with respect to the sigmoid output, same shape as the cached Z
    cache : the pre-activation values Z cached during the sigmoid forward pass

    Returns:
    dZ : gradient with respect to Z, i.e. dA * s * (1 - s) with s = sigmoid(Z)
    """

    Z = cache

    # Recompute sigmoid(Z) in an overflow-free form: exp(-|Z|) is at most 1,
    # whereas exp(-Z) overflows for very negative Z.
    decay = np.exp(-np.abs(Z))
    s = np.where(np.asarray(Z) >= 0, 1 / (1 + decay), decay / (1 + decay))

    # chain rule: derivative of sigmoid is s * (1 - s)
    dZ = dA * s * (1 - s)

    assert(dZ.shape == Z.shape)

    return dZ



In [4]:
    
# A function to load the data.
# Credit: deeplearning.ai, module 1, week 4 assignments

def load_data():
    """
    Load the training and test sets from two HDF5 files.

    Reads 'train_catvnoncat.h5' and 'test_catvnoncat.h5' using relative paths,
    so both files must be in the current working directory.

    Returns:
    train_set_x_orig : contents of "train_set_x" (train set features)
    train_set_y_orig : contents of "train_set_y" (train set labels), reshaped to (1, number of examples)
    test_set_x_orig : contents of "test_set_x" (test set features)
    test_set_y_orig : contents of "test_set_y" (test set labels), reshaped to (1, number of examples)
    classes : contents of "list_classes" from the test file (the list of classes)
    """
    # The `with` blocks close each file as soon as its data has been copied
    # into in-memory numpy arrays, so no file handle is leaked.
    with h5py.File('train_catvnoncat.h5', "r") as train_dataset:
        train_set_x_orig = np.array(train_dataset["train_set_x"][:]) # your train set features
        train_set_y_orig = np.array(train_dataset["train_set_y"][:]) # your train set labels

    with h5py.File('test_catvnoncat.h5', "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:]) # your test set features
        test_set_y_orig = np.array(test_dataset["test_set_y"][:]) # your test set labels
        classes = np.array(test_dataset["list_classes"][:]) # the list of classes

    # turn the 1-D label vectors into row vectors of shape (1, m)
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

In [7]:
# Parameter initialization 

def initialize_parameters(layer_dims, scaling="small"):
    """
    Initialize the parameters for each layer of an L-layer network.

    Arguments:
    layer_dims : sequence of layer sizes; layer_dims[0] is the input size and
                 layer_dims[l] is the number of units in layer l
    scaling : how the standard-normal weights are scaled
              "small"  -> multiplied by 0.01 (default)
              "fan_in" -> divided by sqrt(size of the previous layer)

    Returns:
    parameters : a dictionary with keys "W1", "b1", ..., where
                 Wl has shape (layer_dims[l], layer_dims[l-1]) and
                 bl has shape (layer_dims[l], 1) and is all zeros

    Raises:
    ValueError : if scaling is not one of the supported names

    Note: this reseeds numpy's global random generator with 1 on every call,
    so the returned weights are the same for the same arguments.
    """

    if scaling not in ("small", "fan_in"):
        raise ValueError("scaling must be 'small' or 'fan_in', got %r" % (scaling,))

    # to make the parameters same on random initialization.
    np.random.seed(1)
    # the dictionary which will be returned.
    parameters = {}
    # layer_dims includes the input layer, so the network has L - 1 weight layers
    L = len(layer_dims)

    for l in range(1, L):
        fan_in = layer_dims[l-1]

        # W.shape == (#hidden units, #output from last layer)
        # Draw the weights exactly once; previously a first draw was made and
        # then immediately overwritten by a second one.
        W = np.random.randn(layer_dims[l], fan_in)
        if scaling == "fan_in":
            W = W / np.sqrt(fan_in)
        else:
            W = W * 0.01
        parameters['W' + str(l)] = W

        # b.shape == (#hidden units, 1)
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))

        # double checking
        assert(parameters['W' + str(l)].shape == (layer_dims[l], layer_dims[l-1]))
        assert(parameters['b' + str(l)].shape == (layer_dims[l], 1))

    return parameters
    
    

In [None]:
## Linear forward and linear activation forward functions for implementing forward propagation.
## These functions will be used inside L_model_forward to compute the values for layers 1 to L.

def linear_forward(A, W, b):
    """
    Compute the linear (pre-activation) step of one layer: Z = W.A + b.

    Arguments:
    A : activations from previous layer (or input data): (size of previous layer, number of examples)
    W : weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b : bias vector, numpy array of shape (size of the current layer, 1)

    Returns :
    Z : the input of the activation function, of shape (size of current layer, number of examples)
    cache : the tuple (A, W, b), kept for the backward pass
    """
    pre_activation = np.dot(W, A) + b

    # one row per unit of the current layer, one column per example
    expected_shape = (W.shape[0], A.shape[1])
    assert(pre_activation.shape == expected_shape)

    return pre_activation, (A, W, b)

def linear_activation_forward(A_prev, W, b, activation):
    """
    Implement forward propagation for one Linear -> Activation layer.

    Arguments:
    A_prev : activations from previous layer (or input data): (size of previous layer, number of examples)
    W : weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b : bias vector, numpy array of shape (size of the current layer, 1)
    activation : name of the activation to apply, either "relu" or "sigmoid"

    Returns:
    A : the output of the activation function
    cache : the tuple (linear_cache, activation_cache), where linear_cache is
            what linear_forward returned and activation_cache is what the
            activation function returned

    Raises:
    ValueError : if activation is neither "relu" nor "sigmoid"
    """
    # validate first so no work is done for an unsupported activation
    if activation == "relu":
        activate = relu
    elif activation == "sigmoid":
        activate = sigmoid
    else:
        raise ValueError("activation must be 'relu' or 'sigmoid', got %r" % (activation,))

    Z, linear_cache = linear_forward(A_prev, W, b)
    A, activation_cache = activate(Z)

    assert(A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)

    return A, cache
    
    
    
    

In [None]:
## L-model forward function 

def L_model_forward(X, parameters):
    """
    Implement forward propagation for L-layer model:
    Linear -> ReLu for layers 1 to L-1, then Linear -> Sigmoid for layer L.

    Arguments :
    X : input images, flattened: (input size, number of examples)
    parameters : dictionary with keys "W1", "b1", ..., "WL", "bL"

    Returns:
    AL : final layer output (sigmoid activations of layer L)
    caches : list with one entry per layer, in order from layer 1 to layer L;
             each entry is the tuple (linear_cache, activation_cache), i.e.
             ((A_prev, W, b), Z) for that layer

    Raises:
    ValueError : if parameters holds no layers
    """
    # every layer contributes one W and one b entry
    L = len(parameters) // 2
    if L == 0:
        raise ValueError("parameters must contain at least one layer (W1, b1)")

    caches = []
    A = X

    # hidden layers 1 .. L-1: Linear -> ReLu
    for l in range(1, L):
        Z, linear_cache = linear_forward(A, parameters['W' + str(l)], parameters['b' + str(l)])
        A, activation_cache = relu(Z)
        caches.append((linear_cache, activation_cache))

    # output layer L: Linear -> Sigmoid
    Z, linear_cache = linear_forward(A, parameters['W' + str(L)], parameters['b' + str(L)])
    AL, activation_cache = sigmoid(Z)
    caches.append((linear_cache, activation_cache))

    return AL, caches
    
    
    
