<font color=#FF0000 size=4 face="黑体">Package import</font>

In [None]:
import time
import numpy as np
import h5py
import matplotlib.pyplot as plt
import scipy
from scipy import ndimage

<font color=#FF0000 size=4 face="黑体">Predefine Functions</font>

In [None]:
def sigmoid(z):
    """
    Element-wise logistic sigmoid activation.

    Arguments:
    z -- pre-activation value (scalar or numpy array)

    Returns:
    s -- 1 / (1 + exp(-z)), computed element-wise
    z -- the input, returned unchanged so the caller can keep it as a cache
    """
    denominator = 1 + np.exp(-z)
    activation = 1 / denominator
    return activation, z

In [None]:
def relu(z):
    """
    Element-wise rectified linear unit activation.

    Arguments:
    z -- pre-activation value (scalar or numpy array)

    Returns:
    s -- max(z, 0), computed element-wise
    z -- the input, returned unchanged so the caller can keep it as a cache
    """
    rectified = np.maximum(z, 0)
    return rectified, z

In [None]:
def sigmoid_backward(dA, activation_cache):
    """
    Backward pass through a sigmoid activation.

    Arguments:
    dA -- gradient of the cost with respect to the post-activation value
    activation_cache -- the pre-activation value Z stored during the forward pass

    Returns:
    dZ -- gradient of the cost with respect to Z
    """
    z = activation_cache
    # Recompute the activation locally: sigmoid() returns an (activation, cache)
    # tuple, which cannot be used directly in arithmetic.
    s = 1 / (1 + np.exp(-z))
    # Chain rule: dZ = dA * sigma'(Z), with sigma'(Z) = s * (1 - s).
    dZ = dA * s * (1 - s)
    return dZ

In [None]:
def relu_backward(dA, activation_cache):
    """
    Backward pass through a ReLU activation.

    Arguments:
    dA -- gradient of the cost with respect to the post-activation value
    activation_cache -- the pre-activation value Z stored during the forward pass

    Returns:
    dZ -- gradient of the cost with respect to Z
    """
    z = activation_cache
    # ReLU has derivative 1 where Z > 0 and 0 elsewhere, so the upstream
    # gradient passes through only at positive pre-activations.
    dZ = np.where(np.asarray(z) > 0, dA, 0.0)
    return dZ

<font color=#FF0000 size=4 face="黑体">Initialize the L-layer Neural Network</font>

In [None]:
def initialize_parameters(layer_dims, seed=None):
    """
    Create the weight matrices and bias vectors for every layer of the network.

    Arguments:
    layer_dims -- python array (list) containing the dimensions of each layer in our network
    seed -- optional seed for numpy's global random generator; when None the
            generator state is left untouched

    Returns:
    parameters -- python dictionary with keys "W1", "b1", ..., "WL", "bL" where
                  Wl has shape (layer_dims[l], layer_dims[l - 1]) and
                  bl has shape (layer_dims[l], 1)
    """
    if seed is not None:
        np.random.seed(seed)

    parameters = {}
    L = len(layer_dims)

    for l in range(1, L):
        # Small random weights break symmetry between units; biases can start at zero.
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l - 1]) * 0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))

        assert(parameters['W' + str(l)].shape == (layer_dims[l], layer_dims[l - 1]))
        assert(parameters['b' + str(l)].shape == (layer_dims[l], 1))

    return parameters

<font color=#FF0000 size=4 face="黑体">Linear Forward</font>

In [None]:
def linear_forward(A, W, b):
    """
    Linear part of a layer's forward propagation.

    Arguments:
    A -- activations from the previous layer (or the input data)
    W -- weight matrix of the current layer
    b -- bias vector of the current layer

    Returns:
    Z -- np.dot(W, A) + b, the input of the activation function
    cache -- tuple (A, W, b) kept for the backward pass
    """
    weighted_input = np.dot(W, A)
    Z = weighted_input + b

    expected_shape = (W.shape[0], A.shape[1])
    assert Z.shape == expected_shape

    return Z, (A, W, b)

<font color=#FF0000 size=4 face="黑体">Linear-Activation Forward</font>

In [None]:
def linear_activation_forward(A_prev, W, b, activation):
    """
    Forward propagation for one LINEAR -> ACTIVATION layer.

    Arguments:
    A_prev -- activations from the previous layer (or the input data)
    W -- weight matrix of the current layer
    b -- bias vector of the current layer
    activation -- the activation to be used in this layer, stored as a text string "sigmoid" or "relu"

    Returns:
    A -- the post-activation value of this layer
    cache -- tuple (linear_cache, activation_cache) kept for the backward pass

    Raises:
    ValueError -- if activation is neither "sigmoid" nor "relu"
    """
    # Resolve the activation first so an unsupported name fails with a clear
    # error instead of an unbound-variable error further down.
    if activation == "sigmoid":
        activation_fn = sigmoid
    elif activation == "relu":
        activation_fn = relu
    else:
        raise ValueError('activation must be "sigmoid" or "relu", got %r' % (activation,))

    Z, linear_cache = linear_forward(A_prev, W, b)
    A, activation_cache = activation_fn(Z)

    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)

    return A, cache

<font color=#FF0000 size=4 face="黑体">L-layer Model</font>

In [None]:
def L_model_forward(X, parameters):
    """
    Forward propagation through the whole network: relu for layers
    1 .. L-1, then sigmoid for layer L.

    Arguments:
    X -- input data
    parameters -- dictionary holding "W1", "b1", ..., "WL", "bL"

    Returns:
    AL -- last post-activation value
    caches -- list of the caches returned by linear_activation_forward():
                the relu layers at indices 0 to L-2
                the sigmoid layer at index L-1
    """
    # Each layer contributes one W and one b entry.
    num_layers = len(parameters) // 2
    caches = []
    current = X

    # Hidden layers.
    for idx in range(1, num_layers):
        weights = parameters["W" + str(idx)]
        bias = parameters["b" + str(idx)]
        current, layer_cache = linear_activation_forward(current, weights, bias, activation="relu")
        caches.append(layer_cache)

    # Output layer.
    out_weights = parameters["W" + str(num_layers)]
    out_bias = parameters["b" + str(num_layers)]
    AL, out_cache = linear_activation_forward(current, out_weights, out_bias, activation="sigmoid")
    caches.append(out_cache)

    expected_shape = (1, X.shape[1])
    assert AL.shape == expected_shape

    return AL, caches

<font color=#FF0000 size=4 face="黑体">Cost function</font>

In [None]:
def compute_cost(AL, Y):
    """
    Cross-entropy cost averaged over the examples.

    Arguments:
    AL -- predicted probabilities; indexed here as one column per example
    Y -- true labels, same shape as AL; Y.shape[1] is used as the number of examples

    Returns:
    cost -- the cross-entropy cost as a 0-dimensional value
    """
    m = Y.shape[1]

    log_likelihood = np.multiply(Y, np.log(AL)) + np.multiply(1 - Y, np.log(1 - AL))
    cost = (-1 / m) * np.sum(log_likelihood)
    # Squeeze so that e.g. [[17]] becomes 17.
    cost = np.squeeze(cost)
    assert (cost.shape == ())

    return cost

<font color=#FF0000 size=4 face="黑体">Back propagation module</font>

In [None]:
# Linear backward
def linear_backward(dZ, cache):
    """
    Linear part of a layer's backward propagation.

    Arguments:
    dZ -- gradient of the cost with respect to this layer's linear output
    cache -- tuple (A_prev, W, b) stored by the forward pass of this layer

    Returns:
    dA_prev -- gradient with respect to the previous layer's activation, same shape as A_prev
    dW -- gradient with respect to W, same shape as W
    db -- gradient with respect to b, same shape as b
    """
    A_prev, W, b = cache
    # Average over the examples, which are the columns of A_prev.
    scale = 1 / A_prev.shape[1]

    dW = scale * np.dot(dZ, A_prev.T)
    db = scale * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)

    # Each gradient must match the shape of the quantity it belongs to.
    for gradient, reference in ((dA_prev, A_prev), (dW, W), (db, b)):
        assert gradient.shape == reference.shape

    return dA_prev, dW, db

# Linear-Activation backward
def linear_activation_backward(dA, cache, activation):
    """
    Backward propagation for one LINEAR -> ACTIVATION layer.

    Arguments:
    dA -- gradient of the cost with respect to this layer's post-activation value
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string "sigmoid" or "relu"

    Returns:
    dA_prev -- gradient with respect to the previous layer's activation
    dW -- gradient with respect to this layer's W
    db -- gradient with respect to this layer's b

    Raises:
    ValueError -- if activation is neither "sigmoid" nor "relu"
    """
    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
    else:
        # Fail loudly instead of hitting an unbound-variable error at the return.
        raise ValueError('activation must be "sigmoid" or "relu", got %r' % (activation,))

    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db

# L-layer Model Backward
def L_model_backward(AL, Y, caches):
    """
    Backward propagation through the whole network: one sigmoid layer
    (the last) preceded by relu layers.

    Arguments:
    AL -- probability vector output of the forward propagation
    Y -- true labels; reshaped here to the shape of AL
    caches -- list of per-layer caches, with the last entry belonging to the sigmoid layer

    Returns:
    grads -- dictionary with the gradients:
             grads["dW" + str(l)], grads["db" + str(l)] for layer l, and
             grads["dA" + str(l)], which holds the dA_prev value returned by
             layer l's backward step
    """
    grads = {}
    L = len(caches)
    Y = Y.reshape(AL.shape)

    # Initialize the backpropagation: derivative of the cross-entropy cost with respect to AL
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Lth layer (sigmoid -> linear) gradients
    current_cache = caches[L - 1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, activation = "sigmoid")

    # Remaining layers (relu -> linear), walking from layer L-1 down to layer 1
    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        # grads["dA" + str(l + 2)] is the dA_prev produced by the layer above,
        # i.e. the gradient flowing into layer l + 1.
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 2)], current_cache, activation = "relu")
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads

<font color=#FF0000 size=4 face="黑体">Update parameters</font>

In [None]:
def update_parameters(parameters, grads, learning_rate):
    """
    Apply one gradient-descent step to every layer's weights and biases.

    Arguments:
    parameters -- python dictionary holding "W1", "b1", ..., "WL", "bL"; updated in place
    grads -- python dictionary holding "dW1", "db1", ..., "dWL", "dbL"
    learning_rate -- step size of the update

    Returns:
    parameters -- python dictionary containing your updated parameters
    """
    # Each layer contributes one W and one b entry, so the layer count is
    # half the dictionary size.
    L = len(parameters) // 2

    for l in range(1, L + 1):
        parameters["W" + str(l)] = parameters["W" + str(l)] - learning_rate * grads["dW" + str(l)]
        parameters["b" + str(l)] = parameters["b" + str(l)] - learning_rate * grads["db" + str(l)]

    return parameters

<font color=#FF0000 size=4 face="黑体">L-layer neural network</font>

In [None]:
# layer_dims -- list containing the dimensions of each layer in the network
# (not the number of layers). It is None here; presumably a placeholder to be
# replaced with real layer sizes before training -- TODO confirm.

layer_dims = None

def L_layer_model(X, Y, layer_dims, learning_rate = 0.0075, num_iterations = 3000, print_cost = False):
    """
    Train an L-layer neural network with gradient descent and plot the cost curve.

    Arguments:
    X -- input data
    Y -- true labels
    layer_dims -- python array (list) containing the dimensions of each layer in our network
    learning_rate -- step size of the gradient-descent update
    num_iterations -- number of gradient-descent iterations
    print_cost -- if True, print the cost every 100 iterations

    Returns:
    parameters -- the parameters after the last update
    """
    costs = []

    parameters = initialize_parameters(layer_dims)

    # loop for iterations
    for i in range(num_iterations):
        AL, caches = L_model_forward(X, parameters)
        cost = compute_cost(AL, Y)
        grads = L_model_backward(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)

        # Sample the cost every 100 iterations regardless of print_cost, so the
        # plot below is never empty.
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print("Cost after iteration %i: %f" %(i, cost))

    # plot
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per hundreds)')
    plt.title("Learning rate = " + str(learning_rate))
    plt.show()

    return parameters