<font color=#FF0000 size=4 face="黑体">Package import</font>

In [7]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.io
import math
import sklearn
import sklearn.datasets
import Ipynb_importer # a package that enables us to import function from other notebooks

<font color=#FF0000 size=4 face="黑体">Import the deep neural network package</font>

In [8]:
import Deep_Neural_Network

<font color=#FF0000 size=4 face="黑体">Gradient descent</font>

In [None]:
def update_parameters_with_gd(parameters, grads, learning_rate):
    """
    Apply one step of plain gradient descent to every layer.

    Arguments:
    parameters -- python dictionary holding "W1", "b1", ..., "WL", "bL"
    grads -- python dictionary holding "dW1", "db1", ..., "dWL", "dbL"
    learning_rate -- step size, scalar

    Returns:
    parameters -- the same dictionary object, with every entry updated
    """
    # Each layer contributes two entries (W and b), so the number of layers
    # is half the dictionary size. Using len(parameters) directly would index
    # past the last layer and raise KeyError.
    L = len(parameters) // 2

    for l in range(1, L + 1):
        layer = str(l)
        parameters["W" + layer] = parameters["W" + layer] - learning_rate * grads["dW" + layer]
        parameters["b" + layer] = parameters["b" + layer] - learning_rate * grads["db" + layer]

    return parameters

<font color=#FF0000 size=4 face="黑体">Mini-batch gradient descent</font>

In [None]:
def random_mini_batches(X, Y, mini_batch_size = 64, seed = None):
    """
    Shuffle the examples (columns) of X and Y with the same permutation and
    partition them into consecutive mini-batches.

    Arguments:
    X -- input data; examples are indexed along axis 1 (m = X.shape[1])
    Y -- labels; 2-D array with the same number of columns as X
    mini_batch_size -- number of examples per mini-batch, positive integer
    seed -- optional integer; when given, the shuffle is drawn from a private
            generator seeded with it, so the result is reproducible and the
            global numpy random state is left untouched. When None (default)
            the global numpy random state is used.

    Returns:
    mini_batches -- list of synchronous (mini_batch_X, mini_batch_Y) tuples;
                    the last one is smaller when m is not a multiple of
                    mini_batch_size

    Raises:
    ValueError -- if mini_batch_size is smaller than 1
    """
    if mini_batch_size < 1:
        raise ValueError("mini_batch_size must be a positive integer, got %r" % (mini_batch_size,))

    m = X.shape[1]
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Shuffle: the same column permutation is applied to X and Y so that
    # every example stays paired with its label.
    permutation = rng.permutation(m)
    shuffled_X = X[:, permutation]
    shuffled_Y = Y[:, permutation]

    # Partition: stepping by mini_batch_size yields every complete batch and,
    # because slicing clips at m, the shorter trailing batch as well.
    mini_batches = []
    for start in range(0, m, mini_batch_size):
        stop = start + mini_batch_size
        mini_batches.append((shuffled_X[:, start:stop], shuffled_Y[:, start:stop]))

    return mini_batches

<font color=#FF0000 size=4 face="黑体">Momentum</font>

In [None]:
# initialize velocity
def initialize_velocity(parameters):
    """
    Create the zero-initialized velocity used by gradient descent with momentum.

    Arguments:
    parameters -- python dictionary holding "W1", "b1", ..., "WL", "bL"

    Returns:
    v -- python dictionary containing the current velocity:
         v["dW" + str(l)] and v["db" + str(l)] are zero arrays with the same
         shape as the corresponding "W" / "b" entry
    """
    # Two entries (W and b) per layer; len(parameters) alone would overshoot
    # the last layer and raise KeyError.
    L = len(parameters) // 2
    v = {}

    for l in range(1, L + 1):
        layer = str(l)
        v["dW" + layer] = np.zeros(parameters["W" + layer].shape)
        v["db" + layer] = np.zeros(parameters["b" + layer].shape)

    return v

# update parameters with momentum
def update_parameters_with_momentum(parameters, grads, v, beta, learning_rate):
    """
    Apply one step of gradient descent with momentum to every layer.

    Arguments:
    parameters -- python dictionary holding "W1", "b1", ..., "WL", "bL"
    grads -- python dictionary holding "dW1", "db1", ..., "dWL", "dbL"
    v -- python dictionary containing the current velocity ("dW1", "db1", ...)
    beta -- momentum hyperparameter, scalar
    learning_rate -- step size, scalar

    Returns:
    parameters -- python dictionary containing updated parameters
    v -- python dictionary containing updated velocity

    Both dictionaries are updated in place and also returned.
    """
    # Two entries (W and b) per layer; len(parameters) alone would overshoot
    # the last layer and raise KeyError.
    L = len(parameters) // 2

    for l in range(1, L + 1):
        for name in ("W", "b"):
            p_key = name + str(l)
            g_key = "d" + p_key

            # Exponentially weighted average of the gradients.
            v[g_key] = beta * v[g_key] + (1 - beta) * grads[g_key]
            # Step along the smoothed gradient instead of the raw one.
            parameters[p_key] = parameters[p_key] - learning_rate * v[g_key]

    return parameters, v

<font color=#FF0000 size=4 face="黑体">Adam</font>

In [None]:
# initialize Adam
def initialize_adam(parameters):
    """
    Create the zero-initialized moment estimates used by Adam.

    Arguments:
    parameters -- python dictionary holding "W1", "b1", ..., "WL", "bL"

    Returns:
    v -- python dictionary for the moving average of the gradient;
         keys "dW1", "db1", ..., zero arrays shaped like the matching parameter
    s -- python dictionary for the moving average of the squared gradient;
         same keys and shapes as v
    """
    # Two entries (W and b) per layer; len(parameters) alone would overshoot
    # the last layer and raise KeyError.
    L = len(parameters) // 2
    v = {}
    s = {}

    for l in range(1, L + 1):
        for name in ("W", "b"):
            shape = parameters[name + str(l)].shape
            # v and s get independent arrays so updating one never aliases the other.
            v["d" + name + str(l)] = np.zeros(shape)
            s["d" + name + str(l)] = np.zeros(shape)

    return v, s

# update parameters with Adam
def update_parameters_with_adam(parameters, grads, v, s, t, learning_rate = 0.01, beta1 = 0.9, beta2 = 0.999, epsilon = 1e-8):
    """
    Apply one Adam step to every layer.

    Arguments:
    parameters -- python dictionary holding "W1", "b1", ..., "WL", "bL"
    grads -- python dictionary holding "dW1", "db1", ..., "dWL", "dbL"
    v -- Adam variable, moving average of the gradient, python dictionary
    s -- Adam variable, moving average of the squared gradient, python dictionary
    t -- number of Adam steps taken so far, counting this one (must be >= 1)
    learning_rate -- step size, scalar
    beta1 -- exponential decay rate for the first-moment estimate
    beta2 -- exponential decay rate for the second-moment estimate
    epsilon -- small constant added to the denominator to avoid division by zero

    Returns:
    parameters -- python dictionary containing updated parameters
    v -- Adam variable, moving average of the gradient, python dictionary
    s -- Adam variable, moving average of the squared gradient, python dictionary

    The three dictionaries are updated in place and also returned.

    Raises:
    ValueError -- if t < 1 (the bias correction 1 - beta**t would be zero)
    """
    if t < 1:
        raise ValueError("t must be >= 1 for Adam bias correction, got %r" % (t,))

    L = len(parameters) // 2  # number of layers in the neural network

    # The bias-correction denominators depend only on t, so compute them once.
    v_correction = 1 - np.power(beta1, t)
    s_correction = 1 - np.power(beta2, t)

    for l in range(1, L + 1):
        for name in ("W", "b"):
            p_key = name + str(l)
            g_key = "d" + p_key
            grad = grads[g_key]

            # Moving averages of the gradient and of the squared gradient.
            v[g_key] = beta1 * v[g_key] + (1 - beta1) * grad
            s[g_key] = beta2 * s[g_key] + (1 - beta2) * np.multiply(grad, grad)

            # Bias-corrected estimates (the raw averages start at zero).
            v_corrected = v[g_key] / v_correction
            s_corrected = s[g_key] / s_correction

            parameters[p_key] = parameters[p_key] - learning_rate * v_corrected / (np.sqrt(s_corrected) + epsilon)

    # Return only after every layer has been processed; returning inside the
    # loop would leave all layers after the first untouched.
    return parameters, v, s

<font color=#FF0000 size=4 face="黑体">Model with different optimization algorithms</font>

In [None]:


def model(X, Y, layers_dims, optimizer, learning_rate = 0.0007, mini_batch_size = 64, beta = 0.9,
         beta1 = 0.9, beta2  = 0.999, epsilon = 1e-8, num_epochs = 10000, print_cost = True):
    """
    Train a network with mini-batches using the selected optimizer, then plot
    the recorded costs.

    Arguments:
    X -- input data; examples are indexed along axis 1
    Y -- labels; 2-D array with the same number of columns as X
    layers_dims -- layer sizes, forwarded to Deep_Neural_Network.initialize_parameters
    optimizer -- one of "gd", "momentum" or "adam"
    learning_rate -- step size, scalar
    mini_batch_size -- number of examples per mini-batch
    beta -- momentum hyperparameter (used only by "momentum")
    beta1 -- decay rate of the first-moment estimate (used only by "adam")
    beta2 -- decay rate of the second-moment estimate (used only by "adam")
    epsilon -- denominator stabilizer (used only by "adam")
    num_epochs -- number of epochs (full passes over the reshuffled data)
    print_cost -- when True, print the cost every 1000 epochs and record it
                  every 100 epochs for the plot

    Returns:
    parameters -- python dictionary containing updated parameters

    Raises:
    ValueError -- if optimizer is not one of the supported names
    """
    if optimizer not in ("gd", "momentum", "adam"):
        raise ValueError('optimizer must be "gd", "momentum" or "adam", got %r' % (optimizer,))

    costs = []
    t = 0  # Adam step counter; incremented before every Adam update so it starts at 1

    parameters = Deep_Neural_Network.initialize_parameters(layers_dims)

    # Plain gradient descent needs no optimizer state.
    if optimizer == "momentum":
        v = initialize_velocity(parameters)
    elif optimizer == "adam":
        v, s = initialize_adam(parameters)

    for i in range(num_epochs):
        # Reshuffle every epoch so the mini-batches differ between epochs.
        minibatches = random_mini_batches(X, Y, mini_batch_size)

        for minibatch in minibatches:
            (minibatch_X, minibatch_Y) = minibatch

            aL, caches = Deep_Neural_Network.L_model_forward(minibatch_X, parameters)

            cost = Deep_Neural_Network.compute_cost(aL, minibatch_Y)

            grads = Deep_Neural_Network.L_model_backward(aL, minibatch_Y, caches)

            if optimizer == "gd":
                parameters = update_parameters_with_gd(parameters, grads, learning_rate)
            elif optimizer == "momentum":
                # The updater returns (parameters, v); unpack both.
                parameters, v = update_parameters_with_momentum(parameters, grads, v, beta, learning_rate)
            else:  # "adam"
                t = t + 1
                # The updater returns (parameters, v, s); unpack all three.
                parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t, learning_rate, beta1, beta2, epsilon)

        # Logging uses the cost of the last mini-batch of the epoch.
        if print_cost and i % 1000 == 0:
            print("Cost after epoch %i: %f" %(i, cost))
        if print_cost and i % 100 == 0:
            costs.append(cost)

    # plot
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('epochs (per 100)')
    plt.title("Learning rate = " + str(learning_rate))
    plt.show()

    return parameters