In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.io
import math
import sklearn
import sklearn.datasets
import opt_utils
import testCase
%matplotlib inline
# Notebook-wide matplotlib defaults.
plt.rcParams['figure.figsize'] = (7.0, 4.0)
# Fixed typo: the interpolation name is 'nearest' (the original 'nearnest' is
# not a valid matplotlib interpolation method and fails when an image is drawn).
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

In [None]:
def update_parameters_with_gd(parameters, grads, learning_rate):
    """Apply one plain gradient-descent step to every layer.

    Args:
        parameters: dict holding "W1", "b1", ..., "WL", "bL"; the number of
            layers is taken as ``len(parameters) // 2``.
        grads: dict holding the matching "dW1", "db1", ..., "dWL", "dbL".
        learning_rate: factor applied to each gradient before subtracting.

    Returns:
        The same ``parameters`` dict, with every entry rebound to
        ``value - learning_rate * gradient``. The previous arrays are not
        modified in place; only the dict entries are replaced.
    """
    layer_count = len(parameters) // 2
    for layer in range(1, layer_count + 1):
        for prefix in ("W", "b"):
            key = prefix + str(layer)
            parameters[key] = parameters[key] - learning_rate * grads["d" + key]

    return parameters
        

In [None]:
# # Test update_parameters_with_gd
# print("-------------测试update_parameters_with_gd-------------")
# parameters , grads , learning_rate = testCase.update_parameters_with_gd_test_case()
# parameters = update_parameters_with_gd(parameters,grads,learning_rate)
# print("W1 = " + str(parameters["W1"]))
# print("b1 = " + str(parameters["b1"]))
# print("W2 = " + str(parameters["W2"]))
# print("b2 = " + str(parameters["b2"]))

In [None]:
def random_mini_batches(X, Y, mini_batches_size = 64, seed = 0):
    """Shuffle the columns of X and Y together and split them into mini-batches.

    Args:
        X: array indexed as ``X[:, cols]``; ``X.shape[1]`` is used as the
            number of examples ``m``.
        Y: array indexed as ``Y[:, cols]`` with the same column permutation
            as X, so column i of X stays paired with column i of Y.
        mini_batches_size: number of columns per full mini-batch.
        seed: value passed to ``np.random.seed`` before shuffling, which makes
            the shuffle reproducible (and resets NumPy's global RNG state).

    Returns:
        A list of ``(X_batch, Y_batch)`` tuples. All batches have
        ``mini_batches_size`` columns except possibly the last one, which
        holds the remaining ``m % mini_batches_size`` columns when m is not
        an exact multiple of the batch size.
    """
    np.random.seed(seed)
    mini_batches = []
    m = X.shape[1]

    # Shuffle 0..m-1 and reorder the columns of X and Y with the same order.
    permutation = list(np.random.permutation(m))
    X = X[:, permutation]
    Y = Y[:, permutation]

    # Fixed: the original read `mini_batch_size`, a name that is not a
    # parameter of this function, so it either raised NameError or silently
    # picked up an unrelated global left behind by another notebook cell.
    num_batch = m // mini_batches_size
    for i in range(num_batch):
        # Column range of the (i + 1)-th full batch.
        start = i * mini_batches_size
        stop = start + mini_batches_size
        mini_batches.append((X[:, start:stop], Y[:, start:stop]))

    # Leftover columns form a final, smaller batch.
    if m % mini_batches_size != 0:
        tail_start = num_batch * mini_batches_size
        mini_batches.append((X[:, tail_start:], Y[:, tail_start:]))

    return mini_batches

In [None]:
# # Test random_mini_batches
# print("-------------测试random_mini_batches-------------")
# X_assess,Y_assess,mini_batch_size = testCase.random_mini_batches_test_case()
# mini_batches = random_mini_batches(X_assess,Y_assess,mini_batch_size)

# print("第1个mini_batch_X 的维度为：",mini_batches[0][0].shape)
# print("第1个mini_batch_Y 的维度为：",mini_batches[0][1].shape)
# print("第2个mini_batch_X 的维度为：",mini_batches[1][0].shape)
# print("第2个mini_batch_Y 的维度为：",mini_batches[1][1].shape)
# print("第3个mini_batch_X 的维度为：",mini_batches[2][0].shape)
# print("第3个mini_batch_Y 的维度为：",mini_batches[2][1].shape)

In [None]:
def initialize_velocity(parameters):
    """Build a zero-filled velocity dict mirroring the layer parameters.

    Args:
        parameters: dict holding "W1", "b1", ..., "WL", "bL"; the number of
            layers is taken as ``len(parameters) // 2``.

    Returns:
        A new dict with keys "dW1", "db1", ..., "dWL", "dbL", each an array of
        zeros created with ``np.zeros_like`` from the matching parameter.
    """
    velocity = {}
    for layer in range(1, len(parameters) // 2 + 1):
        suffix = str(layer)
        velocity["dW" + suffix] = np.zeros_like(parameters["W" + suffix])
        velocity["db" + suffix] = np.zeros_like(parameters["b" + suffix])

    return velocity

In [None]:
# # Test initialize_velocity
# print("-------------测试initialize_velocity-------------")
# parameters = testCase.initialize_velocity_test_case()
# v = initialize_velocity(parameters)

# print('v["dW1"] = ' + str(v["dW1"]))
# print('v["db1"] = ' + str(v["db1"]))
# print('v["dW2"] = ' + str(v["dW2"]))
# print('v["db2"] = ' + str(v["db2"]))

In [None]:
def update_parameters_with_momentun(parameters,grads,v,beta,learning_rate):
    """Update every layer's parameters once using gradient descent with momentum.

    Each call performs exactly one update, so calling it j times updates the
    parameters j times. ``v`` acts as an alternative descent direction: plain
    gradient descent steps along the current gradient only, whereas ``v``
    blends the current gradient with the previously accumulated ones.

    Args:
        parameters: dict holding "W1", "b1", ..., "WL", "bL"; the number of
            layers is taken as ``len(parameters) // 2``.
        grads: dict holding the matching "dW1", "db1", ..., "dWL", "dbL".
        v: dict of velocities with the same keys as ``grads``; its entries
            are replaced by this call.
        beta: weight given to the previous velocity; ``1 - beta`` weights the
            current gradient.
        learning_rate: factor applied to the velocity before subtracting.

    Returns:
        The tuple ``(parameters, v)``, i.e. the same two dicts that were
        passed in, with their entries rebound to the updated values.
    """
    for layer in range(1, len(parameters) // 2 + 1):
        idx = str(layer)
        # First refresh the velocities of this layer ...
        for grad_key in ("dW" + idx, "db" + idx):
            v[grad_key] = beta * v[grad_key] + (1 - beta) * grads[grad_key]
        # ... then step the parameters along the refreshed velocities.
        for param_key in ("W" + idx, "b" + idx):
            parameters[param_key] = parameters[param_key] - learning_rate * v["d" + param_key]

    return parameters, v

In [None]:
def initialize_adam(parameters):
    """Create the two zero-filled accumulator dicts used by the Adam update.

    Args:
        parameters: dict holding "W1", "b1", ..., "WL", "bL"; the number of
            layers is taken as ``len(parameters) // 2``.

    Returns:
        A tuple ``(v, s)`` of two independent dicts, each with keys
        "dW1", "db1", ..., "dWL", "dbL" mapping to zero arrays created with
        ``np.zeros_like`` from the matching parameter. Presumably ``v`` is
        the running average of gradients and ``s`` the running average of
        squared gradients; this function only allocates them.
    """
    first_moment = {}
    second_moment = {}

    for layer in range(1, len(parameters) // 2 + 1):
        for prefix in ("W", "b"):
            template = parameters[prefix + str(layer)]
            grad_key = "d" + prefix + str(layer)
            # Separate zeros_like calls so v and s never share storage.
            first_moment[grad_key] = np.zeros_like(template)
            second_moment[grad_key] = np.zeros_like(template)

    return (first_moment, second_moment)


In [None]:
# Test initialize_adam
print("-------------测试initialize_adam-------------")
parameters = testCase.initialize_adam_test_case()
v,s = initialize_adam(parameters)

# Show every entry of v, then every entry of s, for the two layers of the
# test case; one line per entry in the form  name["key"] = value.
print("\n".join(
    label + '["' + key + '"] = ' + str(table[key])
    for label, table in (("v", v), ("s", s))
    for key in ("dW1", "db1", "dW2", "db2")
))