In [None]:
#导入软件包
import numpy as np
import matplotlib.pyplot as plt
from testCases import *
import sklearn
import sklearn.datasets
import sklearn.linear_model
from planar_utils import plot_decision_boundary, sigmoid, load_planar_dataset, load_extra_datasets
%matplotlib inline
np.random.seed(1)

In [None]:
# Load the planar dataset, then scatter the first two rows of X against each
# other, coloured by the flattened Y values.
X, Y = load_planar_dataset()
plt.scatter(x=X[0, :], y=X[1, :], s=40, c=np.squeeze(Y), cmap=plt.cm.Spectral)

In [None]:
# Record both array shapes; m is the length of X's second axis
# (the later cells treat each column of X as one example).
shape_X, shape_Y = X.shape, Y.shape
m = shape_X[1]
print('The size of X is : {}\nThe size of Y is : {}\nThe number of datasets is : {}'.format(shape_X, shape_Y, m))

In [None]:
# Baseline: try a cross-validated logistic regression before the neural network.
# scikit-learn wants one sample per row and a 1-D target, so X is transposed and
# Y is flattened with np.squeeze (the same flattening the plotting cells use).
# Passing the column vector Y.T instead makes scikit-learn flatten it itself and
# emit a warning. Assumes Y has shape (1, m), as the Y.T call it replaces did.
clf = sklearn.linear_model.LogisticRegressionCV()
clf.fit(X.T, np.squeeze(Y))

In [None]:
# Draw the decision boundary of the logistic-regression baseline.
plot_decision_boundary(clf.predict, X, np.squeeze(Y))
plt.title("Logistic Regression")
LR_predictions = clf.predict(X.T)  # one predicted label per example
# Accuracy = share of examples whose predicted label equals the true label
# (correct 1s plus correct 0s). Comparing labels directly avoids doing the
# arithmetic in the labels' own (possibly small integer) dtype and avoids
# calling float() on a one-element array, which NumPy >= 1.25 deprecates.
print('The accuracy of LR :{}%'.format(float(np.mean(LR_predictions == np.squeeze(Y)) * 100)))

In [None]:
def init_params(input_size, hidden_layer_size, output_size):
    """Create the initial weights and biases of a one-hidden-layer network.

    The global NumPy RNG is re-seeded with 2 on every call, so repeated calls
    with the same sizes return identical values. Weights are standard-normal
    draws scaled by 0.01; biases start at zero.

    Args:
        input_size: number of input features.
        hidden_layer_size: number of hidden units.
        output_size: number of output units.

    Returns:
        dict with keys "w1" (hidden_layer_size x input_size),
        "w2" (output_size x hidden_layer_size),
        "b1" (hidden_layer_size x 1) and "b2" (output_size x 1).
    """
    np.random.seed(2)
    weight_scale = 0.01
    # Draw order matters for reproducibility: hidden weights first, then output weights.
    hidden_weights = weight_scale * np.random.randn(hidden_layer_size, input_size)
    output_weights = weight_scale * np.random.randn(output_size, hidden_layer_size)
    return {
        "w1": hidden_weights,
        "w2": output_weights,
        "b1": np.zeros((hidden_layer_size, 1)),
        "b2": np.zeros((output_size, 1)),
    }

In [None]:
# Smoke-test init_params with the layer sizes returned by the test-case helper
# and print every parameter on its own line.
n_x, n_h, n_y = initialize_parameters_test_case()
parameters = init_params(n_x, n_h, n_y)
print(
    "W1 = " + str(parameters["w1"]),
    "b1 = " + str(parameters["b1"]),
    "W2 = " + str(parameters["w2"]),
    "b2 = " + str(parameters["b2"]),
    sep="\n",
)

In [None]:
def sigmoid(z):
    """Element-wise logistic function 1 / (1 + exp(-z)).

    Note: this definition replaces the ``sigmoid`` imported from planar_utils
    at the top of the notebook.

    Args:
        z: scalar or NumPy array.

    Returns:
        Value(s) in [0, 1] with the same shape as ``z``.
    """
    return 1.0 / (1.0 + np.exp(-z))

In [None]:
def forwa_propa(input, params):
    """Forward pass through the one-hidden-layer network.

    Args:
        input: 2-D array with one example per column.
        params: dict with keys "w1", "w2", "b1", "b2".

    Returns:
        dict with keys "z1", "a1", "z2", "a2": the pre-activation and tanh
        activation of the hidden layer, then the pre-activation and sigmoid
        activation of the output layer. Column i of every array belongs to
        example i ("a2" is 1 x m when the output layer has a single unit).
    """
    # Not used below, but the lookup raises IndexError for 1-D input.
    m = input.shape[1]
    w1, w2, b1, b2 = (params[key] for key in ("w1", "w2", "b1", "b2"))

    # Hidden layer: affine map of every example (column), then tanh.
    hidden_pre = np.dot(w1, input) + b1
    hidden_act = np.tanh(hidden_pre)

    # Output layer: affine map of the hidden activations, then sigmoid.
    output_pre = np.dot(w2, hidden_act) + b2
    output_act = sigmoid(output_pre)

    return {"z1": hidden_pre, "a1": hidden_act, "z2": output_pre, "a2": output_act}

In [None]:
def calcu_loss(h_x, train_y):
    """Mean binary cross-entropy between predictions and labels.

    Args:
        h_x: predicted probabilities, row vector of shape (1, m).
        train_y: 0/1 labels, row vector of shape (1, m).

    Returns:
        The loss as a Python float.
    """
    m = train_y.shape[1]
    # Keep the activations strictly inside (0, 1) before taking logs. A
    # saturated float64 output (exactly 0.0 or 1.0) would otherwise give
    # log(0) = -inf, and 0 * -inf = NaN, even for a perfect prediction.
    eps = 1e-15
    a2 = np.clip(h_x, eps, 1 - eps)
    loss = -(1 / m) * (np.dot(train_y, np.log(a2.T)) + np.dot(1 - train_y, np.log(1 - a2.T)))
    return float(np.squeeze(loss))

In [None]:
def back_propa(train_x, train_y, a, params, rate):
    """Run one backpropagation pass followed by one gradient-descent step.

    The formulas use ``a2 - train_y`` as the output-layer error and
    ``1 - a1**2`` as the hidden-layer derivative, i.e. they match the
    tanh-hidden / sigmoid-output forward pass of forwa_propa.

    Args:
        train_x: inputs, one example per column.
        train_y: labels as a row vector; its second dimension is the number
            of examples m.
        a: forward-pass activations, a dict with keys "a1" (hidden layer,
            one column per example) and "a2" (output layer).
        params: dict with keys "w1", "w2", "b1", "b2". It is left untouched;
            the updated values are returned in a new dict.
        rate: learning rate applied to every gradient.

    Returns:
        (new_params, grads): ``new_params`` has the same keys as ``params``
        with the four entries moved one step against their gradients;
        ``grads`` has keys "dw1", "dw2", "db1", "db2".
    """
    m = train_y.shape[1]
    a0 = train_x
    a1 = a["a1"]  # hidden-layer activations
    a2 = a["a2"]  # output-layer activations
    w2 = params["w2"]

    # Per-example error terms; column i belongs to example i.
    delta2 = a2 - train_y
    delta1 = np.dot(w2.T, delta2) * (1 - np.power(a1, 2))

    # Average the per-example outer products / errors over the m examples.
    dw2 = 1 / m * np.dot(delta2, a1.T)
    db2 = 1 / m * np.sum(delta2, axis=1, keepdims=True)
    dw1 = 1 / m * np.dot(delta1, a0.T)
    db1 = 1 / m * np.sum(delta1, axis=1, keepdims=True)
    assert dw2.shape == (a2.shape[0], a1.shape[0])

    grads = {"dw2": dw2, "dw1": dw1, "db1": db1, "db2": db2}

    # Build the result in a shallow copy so the caller's dict is not mutated.
    updated = dict(params)
    updated["w1"] = params["w1"] - rate * dw1
    updated["b1"] = params["b1"] - rate * db1
    updated["w2"] = params["w2"] - rate * dw2
    updated["b2"] = params["b2"] - rate * db2
    return (updated, grads)

In [None]:
print("=========================测试backward_propagation=========================")
parameters, cache, X_assess, Y_assess = backward_propagation_test_case()
# Re-key the helper's dictionaries to the lower-case names back_propa reads.
params = {name.lower(): parameters[name] for name in ("W1", "W2", "b1", "b2")}
cache = {name.lower(): cache[name] for name in ("A1", "A2")}

# Only the gradients are inspected here; the updated parameters are discarded.
_, grads = back_propa(X_assess, Y_assess, cache, params, rate = 0.5)
print(
    "dW1 = "+ str(grads["dw1"]),
    "db1 = "+ str(grads["db1"]),
    "dW2 = "+ str(grads["dw2"]),
    "db2 = "+ str(grads["db2"]),
    sep="\n",
)

In [None]:
def train(train_x, train_y, iterations = 10000, print_loss = False, rate = 0.5, hidden_layer_size = 4):
    """Train the one-hidden-layer network with batch gradient descent.

    Args:
        train_x: inputs, one example per column.
        train_y: labels as a row vector.
        iterations: number of gradient-descent steps.
        print_loss: when True, print the loss every 1000 iterations.
        rate: learning rate.
        hidden_layer_size: number of hidden units.

    Returns:
        (params, cost): the final parameter dict and the list of losses
        recorded at iterations 0, 1000, 2000, ...
    """
    # init_params re-seeds the global RNG itself, so this seed does not change
    # the initial weights; it is kept to leave the RNG call sequence untouched.
    np.random.seed(3)
    params = init_params(train_x.shape[0], hidden_layer_size, 1)
    loss_history = []
    dots = '.' * 5
    print('{}Train start{}'.format(dots, dots))
    for step in range(iterations):
        # 1. Forward pass with the current parameters gives the activations
        #    and the loss for those parameters.
        cache = forwa_propa(train_x, params)
        current_loss = calcu_loss(cache["a2"], train_y)
        if step % 1000 == 0:
            loss_history.append(current_loss)
            if print_loss:
                print('Iteration: {} | loss: {}'.format(step, current_loss))
        # 2. One gradient-descent update of the parameters.
        params, _ = back_propa(train_x, train_y, cache, params, rate)
    print('{}Train end{}'.format(dots, dots))
    # Final parameters plus the loss values recorded during training.
    return (params, loss_history)

In [None]:
print("=========================测试nn_model=========================")
X_assess, Y_assess = nn_model_test_case()
print('The shape of X_assess :', X_assess.shape)
# Silence NumPy's divide-by-zero and invalid-value floating-point warnings.
# np.seterr changes global state, so this stays in effect for later cells too.
np.seterr(divide='ignore', invalid='ignore')

parameters, _ = train(
    X_assess,
    Y_assess,
    iterations=10000,
    print_loss=False,
    rate = 0.5,
    hidden_layer_size = 4,
)
print(
    "W1 = " + str(parameters["w1"]),
    "b1 = " + str(parameters["b1"]),
    "W2 = " + str(parameters["w2"]),
    "b2 = " + str(parameters["b2"]),
    sep="\n",
)


In [None]:
# Train a network with 4 hidden units on the planar dataset; train() prints
# the loss every 1000 iterations because print_loss is enabled.
params_train, cost = train(
    X,
    Y,
    iterations = 10000,
    print_loss = True,
    rate = 0.5,
    hidden_layer_size = 4,
)

In [None]:
def predict(params, X):
    """Predict labels by rounding the network's output activation.

    Args:
        params: dict with keys "w1", "w2", "b1", "b2".
        X: inputs, one example per column.

    Returns:
        Array shaped like the output activation "a2", rounded with np.round
        (so an activation of exactly 0.5 rounds to 0).
    """
    output_act = forwa_propa(X, params)["a2"]
    return np.round(output_act)


In [None]:
print("=========================测试predict=========================")
parameters, X_assess = predict_test_case()
# Re-key the helper's parameters to the lower-case names predict reads.
params_test = {name.lower(): parameters[name] for name in ("W1", "W2", "b1", "b2")}

predictions = predict(params_test, X_assess)
print("预测的平均值 = " + str(np.mean(predictions)))

In [None]:
# Decision boundary of the trained network. predict expects one example per
# column, hence the transpose of the grid points handed over by the plot helper.
plot_decision_boundary(lambda x: predict(params_train, x.T), X, np.squeeze(Y))
plt.title("Decision Boundary for hidden layer size " + str(4))
predictions = predict(params_train, X)
# Accuracy = share of examples whose predicted label equals the true label.
# Comparing the arrays directly avoids calling float() on a 1x1 array, which
# NumPy >= 1.25 deprecates.
accua = float(np.mean(predictions == Y)) * 100
print("accuracy:{}%".format(accua))

In [None]:
# Train one network per hidden-layer size, then plot its decision boundary
# and report its training accuracy.
plt.figure(figsize = (16,32))
hidden_size = [1, 2, 3, 4, 5, 20, 50]
for index, num_hidd in enumerate(hidden_size):
    plt.subplot(5, 2, index + 1)
    plt.title('Hidden Layer of size {}'.format(num_hidd))
    params_train, _ = train(X, Y, iterations = 5000, print_loss = False, rate = 0.5, hidden_layer_size = num_hidd)
    plot_decision_boundary(lambda x: predict(params_train, x.T), X, np.squeeze(Y))
    predictions = predict(params_train, X)
    # Accuracy = share of examples whose predicted label equals the true label.
    # The earlier expression added the true-positive count twice and never
    # counted correctly predicted 0s.
    accuracy = float(np.mean(predictions == Y) * 100)
    print('the number of hidden layer is {}, the accuracy is {}%'.format(num_hidd, accuracy))