In [1]:
import numpy as np

def parameter_initializer(n1, n2, n3):
    """ 初始化权值 """
    w1 = np.random.normal(scale=(2.0/n1)**0.5, size=(n1, n2))
    b1 = np.zeros(shape=(1,n2))
    w2 = np.random.normal(scale=(2.0/n2)**0.5, size=(n2, n3))
    b2 = np.zeros(shape=(1, n3))
    return w1, b1, w2, b2

def sigmoid(z):
    a = 1/(1+np.exp(-z))
    return a

def forward_propagate(x, w1, b1, w2, b2):
    """ 构建三层神经网络 -- 前向传播 """
    """ x.shape = (m,n1)
        y.shape = (m, n3)
        w1.shape = (n, n2)
        b1.shape = (1, n2)
        w2.shape = (n2, n3)
        b2.shape = (1, n3)
    """
    z_1 = np.dot(x, w1) + b1
    a_1 = sigmoid(z_1)
    z_2 = np.dot(a_1, w2) + b2
    output = sigmoid(z_2)
    return a_1, output

def loss(output, y):
    """ 损失函数 """
    cross_entropy = -((1-y)*np.log(1-output) + y * np.log(output))
    cost = np.mean(np.sum(cross_entropy, axis=1))
    return cost

def back_propagate(w1,b1, w2, b2, a_1, output, y, learning_rate):
    """ 反向传播 """
    m = y.shape[0]
    dz_2 = output - y    # (m, n3)
    dw2 = 1/m * np.dot(a_1.T, dz_2)  # (n2, n3)
    db2 = 1/m * np.sum(dz_2, axis=0, keepdims=True)

    # 梯度下降
    w2 = w2 - learning_rate * dw2
    b2 = b2 - learning_rate * db2

    dz_1 = np.dot(dz_2, w2.T) * ((1-a_1)*a_1)  # sigmoid(z_1)的导数  # (m, n2)
    dw1 = 1/m * np.dot(x.T, dz_1)  # (n1, n2)
    db1 = 1/m * np.sum(dz_1, axis=0, keepdims=True)  # (1, n2)

    w1 = w1 - learning_rate * dw1
    b1 = b1 - learning_rate * db1

    return w1, b1, w2, b2

def bpnn(x, y, epochs, learning_rate):
    w1,b1,w2,b2 = parameter_initializer(4,3,2)
    for epoch in range(epochs):
        a_1, output = forward_propagate(x, w1, b1, w2, b2)
        cost = loss(output, y)
        print("{} epoch, cost is {}".format(epoch, cost))
        w1, b1, w2, b2 = back_propagate(w1, b1, w2, b2, a_1, output, y, learning_rate)
    return w1,b1,w2,b2


if __name__ == "__main__":

    x = np.arange(0,20).reshape(5,4)
    y = np.array([[1,0],[1,0],[0,1],[0,1],[0,1]], dtype=float).reshape(5,2)

    w1, b1, w2, b2 = bpnn(x, y, 100, 0.1)
    print(w1, b1, w2, b2)

    # 测试效果
    x_test = np.array([[1,3,2,4], [4,5,7,4], [7,9,12,7]])
    output = forward_propagate(x_test, w1, b1, w2, b2)

    print(output)

0 epoch, cost is 1.3453042697920807
1 epoch, cost is 1.3390422067809316
2 epoch, cost is 1.3330951112490328
3 epoch, cost is 1.3274056571474622
4 epoch, cost is 1.3219416925230454
5 epoch, cost is 1.316707771871996
6 epoch, cost is 1.311754388125618
7 epoch, cost is 1.3071727048184474
8 epoch, cost is 1.3030592612261622
9 epoch, cost is 1.2994541331490246
10 epoch, cost is 1.2962990137711112
11 epoch, cost is 1.2934660154253672
12 epoch, cost is 1.2908298355165362
13 epoch, cost is 1.288309073889698
14 epoch, cost is 1.2858610760567741
15 epoch, cost is 1.283463329714454
16 epoch, cost is 1.281101514574881
17 epoch, cost is 1.278764626810349
18 epoch, cost is 1.2764433660622108
19 epoch, cost is 1.2741296097413586
20 epoch, cost is 1.2718161965706702
21 epoch, cost is 1.2694967960032097
22 epoch, cost is 1.267165806586744
23 epoch, cost is 1.2648182693042176
24 epoch, cost is 1.2624497920063846
25 epoch, cost is 1.2600564832961283
26 epoch, cost is 1.2576348947432976
27 epoch, cost is 

In [2]:
import numpy as np
import time
class Cyrus_BP(object):
    """
    layer 为神经网络各层神经元的个数,包括输出层神经元个数,传参形式以列表传入；
    activate:为各层的激活函数，传参形式为字符串或列表，
             若传入一个字符串，则各层激活函数相同，
             若传入一个列表，则列表元素代表各层激活函数
             可传参数有：（1）sigmoid：S型函数
                         （2）tanh：双曲正弦函数
                         （3）relu:max(0,x)函数
                         （4）purline：线性函数
                         （5）softsign：平滑函数

    lr:学习率，默认为0.01
    epoch：最大迭代次数 默认为1e4
    该模型具有的主要方法和属性如下：
    fit(X,Y):模型拟合方法
    predict(X):输出预测方法
    predict_label(X):分类标签输出预测方法
    activate:激活函数列表
    W：权值列表
    
    """
    def __init__(self,layer,**kargs):
        self.layer = np.array(layer).reshape(1,-1)
        if 'activate' in kargs.keys():
            if str(type(kargs["activate"])) == "<class 'str'>":    
                self.activate = [kargs["activate"]]*int(len(layer))
            else:
                self.activate = kargs["activate"]
        else:
            self.activate = ["sigmoid"]*int(len(layer))
        self.diff_activate = []
        if 'lr' in kargs.keys():
            self.lr = kargs["lr"]
        else:
            self.lr = 0.01
        if 'epoch' in kargs.keys():
            self.epoch = kargs["epoch"]
        else:
            self.epoch = int(1e4)
            
        self.X = None
        self.Y = None
        self.W = None
        self.output = []
        self.delta = []
        self.sum_input = []
    # 1、选择激活函数
    def activation_func(self):
        temp_func = []
        for i in range(len(self.activate)):
            if self.activate[i] == "sigmoid":
                temp_func.append(lambda x:1/(1+np.exp(-x)))
                self.diff_activate.append(lambda x:(1/(1+np.exp(-x)))*(1-(1/(1+np.exp(-x)))))
            if self.activate[i] == "tanh":
                temp_func.append(lambda x:(np.exp(x)-np.exp(-x))/(np.exp(x)+np.exp(-x)))
                self.diff_activate.append(lambda x:((-np.exp(x) + np.exp(-x))*(np.exp(x) - np.exp(-x))/(np.exp(x) + np.exp(-x))**2 + 1))
            if self.activate[i] == "softsign":
                temp_func.append(lambda x:x/(1+np.abs(x)))
                self.diff_activate.append(lambda x:1/((1+x/np.abs(x)*x)**2))
            if self.activate[i] == "relu":
                temp_func.append(lambda x:(x+np.abs(x))/(2*np.abs(x))*x)
                self.diff_activate.append(lambda x:(x+np.abs(x))/(2*np.abs(x)))
            if self.activate[i] == "purline":
                temp_func.append(lambda x:x)
                self.diff_activate.append(lambda x:1+x-x)
        self.activate = temp_func
    # 2、权值初始化函数
    def init_w(self):
        self.W = []
        for i in range(self.layer.shape[1]):
            if i == 0:
                w = np.random.random([self.X.shape[1]+1,self.layer[0,i]])*2-1
            else:
                w = np.random.random([self.layer[0,i-1]+1,self.layer[0,i]])*2-1
            self.W.append(w)
     
    # 3、权值调整函数
    def update_w(self):
        # 1 计算各层输出值
        self.output = []
        self.sum_input = []
        for i in range(self.layer.shape[1]):
            if i == 0:
                temp = np.dot(np.hstack((np.ones((self.X.shape[0],1)),self.X)),self.W[i])
                self.sum_input.append(temp)
                self.output.append(self.activate[i](temp))
            else:
                temp = np.dot(np.hstack((np.ones((self.output[i-1].shape[0],1)),self.output[i-1])),self.W[i])
                self.sum_input.append(temp)
                self.output.append(self.activate[i](temp))
        # 2 求每层的学习信号
        self.delta = [0 for i in range(len(self.output))]
        for i in range(len(self.output)):
            if i == 0:
                self.delta [-i-1] = ((self.Y-self.output[-i-1])*self.diff_activate[-i-1](self.sum_input[-i-1]))
            else:
                self.delta [-i-1] = ((self.delta[-i].dot(self.W[-i][1:,:].T))*self.diff_activate[-i-1](self.sum_input[-i-1]))
        # 3 更新权值
        for i in range(len(self.W)):
            if i == 0 :
                self.W[i] += self.lr * np.hstack((np.ones((self.X.shape[0],1)),self.X)).T.dot(self.delta[i])
            else:
                self.W[i] += self.lr * np.hstack((np.ones((self.output[i-1].shape[0],1)),self.output[i-1])).T.dot(self.delta[i])
                            
    def fit(self,X,Y):
        self.X = np.array(X)
        self.Y = np.array(Y)
        # 1 权值初始化
        self.init_w()

        # 2 选择激活函数
        self.activation_func()
        # 3 更新权值
        start_time = time.time()
        for i in range(int(self.epoch)):
            self.update_w()
            end_time = time.time()
            if end_time - start_time >= 5:
                print("Epoch%d:"%(i+1),np.mean(np.square(self.Y-self.output[-1])))
                print("\n")
                start_time = time.time()
    def predict(self,x):
        x = np.array(x)
        result = []
        for i in range(self.layer.shape[1]):
            if i == 0:
                result.append(self.activate[i](np.dot(np.hstack((np.ones((x.shape[0],1)),x)),self.W[i])))
            else:
                result.append(self.activate[i](np.dot(np.hstack((np.ones((result[i-1].shape[0],1)),result[i-1])),self.W[i])))
        return result[-1]
    def predict_label(self,x):
        x = np.array(x)
        result = []
        for i in range(self.layer.shape[1]):
            if i == 0:
                result.append(self.activate[i](np.dot(np.hstack((np.ones((x.shape[0],1)),x)),self.W[i])))
            else:
                result.append(self.activate[i](np.dot(np.hstack((np.ones((result[i-1].shape[0],1)),result[i-1])),self.W[i])))
        result = result[-1]   
        return np.array([result[i].argmax() for i in range(result.shape[0])]).reshape(-1,1)
    
if __name__ == "__main__":
    bp = Cyrus_BP([50,10,3],lr=0.01,epoch = 2e5,activate = ["softsign","softsign","softsign"])
    from sklearn.datasets import load_iris
    from sklearn.metrics import accuracy_score
    data = load_iris()
    X = data["data"]
    Y = data["target"]
    import pandas as pd
    # 用神经网络进行分类时，需把输出先进行独热编码
    Y1 = pd.get_dummies(Y) # 进行独热编码或将期望输出转换为哑变量
    bp.fit(X,Y1)
    Y_pre = bp.predict_label(X)
    print("准确率为：",accuracy_score(Y,Y_pre))

Epoch19529: 0.015481569950039227


Epoch39595: 0.01042822001077979


Epoch59676: 0.008364547147068455


Epoch81002: 0.009030854097557137


Epoch102938: 0.006095252666154289


Epoch124864: 0.005074369524129282


Epoch146792: 0.004543823103674674


Epoch168694: 0.004324672258388802


Epoch190560: 0.004249159627839161


准确率为： 0.9933333333333333
