In [1]:
import numpy as np
import random
import math
import csv
from sklearn.model_selection import train_test_split  # 只是用来划分数据集和测试集

In [2]:
def sigmoid(x):     # 激活函数
    return 1/(1+math.pow(math.e,-x))

def MSL(y1,y2):     # 均方误差，对应平方和
    return sum(np.power(y1-y2,2))*0.5

In [3]:
# 读入数据
def read_deal(path,layout):
    file_reader = csv.reader(open(path,"r"))
    data=[]
    for i in file_reader:
        data.append(i)
    name=data[0]
    del(data[0])
    data=np.asarray(data).astype(np.float)
    
    length=len(data)
    xlen=len(data[0])-1
    x=data[:,:xlen]
    y_true=data[:,xlen]
    y=np.zeros((length,layout),dtype=float)
    for i in range(length):
        y[i,int(y_true[i])]=1
    return name,length,x,y

In [8]:
# 开始训练
def train(length,layin,layhide,layout,x,y,train_cnt):
    n=0.1   # 学习率
    v=np.random.random((layin,layhide))   # 输入到隐层的激活系数
    u=np.random.random(layhide)         # 隐层阈值
    w=np.random.random((layhide,layout)) # 隐层到输出的激活系数
    o=np.random.random(layout)         # 输出层阈值
    for c in range(train_cnt):
        for i in range(length):
            a=np.dot(x[i],v)            # 和u组成隐藏层输入
            b=np.zeros(layhide)         # 得到隐藏层输出          
            for j in range(layhide):    
                b[j]=sigmoid(a[j]-u[j])
            beta=np.dot(b,w)            # 和o组成输出层输入
            out=np.zeros(layout)        # 输出层输出
            for j in range(layout):
                out[j]=sigmoid(beta[j]-o[j])
            g=out*(1-out)*(y[i]-out)
            # 计算Δw
            w_del=np.zeros((layhide,layout))
            for j in range(layhide):
                w_del[j]=n*b[j]*g
            # 计算Δθ
            o_del=-n*g
            # 计算Δv
            v_del=np.zeros((layin,layhide),dtype=float)
            # 计算Δγ
            u_del=np.zeros(layhide)
            e=np.zeros(layhide)
            for h in range(layhide):
                e[h]=b[h]*(1-b[h])*(sum(w[h]*g))

            for j in range(layin):
                v_del[j]=n*x[i,j]*e
            u_del=-n*e

            w=w+w_del   # 隐藏层到输出层的系数更新
            o=o+o_del   # 输出层输入偏差量的更新
            v=v+v_del   # 输入层到隐藏层的系数更新
            u=u+u_del   # 隐藏层输入偏差量的更新
    
    return w,o,v,u

        

In [9]:
# 鸢尾花数据集，输入6属性，输出3属性(1,0,0)代表第一类
layin=13
layhide=20
layout=3

# 得到对应输入x和输出y矩阵
name,length,x,y=read_deal("wine.csv",layout)
# print(x,y)
train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.3)
train_x = np.array(train_x)
train_y = np.array(train_y)
test_x = np.array(test_x)
test_y = np.array(test_y)
test_len=len(test_x)
train_len=length-test_len
print(train_len,test_len)
# 得到系数矩阵和偏移量
w,o,v,u=train(train_len,layin,layhide,layout,train_x,train_y,100)
# print(w,o);print(v,u)
cnt=0
for i in range (test_len):
    a=np.dot(test_x[i],v)       # 和u组成隐藏层输入
    b=np.zeros(layhide)         # 得到隐藏层输出          
    for j in range(layhide):    
        b[j]=sigmoid(a[j]-u[j])
    beta=np.dot(b,w)            # 和o组成输出层输入
    out=np.zeros(layout)        # 输出层输出
    for j in range(layout):
        out[j]=sigmoid(beta[j]-o[j])

    print(a-u,out)
    if(np.argmax(out)==np.argmax(test_y[i])):
        cnt+=1

print("正确率:%f" %float(cnt/test_len))

124 54
[518.1399536  139.2758092  233.78838401 514.61864277 178.33460171
 517.56209918 437.18250422 147.18048261 323.24187931 142.59458373
 207.89678057 195.77901469 129.4978104   76.98225702 311.47287009
 307.33232598 286.59433675 239.16391585  87.51395603 113.57556027] [0.99999129 0.99996076 0.1749976 ]
[839.81065556 204.28157499 335.2410497  850.48313306 257.2585164
 874.79100248 742.85831269 177.36611664 502.309639   182.8071334
 285.59754347 309.14304176 176.8152881  103.82005117 512.5936422
 450.54748597 433.17014142 388.07502606 103.49282589 154.76965329] [0.99999129 0.99996076 0.1749976 ]
[512.4036196  135.94138171 242.5681258  502.41005273 179.51857986
 508.87337851 428.40339874 155.7498356  316.71508216 146.81793363
 205.62239885 192.59186002 133.10404035  75.49038809 299.72883888
 305.50943165 287.83523282 237.25296531  96.60770462 106.00802576] [0.99999129 0.99996076 0.1749976 ]
[ 976.59540803  231.2415002   377.52474916  993.13225638  290.7580649
 1029.45011513  875.093753