## BP网络二分类

In [1]:

from pandas import read_csv
from sklearn.preprocessing import MinMaxScaler
from sklearn.cross_validation import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD
from matplotlib import pyplot
import keras
import os

def scale(train, test):#对数据进行规格化，使得分布在[-1,1]内,shape 不变
	# fit scaler
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler = scaler.fit(train)
    # transform train
    train = train.reshape(train.shape[0], train.shape[1])
    train_scaled = scaler.transform(train)
	# transform test
	test = test.reshape(test.shape[0], test.shape[1])
	test_scaled = scaler.transform(test)
	return scaler, train_scaled, test_scaled

DataFrame = read_csv('dataset.data', header=None, index_col= False)#pandas.DataFrame
raw_values = DataFrame.values#pandas.ndarray

x = raw_values[:, 0:3]#所有列，0~2号特征为X输入
y = keras.utils.to_categorical((raw_values[:, 3]), num_classes=2)#所有列，0号为标签，一共有2个类，本来是数字标类号的变成正交向量标类了
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)#其中20%的数据用于验证集，42是随机种子
scaler, train_x_scaled, test_x_scaled = scale(x_train, x_test)#对数据预处理

Using TensorFlow backend.


## 层次模型建立

In [2]:
modelfile = 'modelweight.model' #神经网络权重保存
model = Sequential()  
model.add(Dense(units=10, activation='sigmoid', input_dim=3))#第一层10个神经元，用sigmoid激活函数，输入维度3
model.add(Dropout(0.2))#droupout 层
model.add(Dense(units=2, input_dim=10,activation='softmax'))#分类，用softmax做，输出为2d矢量
model.summary()#打印出模型概述信息
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)#随机梯度下降做优化
model.compile(loss='binary_crossentropy', #二分类用binary交叉熵
              optimizer=sgd,
              metrics=['accuracy'])

if os.path.exists(modelfile):#如果存在之前训练的权重矩阵，载入模型
     model.load_weights(modelfile)
else:#否则训练
    hist=model.fit(train_x_scaled, y_train,#开始训练
              epochs=200,
              batch_size=4,validation_data=(test_x_scaled, y_test))
    model.save_weights(modelfile) #保存模型权重
        





_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 10)                40        
_________________________________________________________________
dropout_1 (Dropout)          (None, 10)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 22        
Total params: 62
Trainable params: 62
Non-trainable params: 0
_________________________________________________________________


In [3]:
score = model.evaluate(test_x_scaled, y_test, batch_size=4)#这里做评估，返回误差值和评估标准
print(model.metrics_names)
print(score)#打印误差值和评估标准值


['loss', 'acc']
[0.29477190630415739, 0.89830508575601098]


In [4]:
"""y_test是样本测试集，y_test_是测试集经过模型后的输出，数据结构都为n*2的ndarray
    两者相乘，如果一个样本点的label为[1 ,0]，经过模型的输出为[0.9 ,0]相乘就得到[0.9 , 0],该随机矢量无穷范数为0.9，大于阈值0.5，预测正确
    如果一个样本点的label为[1 ,0]，经过模型的输出为[0 ,0.9]相乘就得到[0 , 0],该随机矢量无穷范数为0，小于阈值，预测错误
"""
y_test_=model.predict(x_test)
y_test_acc=abs(y_test_* y_test)

acc=0
for i in range(y_test_acc.shape[0]):#行数
   
    if y_test_acc[i][0]>0.5 or y_test_acc[i][1]>0.5:#无穷范数
        acc+=1
print("自测 acc = ",acc/y_test_acc.shape[0])
#这里使用模型，顺便测试误差值，结果和上面评估完全一样


自测 acc =  0.1864406779661017
