In [154]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np

In [155]:
class Ann(torch.nn.Module):
    def __init__(self,input_size,hidden_size,output_size,activator,hidden_layer_size=0):
        super().__init__()
        self.input_layer=torch.nn.Linear(input_size,hidden_size)
        self.output_layer=torch.nn.Linear(hidden_size,output_size)
        self.activator=activator
        self.hidden_layers=[]
        for i in range(hidden_layer_size):
            self.hidden_layers.append(torch.nn.Linear(hidden_size,hidden_size).to(torch.double))
        
    def forward(self,x):
        # x=self.fc1(x)
        x=self.input_layer(x)
        x=self.activator(x)
        for i,layer in enumerate(self.hidden_layers):
            x=layer(x)
            x=self.activator(x)
        x=self.output_layer(x)
        x=torch.sigmoid(x)
        return x    
        
        

数据载入

In [156]:
features_row=pd.read_csv('./ex4x.dat',header=None,sep=r'\s+')
labels_row=pd.read_csv('./ex4y.dat',header=None,sep=r'\s+')

数据读取

In [157]:
features=np.column_stack((features_row[0].values.tolist(),features_row[1].values.tolist()))
labels=np.array(labels_row[0].values.tolist()).reshape(-1,1)
print('{},{}'.format(features.shape,labels.shape))

(80, 2),(80, 1)


K倍交叉验证切分

In [158]:
def K_fold_split(data,k,test_data_index):
    data_copy=np.array(data)
    splited_datas=np.array_split(data_copy,k)
    test_data=splited_datas[test_data_index]
    train_data=[]
    for i in range(k):
        if i==test_data_index:
            continue
        train_data.extend(splited_datas[i])
    
    train_data=np.array(train_data).reshape(-1,data.shape[1])
    return test_data,train_data

In [159]:
def normalize_data(data):
    res = np.array(data.T)
    means = np.mean(res, axis=1)
    stds = np.std(res, axis=1)
    for i in range(res.shape[0]):
        res[i] = (res[i] - means[i]) / stds[i]
    res = res.T
    return res, means, stds

In [160]:
def train(model,max_epoch,lr,features_train_data,labels_train_data):
    optimizer=torch.optim.SGD(model.parameters(),lr=lr)
    losses=[]
    for epoch in range(max_epoch):
        optimizer.zero_grad()
        output=model.forward(features_train_data)
        loss=nn.CrossEntropyLoss()(output.flatten(),labels_train_data.flatten())
        losses.append(loss)
        loss.backward()
        optimizer.step()
    
    return losses

In [161]:
def classify(data,threshold=0.5):
    res=torch.tensor(data)
    for i in range(res.shape[0]):
        if res[i][0]>=threshold:
            res[i][0]=1
        else:
            res[i][0]=0
    return res

In [162]:
def normalize_test_data(test_data,means,stds):
    res=np.array(test_data.T)
    
    for i in range(res.shape[0]):
        res[i]=(res[i]-means[i])/stds[i]
    
    res=res.T
    return res

In [163]:
def accuracy(prediction,labels_test_data):
    equal_count=0
    total=prediction.shape[0]
    for i in range(prediction.shape[0]):
        if prediction[i][0]==labels_test_data[i][0]:
            equal_count+=1
    
    return equal_count/total

控制隐藏层层数为1，测试相同迭代次数下,不同神经元个数的模型表现

In [None]:
hidden_sizes=[1,2,3,4,5,6,7,8,16,32,64,128]
input_size=2
output_size=1
average_accuracy_history=[]
for i,hidden_size in enumerate(hidden_sizes):
    accuracy_history=[]
    losses_history=[]
    for index in range(5):
        # print("-----------------------{}----------------------".format(index))
        # print("features:{}".format(features))
        features_test_data,features_train_data=K_fold_split(features,5,index) # 训练集与测试集的切分
        # print("features_train_data:{}".format(features_train_data))
        labels_test_data,labels_train_data=K_fold_split(labels,5,index)
        # print("labels_train_data:{}".format(labels_train_data))
        features_train_norm,means,stds=normalize_data(features_train_data) # 训练集标准化
        features_train_norm=torch.from_numpy(features_train_norm)
        labels_train_data=torch.from_numpy(labels_train_data)
        # 开始训练
        model=Ann(input_size,hidden_size,output_size,torch.nn.functional.sigmoid).to(torch.double)
        max_epoch=2000
        losses=train(model,max_epoch,0.1,features_train_norm,labels_train_data)
        # losses_history.append(losses)
        #开始测试
        features_test_data_norm=normalize_test_data(features_test_data,means,stds) # 测试集标准化
        features_test_data_norm=torch.from_numpy(features_test_data_norm)
        y_pred=model.forward(features_test_data_norm)
        y_pred=classify(y_pred)
        acc=accuracy(y_pred,labels_test_data)
        accuracy_history.append(acc)
    
    average_accuracy=np.mean(accuracy_history)
    average_accuracy_history.append({"{}".format(hidden_size):"{}".format(average_accuracy)})

In [165]:
average_accuracy_history

[{'1': '0.7625'},
 {'2': '0.75'},
 {'3': '0.7375'},
 {'4': '0.7375'},
 {'5': '0.75'},
 {'6': '0.75'},
 {'7': '0.75'},
 {'8': '0.75'},
 {'16': '0.75'},
 {'32': '0.7625'},
 {'64': '0.75'},
 {'128': '0.75'}]

以上是对应不同神经元个数的平均预测准确率，发现并不是神经元越多就有更好的预测效果，1和2的数量反而更适合这组数据

选取神经元个数为2，试用不同的中间层激活函数进行训练

In [None]:
hidden_size=2
input_size=2
output_size=1
max_epoch=2000
average_accuracy_history=[]
import torch.nn.functional as fn
activators=[fn.sigmoid,fn.tanh,fn.relu,fn.leaky_relu,fn.elu]
for i,activator in enumerate(activators):
    accuracy_history=[]
    losses_history=[]
    for index in range(5):
        # 数据准备
        features_test_data,features_train_data=K_fold_split(features,5,index) # 训练集与测试集的切分
        labels_test_data,labels_train_data=K_fold_split(labels,5,index)
        features_train_norm,means,stds=normalize_data(features_train_data) # 训练集标准化
        features_train_norm=torch.from_numpy(features_train_norm)
        labels_train_data=torch.from_numpy(labels_train_data)
        # 开始训练
        model=Ann(input_size,hidden_size,output_size,activator,1).to(torch.double)
        losses=train(model,max_epoch,0.1,features_train_norm,labels_train_data)
        #开始测试
        features_test_data_norm=normalize_test_data(features_test_data,means,stds) # 测试集标准化
        features_test_data_norm=torch.from_numpy(features_test_data_norm)
        y_pred=model.forward(features_test_data_norm)
        y_pred=classify(y_pred)
        acc=accuracy(y_pred,labels_test_data)
        accuracy_history.append(acc)
        
    average_accuracy=np.mean(accuracy_history)
    average_accuracy_history.append({"{}".format(activator.__name__):"{}".format(average_accuracy)})
    

In [167]:
average_accuracy_history

[{'sigmoid': '0.775'},
 {'tanh': '0.7375'},
 {'relu': '0.7125'},
 {'leaky_relu': '0.7375'},
 {'elu': '0.7375'}]

通过实验可以发现使用relu或leaky_relu作为隐藏层激活函数有更好的效果

In [None]:
hidden_size=2
input_size=2
output_size=1
max_epoch=2000
hidden_layer_size=[1,2,3,4,8,16]
average_accuracy_history=[]

import torch.nn.functional as fn
activator=fn.relu

for i,layer_size in enumerate(hidden_layer_size):
    accuracy_history=[]
    losses_history=[]
    for index in range(5):
        # 数据准备
        features_test_data,features_train_data=K_fold_split(features,5,index) # 训练集与测试集的切分
        labels_test_data,labels_train_data=K_fold_split(labels,5,index)
        features_train_norm,means,stds=normalize_data(features_train_data) # 训练集标准化
        features_train_norm=torch.from_numpy(features_train_norm)
        labels_train_data=torch.from_numpy(labels_train_data)
        # 开始训练
        model=Ann(input_size,hidden_size,output_size,activator,layer_size).to(torch.double)
        losses=train(model,max_epoch,0.1,features_train_norm,labels_train_data)
        #开始测试
        features_test_data_norm=normalize_test_data(features_test_data,means,stds) # 测试集标准化
        features_test_data_norm=torch.from_numpy(features_test_data_norm)
        y_pred=model.forward(features_test_data_norm)
        y_pred=classify(y_pred)
        acc=accuracy(y_pred,labels_test_data)
        accuracy_history.append(acc)
        
    average_accuracy=np.mean(accuracy_history)
    average_accuracy_history.append({"{}".format(layer_size):"{}".format(average_accuracy)})

In [169]:
average_accuracy_history


[{'1': '0.5875'},
 {'2': '0.7375'},
 {'3': '0.6125'},
 {'4': '0.35'},
 {'8': '0.5'},
 {'16': '0.9'}]

多次试验后发现隐藏层为3时平均效果最好

那么接下来使用2神经元，3层隐藏层，relu激活

In [None]:
import torch.nn.functional as fn
activator=fn.relu
hidden_size=2
layer_size=3

for index in range(5):
    # 数据准备
    features_test_data,features_train_data=K_fold_split(features,5,index) # 训练集与测试集的切分
    labels_test_data,labels_train_data=K_fold_split(labels,5,index)
    features_train_norm,means,stds=normalize_data(features_train_data) # 训练集标准化
    features_train_norm=torch.from_numpy(features_train_norm)
    labels_train_data=torch.from_numpy(labels_train_data)
    # 开始训练
    model=Ann(input_size,hidden_size,output_size,activator,layer_size).to(torch.double)
    losses=train(model,max_epoch,0.1,features_train_norm,labels_train_data)
    #开始测试
    features_test_data_norm=normalize_test_data(features_test_data,means,stds) # 测试集标准化
    features_test_data_norm=torch.from_numpy(features_test_data_norm)
    y_pred=model.forward(features_test_data_norm)
    y_pred=classify(y_pred)
    acc=accuracy(y_pred,labels_test_data)
    accuracy_history.append(acc)
    
average_accuracy=np.mean(accuracy_history)


In [171]:
average_accuracy

0.91875

可以发现平均准确率高达0.91875