In [14]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, scale
from sklearn.utils import shuffle

# Load the feature matrix and the label column from CSV into NumPy arrays.
# NOTE(review): pd.read_csv treats the first line of each file as a header row
# by default -- confirm both CSVs really start with one, otherwise the first
# sample is silently consumed as column names.
x_data, y_label = (
    pd.read_csv(csv_path).to_numpy()
    for csv_path in ('data/X_data.csv', 'data/y_label.csv')
)

# Displayed as the cell result: the shape of each array.
x_data.shape, y_label.shape

((4999, 400), (4999, 1))

In [15]:
# Logistic sigmoid activation.
def sigmoid(z):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-z))``.

    The value is computed from ``exp(-|z|)``, which always lies in (0, 1], so
    large-magnitude inputs cannot overflow ``np.exp`` (the naive form
    overflows for strongly negative ``z``).

    Parameters
    ----------
    z : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    NumPy scalar or ndarray
        Sigmoid of ``z`` with the same shape as the input.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))
    # Both branches are finite everywhere; np.where merely picks the one that
    # is algebraically equal to 1 / (1 + exp(-z)) for the sign of z.
    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Hand back a NumPy scalar (not a 0-d array) for scalar input.
    return out[()] if out.ndim == 0 else out

In [16]:
class NN:
    """Fully connected network with one sigmoid hidden layer and a sigmoid output layer.

    Training is full-batch gradient descent on the summed squared error
    ``0.5 * sum((y - output) ** 2)``. Both weight matrices are drawn from
    ``np.random.rand`` (uniform on [0, 1)); each layer uses one scalar bias
    that is shared by all of its units.

    Parameters
    ----------
    x : ndarray of shape (samples, features)
        Training inputs.
    y : ndarray of shape (samples, classes)
        Training targets, one column per class (one-hot rows are what
        ``score`` assumes when it takes the per-row argmax).
    classes : int
        Number of output units.
    hidden_features : int
        Number of hidden units.
    """

    def __init__(self, x, y, classes, hidden_features):
        self.x = x
        self.y = y
        self.classifications = classes
        self.samples, self.features = x.shape
        self.hidden_features = hidden_features

        # Draw order (weights1, then weights2) matters for seeded reproducibility.
        self.weights1 = np.random.rand(self.features, self.hidden_features)
        self.weights2 = np.random.rand(self.hidden_features, self.classifications)
        self.bias1 = 1.0
        self.bias2 = 1.0

        # Activations cached by the most recent forward pass.
        self.net_hidden = None
        self.sigmoid_hidden = None
        self.net_out = None
        self.sigmoid_out = None

    def _forward_propagation(self, x_test=None):
        """Run a forward pass and cache every intermediate activation on ``self``.

        Parameters
        ----------
        x_test : ndarray of shape (n, features), optional
            Inputs to propagate; the stored training inputs are used when omitted.

        Returns
        -------
        ndarray of shape (n, classes)
            The output-layer activations (also stored as ``self.sigmoid_out``).
        """
        inputs = self.x if x_test is None else x_test
        # input layer -> hidden layer
        self.net_hidden = np.dot(inputs, self.weights1) + self.bias1
        self.sigmoid_hidden = sigmoid(self.net_hidden)
        # hidden layer -> output layer
        self.net_out = np.dot(self.sigmoid_hidden, self.weights2) + self.bias2
        self.sigmoid_out = sigmoid(self.net_out)
        return self.sigmoid_out

    def _back_propagation(self, eta):
        """Apply one gradient-descent step using the cached activations.

        Must be called right after ``_forward_propagation()`` on the training
        inputs, because it combines the cached activations with ``self.x`` and
        ``self.y``.

        Parameters
        ----------
        eta : float
            Learning rate.
        """
        # dLoss/d(net_out): error times the sigmoid derivative.
        delta_out = (self.sigmoid_out - self.y) * (self.sigmoid_out * (1 - self.sigmoid_out))
        grad_weights2 = np.dot(self.sigmoid_hidden.T, delta_out)

        # Push the output deltas back through weights2 with a single matrix
        # product, then multiply by the hidden sigmoid derivative.
        delta_hidden = np.dot(delta_out, self.weights2.T) * (self.sigmoid_hidden * (1 - self.sigmoid_hidden))
        grad_weights1 = np.dot(self.x.T, delta_hidden)

        self.weights1 -= eta * grad_weights1  # input -> hidden weights
        self.bias1 -= eta * np.sum(delta_hidden)  # input -> hidden bias (scalar)
        self.weights2 -= eta * grad_weights2  # hidden -> output weights
        self.bias2 -= eta * np.sum(delta_out)  # hidden -> output bias (scalar)

    def fit(self, epochs=10000, eta=1e-2):
        """Train for ``epochs`` full-batch steps, printing progress every 100 epochs.

        Parameters
        ----------
        epochs : int
            Number of gradient-descent steps.
        eta : float
            Learning rate.

        The training accuracy is evaluated after every step; the best value
        seen is printed when training ends.
        """
        print("--------------训练开始--------------")
        max_score = 0
        for epoch in np.arange(1, epochs + 1):
            # Each update sums over every row, so row order does not change the
            # gradient mathematically; the shuffle is retained from the
            # original training loop.
            self.x, self.y = shuffle(self.x, self.y)
            self._forward_propagation()
            self._back_propagation(eta)

            s = self.score()
            max_score = max(max_score, s)
            # Report loss and accuracy every 100 epochs.
            if epoch % 100 == 0:
                print(epoch, "Loss:", self._calculate_square_error(), "Score:", s)
        print("最好的准确率:", max_score)

    def predict(self, x_test):
        """Return one-hot predictions for ``x_test``.

        Parameters
        ----------
        x_test : ndarray of shape (n, features)

        Returns
        -------
        ndarray of shape (n, classes)
            Each row holds a single 1 in the column of the largest output
            activation. The result has one row per row of ``x_test``,
            whatever the size of the training set.

        Note: this overwrites the activations cached by the last forward pass.
        """
        activations = self._forward_propagation(x_test)
        one_hot = np.zeros(activations.shape)
        one_hot[np.arange(activations.shape[0]), np.argmax(activations, axis=1)] = 1
        return one_hot

    def score(self, x_test=None, y_test=None):
        """Return classification accuracy as a fraction in [0, 1].

        Parameters
        ----------
        x_test, y_test : ndarray, optional
            Evaluation inputs and one-hot targets. Omit both to score the
            stored training data.

        Raises
        ------
        ValueError
            If only one of ``x_test`` / ``y_test`` is given.
        """
        if x_test is None and y_test is None:
            x_test, y_test = self.x, self.y
        elif x_test is None or y_test is None:
            raise ValueError("x_test and y_test must be provided together")
        predicted = np.argmax(self.predict(x_test), axis=1)
        expected = np.argmax(y_test, axis=1)
        return float(np.count_nonzero(predicted == expected)) / predicted.shape[0]

    def _calculate_square_error(self, x_test=None, y_test=None):
        """Return ``0.5 * sum((y - output) ** 2)`` for the given data.

        A forward pass is run on the requested inputs so the loss always
        matches the data passed in rather than whatever was propagated last.

        Parameters
        ----------
        x_test, y_test : ndarray, optional
            Evaluation inputs and targets. Omit both to use the stored
            training data.

        Raises
        ------
        ValueError
            If only one of ``x_test`` / ``y_test`` is given.
        """
        if x_test is None and y_test is None:
            x_test, y_test = self.x, self.y
        elif x_test is None or y_test is None:
            raise ValueError("x_test and y_test must be provided together")
        activations = self._forward_propagation(x_test)
        return 0.5 * np.sum((y_test - activations) ** 2)

In [17]:
# Seed NumPy's global RNG (the NN weights are drawn from np.random.rand),
# standardise the features, and one-hot encode the labels.
np.random.seed(64)
x_data = scale(x_data)
# OneHotEncoder's `sparse=` keyword was renamed to `sparse_output=` in newer
# scikit-learn releases and later removed; densifying the default sparse
# result avoids the keyword altogether and works on old and new versions.
y_label_oh = OneHotEncoder().fit_transform(y_label).toarray()

In [18]:
# Ten-class problem with 25 hidden units.
n_classes, n_hidden = 10, 25
nn = NN(x_data, y_label_oh, classes=n_classes, hidden_features=n_hidden)
nn.fit(eta=0.001)
# score() without arguments evaluates on the data the model was trained on,
# so the figure printed here is training accuracy.
print("模型预测准确率:", nn.score())

--------------训练开始--------------
100 Loss: 2362.659242653648 Score: 0.1842368473694739
200 Loss: 2334.450291120498 Score: 0.21824364872974594
300 Loss: 2217.810160004977 Score: 0.27385477095419086
400 Loss: 1849.2197589901998 Score: 0.4138827765553111
500 Loss: 1617.9206785172291 Score: 0.511502300460092
600 Loss: 1463.548896389634 Score: 0.5945189037807561
700 Loss: 1337.1152972777895 Score: 0.6335267053410683
800 Loss: 1241.9479578510122 Score: 0.6649329865973195
900 Loss: 1152.0497805430125 Score: 0.6875375075015003
1000 Loss: 1066.626701566244 Score: 0.7069413882776555
1100 Loss: 985.239575459059 Score: 0.8013602720544108
1200 Loss: 857.094733507512 Score: 0.8271654330866173
1300 Loss: 711.5098095751126 Score: 0.8401680336067213
1400 Loss: 631.9677858472234 Score: 0.8577715543108622
1500 Loss: 568.3634909296445 Score: 0.8711742348469694
1600 Loss: 516.6404258886522 Score: 0.8815763152630526
1700 Loss: 475.4222755673038 Score: 0.8919783956791358
1800 Loss: 442.57443661051934 Score: 