In [None]:
class FCLayer(object):
    """
    Fully connected layer: ``out = activation(inputs @ weight + biase)``.

    The activation is the logistic sigmoid and can be switched off per call.
    The most recent input and output are cached on the instance (``input``,
    ``out``) so a caller can compute gradients after the forward pass.
    """

    def __init__(self, input_units, output_units):
        """
        Args:
            input_units: number of features of each incoming sample.
            output_units: number of neurons (features produced) in this layer.
        """
        self._input_units = input_units
        self._output_units = output_units
        # Weight matrix of shape (input_units, output_units), sampled from N(0, 1).
        self.weight = np.random.standard_normal(size=(input_units, output_units))
        # One bias per output neuron, so it broadcasts over the batch axis of
        # ``inputs @ weight``. (It used to be sized by input_units, which does not
        # match the layer output.) The attribute name is kept for compatibility.
        self.biase = np.zeros((output_units,))  # TODO use non-zero value

    def feed_forward(self, inputs, use_activation_fuc=True):
        """
        Run the forward pass and cache its input and output.

        Args:
            inputs: array whose last axis has ``input_units`` entries.
            use_activation_fuc: when True (default) the sigmoid is applied to
                the affine output; when False the raw affine output is returned.

        Returns:
            The layer output (also stored in ``self.out``).
        """
        self.input = inputs
        # Affine transform; the bias is all zeros until it is trained or set.
        self.out = np.matmul(inputs, self.weight) + self.biase
        if use_activation_fuc:
            self.out = self.activation_fuc(self.out)
        return self.out

    def activation_fuc(self, x):
        """
        Logistic sigmoid: 1 / (1 + exp(-x)), applied element-wise.
        """
        return 1 / (1 + np.exp(-x))

    def d_out_wrt_net(self):
        """
        Derivative of the output w.r.t. the pre-activation, ∂oᵢ/∂netᵢ.

        Uses the sigmoid identity o * (1 - o) on the cached ``self.out``, so it
        is only meaningful after ``feed_forward`` ran with the activation on.
        """
        return self.out * (1 - self.out)

In [None]:
def generate_data(batch_size=36):
    """
    Produce one random batch for a 1-D threshold classification task.

    Args:
        batch_size: number of samples to draw.

    Returns:
        A tuple ``(features, targets)``, both of shape (batch_size, 1).
        Features are uniform samples from [0, 1); a target is 1 where the
        feature is greater than 0.5 and 0 otherwise.
    """
    shape = (batch_size, 1)
    features = np.random.random_sample(shape)
    # Binary label: which side of 0.5 the sample falls on.
    targets = np.where(features > 0.5, 1, 0)
    return features, targets


def compute_loss(predictions, labels):
    """
    L2 loss: the sum over all elements of (labels - predictions)^2 / 2.
    """
    residual = labels - predictions
    half_squared = np.power(residual, 2) / 2
    return np.sum(half_squared)


def compute_accuracy(preditions, labels):
    """
    Percentage of predictions that match the labels after thresholding at 0.5.

    A prediction counts as class 1 when it is greater than 0.5, otherwise 0.
    The number of matches is divided by ``len(preditions)`` and scaled to 0-100.
    """
    predicted_classes = np.where(preditions > 0.5, 1, 0)
    hits = np.sum(np.equal(predicted_classes, labels))
    return np.abs(hits / len(preditions) * 100)

$$
o_i = \gamma(net_i) \\\\
E = -\sum[t\cdot\log(y) + (1-t)\cdot\log(1-y)]
$$

其中:
 - $\gamma$ 是激活函数
 - **E** 是误差(这里使用Cross Entropy Loss), **y**为输出神经元的实际输出， **t**为样本的预期输出（注意：下文代码中的 `compute_loss` 实际使用的是 L2 loss）

所以求误差对于权重的偏微分:
$$
\begin{align}
\dfrac{\partial E}{\partial \omega_{i, j}} &= \dfrac{\partial E}{\partial o_j} \cdot \dfrac{\partial o_j}{\partial net_j} \cdot \dfrac{\partial net_j}{\partial \omega_{i, j}} \\\\
\end{align}
$$

In [None]:
def train(params=1, maxstep=1000, batch_size=32):
    """
    Train a two-layer sigmoid network (params -> 5 -> 1) with plain gradient
    descent on freshly generated batches, then test it and plot the history.

    Every 100 steps the loss and accuracy of the current batch are printed and
    recorded. After training, accuracy on 100 new samples is printed and the
    recorded accuracy / loss curves are shown in one figure.

    Args:
        params: number of input features of the first layer. ``generate_data``
            yields one feature per sample, so other values will not match the
            generated inputs.
        maxstep: number of gradient-descent steps.
        batch_size: number of samples generated for each step.

    Uses the module-level learning rate ``LR``. Returns nothing.
    """
    print('开始训练')
    loss_list = []
    accuracy_list = []
    # Mean weights are recorded alongside loss/accuracy but are not plotted below.
    fc2_weight_list = []
    fc1_weight_list = []

    # Build the model
    fc1 = FCLayer(params, 5)
    fc2 = FCLayer(5, 1)

    def predict(samples):
        # Full forward pass through both layers.
        return fc2.feed_forward(fc1.feed_forward(samples))

    step = 0
    while step < maxstep:
        # Generate a training batch
        inputs, labels = generate_data(batch_size)
        # Forward propagation
        fc1_output = fc1.feed_forward(inputs)
        fc2_output = fc2.feed_forward(fc1_output)
        # Loss of this batch
        loss = compute_loss(fc2_output, labels)

        # Delta of the output layer: dE/do * sigmoid'(net_out)
        delta_out = (fc2_output - labels) * fc2_output * (1 - fc2_output)  # [batch_size, out]
        # Delta of the hidden layer: back-propagate through fc2's weights, then
        # multiply by the HIDDEN layer's own sigmoid derivative. Using the output
        # layer's derivative here only ran because [batch_size, 1] broadcasts
        # against [batch_size, unit_1]; it produced the wrong gradient.
        # [batch_size, out] * [unit_1, out].T
        delta_hidden = np.dot(delta_out, fc2.weight.T) * fc1_output * (1 - fc1_output)  # [batch_size, unit_1]

        # Update the weights (delta_hidden was computed with the pre-update fc2.weight)
        # [unit_1, out] = [batch_size, unit_1].T * [batch_size, out]
        fc2.weight -= LR * np.dot(fc1_output.T, delta_out)
        # [input, unit_1] = [batch_size, input].T * [batch_size, unit_1]
        fc1.weight -= LR * np.dot(inputs.T, delta_hidden)

        if step % 100 == 0:
            accuracy = compute_accuracy(fc2_output, labels)
            print('step: %d , loss: %0.4f, accuracy: %0.2f' % (step, loss, accuracy))
            loss_list.append(loss)
            accuracy_list.append(accuracy)
            fc2_weight_list.append(np.average(fc2.weight))
            fc1_weight_list.append(np.average(fc1.weight))
        step += 1

    # Test on unseen data
    test_data, test_label = generate_data(100)
    results = predict(test_data)
    print('正确率: %0.2f' % compute_accuracy(results, test_label))

    # Plot the recorded accuracy (top) and loss (bottom) curves
    fig = plt.figure()
    fig.subplots_adjust(top=0.8)
    ax1 = fig.add_subplot(211)
    ax1.set_ylabel('percent')
    ax1.set_title('accuracy')
    ax1.plot(np.arange(len(accuracy_list)), accuracy_list, color='blue', lw=2)

    ax2 = fig.add_axes([0.15, 0.1, 0.7, 0.3])
    ax2.set_ylabel('loss')
    ax2.plot(np.arange(len(loss_list)), loss_list, color='yellow', lw=2)
    plt.show()
    

```
def train(params=1, maxstep=1000, batch_size=32):
    """
    Train a two-layer sigmoid network (params -> 5 -> 1) with plain gradient
    descent on freshly generated batches, then test it and plot the history.

    Every 100 steps the loss and accuracy of the current batch are printed and
    recorded. After training, accuracy on 100 new samples is printed and the
    recorded accuracy / loss curves are shown in one figure.

    Args:
        params: number of input features of the first layer. ``generate_data``
            yields one feature per sample, so other values will not match the
            generated inputs.
        maxstep: number of gradient-descent steps.
        batch_size: number of samples generated for each step.

    Uses the module-level learning rate ``LR``. Returns nothing.
    """
    print('开始训练')
    loss_list = []
    accuracy_list = []
    # Mean weights are recorded alongside loss/accuracy but are not plotted below.
    fc2_weight_list = []
    fc1_weight_list = []

    # Build the model
    fc1 = FCLayer(params, 5)
    fc2 = FCLayer(5, 1)

    def predict(samples):
        # Full forward pass through both layers.
        return fc2.feed_forward(fc1.feed_forward(samples))

    step = 0
    while step < maxstep:
        # Generate a training batch
        inputs, labels = generate_data(batch_size)
        # Forward propagation
        fc1_output = fc1.feed_forward(inputs)
        fc2_output = fc2.feed_forward(fc1_output)
        # Loss of this batch
        loss = compute_loss(fc2_output, labels)

        # Delta of the output layer: dE/do * sigmoid'(net_out)
        delta_out = (fc2_output - labels) * fc2_output * (1 - fc2_output)  # [batch_size, out]
        # Delta of the hidden layer: back-propagate through fc2's weights, then
        # multiply by the HIDDEN layer's own sigmoid derivative. Using the output
        # layer's derivative here only ran because [batch_size, 1] broadcasts
        # against [batch_size, unit_1]; it produced the wrong gradient.
        # [batch_size, out] * [unit_1, out].T
        delta_hidden = np.dot(delta_out, fc2.weight.T) * fc1_output * (1 - fc1_output)  # [batch_size, unit_1]

        # Update the weights (delta_hidden was computed with the pre-update fc2.weight)
        # [unit_1, out] = [batch_size, unit_1].T * [batch_size, out]
        fc2.weight -= LR * np.dot(fc1_output.T, delta_out)
        # [input, unit_1] = [batch_size, input].T * [batch_size, unit_1]
        fc1.weight -= LR * np.dot(inputs.T, delta_hidden)

        if step % 100 == 0:
            accuracy = compute_accuracy(fc2_output, labels)
            print('step: %d , loss: %0.4f, accuracy: %0.2f' % (step, loss, accuracy))
            loss_list.append(loss)
            accuracy_list.append(accuracy)
            fc2_weight_list.append(np.average(fc2.weight))
            fc1_weight_list.append(np.average(fc1.weight))
        step += 1

    # Test on unseen data
    test_data, test_label = generate_data(100)
    results = predict(test_data)
    print('正确率: %0.2f' % compute_accuracy(results, test_label))

    # Plot the recorded accuracy (top) and loss (bottom) curves
    fig = plt.figure()
    fig.subplots_adjust(top=0.8)
    ax1 = fig.add_subplot(211)
    ax1.set_ylabel('percent')
    ax1.set_title('accuracy')
    ax1.plot(np.arange(len(accuracy_list)), accuracy_list, color='blue', lw=2)

    ax2 = fig.add_axes([0.15, 0.1, 0.7, 0.3])
    ax2.set_ylabel('loss')
    ax2.plot(np.arange(len(loss_list)), loss_list, color='yellow', lw=2)
    plt.show()
```

In [None]:
# 开始训练
# step: 0 , loss: 1.4918, accuracy: 10.00
# step: 100 , loss: 1.1960, accuracy: 70.00
# step: 200 , loss: 0.9805, accuracy: 90.00
# step: 300 , loss: 0.8510, accuracy: 70.00
# step: 400 , loss: 0.4447, accuracy: 90.00
# step: 500 , loss: 0.5089, accuracy: 90.00
# step: 600 , loss: 0.2708, accuracy: 100.00
# step: 700 , loss: 0.3503, accuracy: 100.00
# step: 800 , loss: 0.5821, accuracy: 80.00
# step: 900 , loss: 0.3951, accuracy: 90.00
# 正确率: 95.00

![trends](Figure_1.png)