# 用Python实现神经网络框架

约定：$x$ 为 $1 \times n$ 维矩阵，$\omega$ 为 $n \times k$ 维矩阵，$b$ 为 $1 \times k$ 维矩阵。
其中x为输入数据X的某一行数据，n是输入数据X的列数，k是神经网络隐藏结点的个数

In [1]:
import numpy as np
from sklearn.datasets import load_boston
from sklearn.utils import shuffle, resample

## 定义结点类
结点类初始化的时候会为如下属性赋值：  
1. 输入结点：即与当前结点连接的上一个结点
2. 结点的值：当前结点的数值
3. 输出结点：即与当前结点连接的下一个结点
4. 梯度：当前结点的梯度

并定义如下方法：  
1. 前向传播：使用原函数及上一结点value来更新当前结点的value
2. 反向传播：使用导数、下一结点的梯度、当前结点的梯度及上一结点的value来更新当前结点的梯度

In [2]:
class Node:
    """Base class for every node in the computation graph.

    Attributes:
        inbound_nodes: nodes whose values feed into this node.
        value: the value computed by ``forward``; ``None`` until it is set.
        outbound_nodes: nodes that consume this node's value.
        gradients: dict populated by ``backward`` in subclasses.

    Constructing a node registers it as an outbound node of each of its
    inbound nodes.
    """

    def __init__(self, inbound_nodes=None):
        # A literal ``[]`` default would be created once and shared by every
        # instance built without arguments; use a fresh list per instance.
        if inbound_nodes is None:
            inbound_nodes = []
        self.inbound_nodes = inbound_nodes
        self.value = None
        self.outbound_nodes = []
        self.gradients = {}
        # Wire the reverse edges so upstream nodes know their consumers.
        for node in self.inbound_nodes:
            node.outbound_nodes.append(self)

    def forward(self):
        """Compute ``self.value`` from the inbound nodes (subclass hook)."""
        raise NotImplementedError

    def backward(self):
        """Compute ``self.gradients`` from the outbound nodes (subclass hook)."""
        raise NotImplementedError

## 定义输入结点类 
$\normalsize y = x$  

$\Large\frac{dy}{dx} \normalsize= 1$

In [3]:
class Input(Node):
    """Graph node with no inbound nodes.

    Its ``value`` is assigned from outside (not computed), so ``forward``
    does nothing.
    """

    def __init__(self):
        super().__init__()

    def forward(self):
        """No-op: the value is set externally."""
        pass

    def backward(self):
        """Accumulate the gradient reported for this node by every consumer."""
        total = 0
        for consumer in self.outbound_nodes:
            total += consumer.gradients[self]
        self.gradients = {self: total}

## 定义线性结点类
$\normalsize y = x\omega + b$  

$\Large\frac{\partial y}{\partial x} \normalsize= \omega$  

$\Large\frac{\partial y}{\partial \omega} \normalsize= x$  

$\Large\frac{\partial y}{\partial b} \normalsize= 1$  

In [4]:
class Linear(Node):
    """Affine node computing ``np.dot(X, W) + b`` from its three inbound nodes."""

    def __init__(self, X, W, b):
        super().__init__([X, W, b])

    def forward(self):
        """Set ``self.value`` to ``np.dot(X, W) + b`` using the inbound values."""
        x_node, w_node, b_node = self.inbound_nodes
        self.value = np.dot(x_node.value, w_node.value) + b_node.value

    def backward(self):
        """Accumulate gradients w.r.t. X, W and b over every consumer."""
        x_node, w_node, b_node = self.inbound_nodes
        # Start from zeros shaped like each inbound value, then add the
        # contribution of every outbound node.
        self.gradients = {
            node: np.zeros_like(node.value) for node in self.inbound_nodes
        }
        for consumer in self.outbound_nodes:
            upstream = consumer.gradients[self]
            self.gradients[x_node] += np.dot(upstream, w_node.value.T)
            self.gradients[w_node] += np.dot(x_node.value.T, upstream)
            # Summing over axis 0 collapses the row dimension for the bias.
            self.gradients[b_node] += np.sum(upstream, axis=0)

## 定义Sigmoid结点类
$\normalsize y = \Large\frac{1}{1 + e^{-x}}$  

$\Large\frac{dy}{dx} \normalsize= (1-y)y$

In [5]:
class Sigmoid(Node):
    """Node applying the logistic function elementwise to one inbound node."""

    def __init__(self, node):
        super().__init__([node])

    def _sigmoid(self, x):
        """Return ``1 / (1 + exp(-x))``."""
        return 1.0 / (np.exp(-x) + 1.0)

    def forward(self):
        """Set ``self.value`` to the sigmoid of the inbound node's value."""
        source = self.inbound_nodes[0]
        self.value = self._sigmoid(source.value)

    def backward(self):
        """Accumulate ``y * (1 - y) * upstream`` over every consumer."""
        source = self.inbound_nodes[0]
        self.gradients = {
            node: np.zeros_like(node.value) for node in self.inbound_nodes
        }
        # The local derivative depends only on the forward output, so it is
        # the same for every consumer.
        activation = self.value
        local_grad = activation * (1 - activation)
        for consumer in self.outbound_nodes:
            upstream = consumer.gradients[self]
            self.gradients[source] += local_grad * upstream

## 定义MSE结点类
$\normalsize z = \Large \frac{1}{m}\normalsize\sum_{i=1}^{m}({y}_{i} - a_{i})^2$  
$\Large\frac{\partial z}{\partial y_i} \normalsize= \Large \frac{2}{m}\normalsize({y}_{i} - a_{i})$  
$\Large\frac{\partial z}{\partial a_i} \normalsize=- \Large \frac{2}{m}\normalsize({y}_{i} - a_{i})$

In [6]:
class MSE(Node):
    """Mean-squared-error node over two inbound nodes ``y`` and ``a``."""

    def __init__(self, y, a):
        super().__init__([y, a])

    def forward(self):
        """Compute the mean of the squared differences between ``y`` and ``a``.

        Both inbound values are reshaped to a single column before
        subtracting. ``self.m`` and ``self.diff`` are stored for ``backward``.
        """
        y_node, a_node = self.inbound_nodes
        targets = y_node.value.reshape(-1, 1)
        outputs = a_node.value.reshape(-1, 1)

        self.m = y_node.value.shape[0]
        self.diff = targets - outputs
        self.value = np.mean(self.diff ** 2)

    def backward(self):
        """Store the gradients w.r.t. ``y`` and ``a`` (equal magnitude, opposite sign)."""
        y_node, a_node = self.inbound_nodes
        scale = 2 / self.m
        self.gradients[y_node] = scale * self.diff
        self.gradients[a_node] = -scale * self.diff

## 拓扑排序 
对结点进行排序，排序规则如下：  
首先把所有input nodes放入列表中  
定义一个二维字典G，把所有input nodes和其对应的outbound nodes存入字典中

In [7]:
def topological_sort(feed_dict):
    """Return the nodes reachable from ``feed_dict``'s keys in topological order.

    Uses Kahn's algorithm: first the edges of the reachable graph are
    collected, then nodes whose inbound edges have all been consumed are
    emitted one at a time.

    Args:
        feed_dict: dict mapping start nodes to the value to feed them. Every
            key that is an ``Input`` gets its ``value`` set from this dict
            as a side effect.

    Returns:
        A list of nodes in which each node appears after all of its
        inbound nodes that are reachable from the keys of ``feed_dict``.
    """
    input_nodes = list(feed_dict)

    # Pass 1: discover every reachable node and record its edges. A node is
    # queued only the first time it is seen, so shared sub-graphs are not
    # walked once per incoming edge, and popping from the end is O(1).
    G = {}
    pending = list(input_nodes)
    while pending:
        n = pending.pop()
        if n not in G:
            G[n] = {'in': set(), 'out': set()}
        for m in n.outbound_nodes:
            if m not in G:
                G[m] = {'in': set(), 'out': set()}
                pending.append(m)
            G[n]['out'].add(m)
            G[m]['in'].add(n)

    # Pass 2: repeatedly emit a node with no remaining inbound edges.
    L = []
    S = set(input_nodes)
    while S:
        n = S.pop()

        if isinstance(n, Input):
            n.value = feed_dict[n]

        L.append(n)
        for m in n.outbound_nodes:
            # discard() rather than remove(): a consumer that uses the same
            # node for two of its inputs appears twice in outbound_nodes,
            # and the edge is already gone on the second visit.
            G[n]['out'].discard(m)
            G[m]['in'].discard(n)
            if not G[m]['in']:
                S.add(m)
    return L

## 定义正向和反向传播的方法 

In [8]:
def forward_and_backward(graph):
    """Run one forward pass and then one backward pass over ``graph``.

    Args:
        graph: sequence of nodes; ``forward`` is called on each in order,
            then ``backward`` is called on each in reverse order.
    """
    for node in graph:
        node.forward()

    for node in reversed(graph):
        node.backward()

## 定义更新梯度的方法 
可训练结点的值 = 可训练结点的值 - 学习率 * 梯度

In [9]:
def sgd_update(trainables, learning_rate=1e-2):
    """Apply one gradient-descent step to each trainable node, in place.

    Args:
        trainables: iterable of nodes; each must hold its own gradient
            under ``node.gradients[node]``.
        learning_rate: step size multiplied into the gradient.
    """
    for param in trainables:
        step = learning_rate * param.gradients[param]
        # Augmented assignment keeps the update in place on the value.
        param.value -= step

In [10]:
# Load the dataset and split it into features (X_) and targets (y_).
# NOTE(review): load_boston appears to have been removed in scikit-learn 1.2;
# confirm the pinned scikit-learn version before re-running this cell.
data = load_boston()
X_, y_ = data['data'], data['target']

In [11]:
# Standardize each column of X_: subtract the column mean, divide by the
# column standard deviation.
X_ = (X_ - X_.mean(axis=0)) / X_.std(axis=0)

# Layer sizes: one hidden layer of n_hidden units, a single output unit.
n_hidden = 10
n_features = X_.shape[1]

# Weights are drawn from a standard normal, biases start at zero.
# W1_ is drawn before W2_ so the random draws happen in a fixed order.
W1_, b1_ = np.random.randn(n_features, n_hidden), np.zeros(n_hidden)
W2_, b2_ = np.random.randn(n_hidden, 1), np.zeros(1)

In [12]:
# Placeholders for the data and for the trainable parameters.
X = Input()
y = Input()
W1 = Input()
b1 = Input()
W2 = Input()
b2 = Input()

# Network: Linear -> Sigmoid -> Linear, scored with mean squared error.
l1 = Linear(X, W1, b1)
s1 = Sigmoid(l1)
l2 = Linear(s1, W2, b2)
cost = MSE(y, l2)

# Initial value for every Input node.
feed_dict = {X: X_, y: y_, W1: W1_, b1: b1_, W2: W2_, b2: b2_}

# Training schedule.
epochs = 1000
batch_size = 11
m = X_.shape[0]  # total number of examples
steps_per_epoch = m // batch_size

# Order the nodes for the forward/backward passes; the parameters listed in
# trainables are the ones updated by SGD.
graph = topological_sort(feed_dict)
trainables = [W1, b1, W2, b2]

print("Total number of examples = {}".format(m))

Total number of examples = 506


In [13]:
# Training: repeat the sample / propagate / update cycle for every epoch.
for i in range(epochs):
    loss = 0
    for step in range(steps_per_epoch):
        # Draw a random mini-batch of batch_size examples.
        X_batch, y_batch = resample(X_, y_, n_samples=batch_size)

        # Point the X and y Input nodes at the new batch.
        X.value = X_batch
        y.value = y_batch

        # Forward pass, then backward pass to compute the gradients.
        forward_and_backward(graph)

        # Gradient-descent step on the trainable parameters.
        sgd_update(trainables)

        # The last node of the sorted graph holds this batch's loss value.
        loss += graph[-1].value

    # Report the mean batch loss for this epoch.
    print("Epoch: {}, Loss: {:.3f}".format(i+1, loss/steps_per_epoch))

Epoch: 1, Loss: 127.184
Epoch: 2, Loss: 38.539
Epoch: 3, Loss: 23.880
Epoch: 4, Loss: 23.327
Epoch: 5, Loss: 23.523
Epoch: 6, Loss: 22.151
Epoch: 7, Loss: 19.863
Epoch: 8, Loss: 15.525
Epoch: 9, Loss: 17.041
Epoch: 10, Loss: 14.782
Epoch: 11, Loss: 14.931
Epoch: 12, Loss: 11.219
Epoch: 13, Loss: 14.842
Epoch: 14, Loss: 13.628
Epoch: 15, Loss: 12.485
Epoch: 16, Loss: 11.637
Epoch: 17, Loss: 9.994
Epoch: 18, Loss: 11.851
Epoch: 19, Loss: 10.302
Epoch: 20, Loss: 8.814
Epoch: 21, Loss: 8.585
Epoch: 22, Loss: 14.109
Epoch: 23, Loss: 10.644
Epoch: 24, Loss: 9.702
Epoch: 25, Loss: 9.319
Epoch: 26, Loss: 9.848
Epoch: 27, Loss: 11.929
Epoch: 28, Loss: 10.691
Epoch: 29, Loss: 11.741
Epoch: 30, Loss: 8.876
Epoch: 31, Loss: 9.455
Epoch: 32, Loss: 9.202
Epoch: 33, Loss: 10.199
Epoch: 34, Loss: 8.291
Epoch: 35, Loss: 7.776
Epoch: 36, Loss: 7.709
Epoch: 37, Loss: 9.337
Epoch: 38, Loss: 8.969
Epoch: 39, Loss: 8.444
Epoch: 40, Loss: 9.032
Epoch: 41, Loss: 7.431
Epoch: 42, Loss: 9.553
Epoch: 43, Loss: 6

Epoch: 355, Loss: 4.233
Epoch: 356, Loss: 3.976
Epoch: 357, Loss: 4.023
Epoch: 358, Loss: 4.750
Epoch: 359, Loss: 4.331
Epoch: 360, Loss: 4.926
Epoch: 361, Loss: 4.631
Epoch: 362, Loss: 4.524
Epoch: 363, Loss: 4.267
Epoch: 364, Loss: 4.919
Epoch: 365, Loss: 3.615
Epoch: 366, Loss: 4.058
Epoch: 367, Loss: 5.259
Epoch: 368, Loss: 4.634
Epoch: 369, Loss: 4.183
Epoch: 370, Loss: 4.403
Epoch: 371, Loss: 3.795
Epoch: 372, Loss: 4.236
Epoch: 373, Loss: 5.250
Epoch: 374, Loss: 4.898
Epoch: 375, Loss: 4.673
Epoch: 376, Loss: 4.653
Epoch: 377, Loss: 4.814
Epoch: 378, Loss: 4.592
Epoch: 379, Loss: 3.596
Epoch: 380, Loss: 3.980
Epoch: 381, Loss: 4.688
Epoch: 382, Loss: 4.417
Epoch: 383, Loss: 3.866
Epoch: 384, Loss: 4.515
Epoch: 385, Loss: 4.490
Epoch: 386, Loss: 3.931
Epoch: 387, Loss: 4.062
Epoch: 388, Loss: 4.086
Epoch: 389, Loss: 4.317
Epoch: 390, Loss: 3.689
Epoch: 391, Loss: 4.483
Epoch: 392, Loss: 4.019
Epoch: 393, Loss: 3.754
Epoch: 394, Loss: 4.403
Epoch: 395, Loss: 4.258
Epoch: 396, Loss

Epoch: 709, Loss: 3.873
Epoch: 710, Loss: 4.200
Epoch: 711, Loss: 3.978
Epoch: 712, Loss: 4.149
Epoch: 713, Loss: 3.660
Epoch: 714, Loss: 3.902
Epoch: 715, Loss: 3.852
Epoch: 716, Loss: 3.668
Epoch: 717, Loss: 3.965
Epoch: 718, Loss: 3.568
Epoch: 719, Loss: 3.937
Epoch: 720, Loss: 3.412
Epoch: 721, Loss: 3.457
Epoch: 722, Loss: 3.617
Epoch: 723, Loss: 3.959
Epoch: 724, Loss: 4.052
Epoch: 725, Loss: 3.825
Epoch: 726, Loss: 4.071
Epoch: 727, Loss: 3.765
Epoch: 728, Loss: 4.163
Epoch: 729, Loss: 3.722
Epoch: 730, Loss: 3.686
Epoch: 731, Loss: 3.145
Epoch: 732, Loss: 3.379
Epoch: 733, Loss: 4.417
Epoch: 734, Loss: 4.057
Epoch: 735, Loss: 3.623
Epoch: 736, Loss: 3.896
Epoch: 737, Loss: 3.584
Epoch: 738, Loss: 4.349
Epoch: 739, Loss: 4.544
Epoch: 740, Loss: 4.178
Epoch: 741, Loss: 3.761
Epoch: 742, Loss: 4.083
Epoch: 743, Loss: 4.066
Epoch: 744, Loss: 4.211
Epoch: 745, Loss: 3.919
Epoch: 746, Loss: 4.097
Epoch: 747, Loss: 3.737
Epoch: 748, Loss: 3.989
Epoch: 749, Loss: 4.661
Epoch: 750, Loss