In [1]:
from sklearn import datasets
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

digits = datasets.load_digits()

In [2]:
digits.data[0].shape

(64,)

In [3]:
class FullyConnect:
    def __init__(self, l_x, l_y):  # 两个参数分别为输入层的长度和输出层的长度
        # 使用随机数初始化参数，请暂时忽略这里为什么多了np.sqrt(l_x)
        self.weights = np.random.randn(l_y, l_x) / np.sqrt(l_x)
        self.bias = np.random.randn(l_y, 1)  # 使用随机数初始化参数
        self.lr = 0  # 先将学习速率初始化为0，最后统一设置学习速率
        

    def forward(self, x):
        self.x = x  # 把中间结果保存下来，以备反向传播时使用
        self.y = np.array([np.dot(self.weights, xx) +
                           self.bias for xx in x])  # 计算全连接层的输出
#         print('FullyConnect np.array([np.dot(self.weights, xx) + self.bias for xx in x]).shape', np.array([np.dot(self.weights, xx) + self.bias for xx in x]).shape)
        return self.y  # 将这一层计算的结果向前传递

    def backward(self, d):
        # 根据链式法则，将反向传递回来的导数值乘以x，得到对参数的梯度
        ddw = [np.dot(dd, xx.T) for dd, xx in zip(d, self.x)]
        # 每一条数据都能求出一个ddw，然后对他们取一个平均，得到平均的梯度变化
        self.dw = np.sum(ddw, axis=0) / self.x.shape[0]
        self.db = np.sum(d, axis=0) / self.x.shape[0]
        self.dx = np.array([np.dot(self.weights.T, dd) for dd in d])

        # 利用梯度下降的思想，更新参数。这里的lr就是步长的意思
        self.weights -= self.lr * self.dw
        self.bias -= self.lr * self.db
        return self.dx  # 反向传播梯度

In [4]:
class Sigmoid:
    def __init__(self):  # 无参数，不需初始化
        pass
    # 这里输入的变量的 x，其实就是上面公式的 z

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    # 完成正向传播，将输入的 z ，放入 Sigmoid 函数中，最终得到结果 h，并返回
    def forward(self, x):
        self.x = x
        self.y = self.sigmoid(x)
        return self.y

In [5]:
class Accuracy:
    def __init__(self):
        pass

    def forward(self, x, label):  # 只需forward
        self.accuracy = np.sum(
            [np.argmax(xx) == ll for xx, ll in zip(x, label)])  # 对预测正确的实例数求和
        self.accuracy = 1.0 * self.accuracy / x.shape[0]  # 也就是计算正确率 ,公式 7 的实现
        return self.accuracy

In [6]:
class QuadraticLoss:
    def __init__(self):
        pass
    # 正向传播和上文一样，具体注释参照上文
    def forward(self, x, label):
        self.x = x
        self.label = np.zeros_like(x)
        for a, b in zip(self.label, label):
            a[b] = 1.0
        # 对公式 8 实现
        self.loss = np.sum(np.square(x - self.label)) / \
        self.x.shape[0] / 2  # 求平均后再除以2是为了表示方便
        return self.loss

    # 定义反向传播
    def backward(self):
        # 这里的dx，就是我们求得函数关于x偏导数，也就是梯度，将它保存起来，后面更新的时候会用到
        self.dx = (self.x - self.label) / self.x.shape[0]  # 2被抵消掉了
        return self.dx

In [7]:
# 图片大小为 8*8
# 则此时一张图片就是一条数据，每张图片对呀一个 label（0-9范围内）
x = digits.data
labels = digits.target

# 开始搭建神经网络
inner_layers = []
inner_layers.append(FullyConnect(8 * 8, 10))
inner_layers.append(Sigmoid())
# 神经网络搭建完成

losslayer = QuadraticLoss()  # 计算损失
accuracy = Accuracy()  # 计算准确率

# 开始将数据送入神经网络进行正向传播
for layer in inner_layers:  # 前向计算
    x = layer.forward(x)

loss = losslayer.forward(x, labels)  # 调用损失层forward函数计算损失函数值
accu = accuracy.forward(x, labels)
print('loss:', loss, 'accuracy:', accu)

loss: 21.790288505475054 accuracy: 0.0


In [8]:
class Sigmoid:
    def __init__(self):  # 无参数，不需初始化
        pass
    # 即公式 5
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def forward(self, x):
        self.x = x
        self.y = self.sigmoid(x)
        return self.y
    # 即公式 9
    def backward(self, d):
        sig = self.sigmoid(self.x)
        self.dx = d * sig * (1 - sig)
        return self.dx  # 反向传递梯度

In [9]:
train_data,train_target = digits.data[:1500],digits.target[:1500]
test_data,test_target = digits.data[1500:-1],digits.target[1500:-1]
train_data.shape,train_target.shape,test_data.shape,test_target.shape

((1500, 64), (1500,), (296, 64), (296,))

In [10]:
inner_layers = []
layer1 = FullyConnect(64, 32)
inner_layers.append(layer1) # 因为每条数据的长度为 8*8=64，因此这里第一个全连接层，接收长度为64
inner_layers.append(Sigmoid())
inner_layers.append(FullyConnect(32, 16)) # 因为每条数据的长度为 8*8=64，因此这里第一个全连接层，接收长度为64
inner_layers.append(Sigmoid())
inner_layers.append(FullyConnect(16, 10))
inner_layers.append(Sigmoid())
inner_layers

[<__main__.FullyConnect at 0x2baf0b0db50>,
 <__main__.Sigmoid at 0x2baf0b0d5b0>,
 <__main__.FullyConnect at 0x2baf0b0da00>,
 <__main__.Sigmoid at 0x2baf0b0db80>,
 <__main__.FullyConnect at 0x2baf0b0d4f0>,
 <__main__.Sigmoid at 0x2baf0b0daf0>]

In [11]:
losslayer = QuadraticLoss()
accuracy = Accuracy()
for layer in inner_layers:
    layer.lr = 2000     #所有中间层设置学习速率
epochs = 350  # 对训练数据遍历的次数，也就是学习时间。
#在开始的时候，准确率会随之学习时间的增加而提高。
#当模型学习完训练数据中的所有信息后，准确率就会趋于稳定
losslayer,accuracy,epochs

(<__main__.QuadraticLoss at 0x2baf0b0d760>,
 <__main__.Accuracy at 0x2baf0b0de80>,
 350)

In [12]:
for i in range(epochs):
    losssum = 0
    iters = 0
    x = train_data
    label = train_target
    x = x.reshape(-1,64,1)
    for layer in inner_layers:  # 前向计算
        x = layer.forward(x)
    loss = losslayer.forward(x, label)  # 调用损失层forward函数计算损失函数值
    losssum += loss
    iters += 1
    d = losslayer.backward()  # 调用损失层backward函数层计算将要反向传播的梯度

    for layer in inner_layers[::-1]:  # 反向传播
        d = layer.backward(d)

    if i%10==0: 
        x = test_data
        label = test_target
        x = x.reshape(-1,64,1)
        for layer in inner_layers:
            x = layer.forward(x)
            
        accu = accuracy.forward(x, label)  # 调用准确率层forward()函数求出准确率
        print('epochs:{},loss:{},test_accuracy:{}'.format(i,losssum / iters,accu))

epochs:0,loss:1.7851388233987988,test_accuracy:0.10135135135135136
epochs:10,loss:0.4485045183610696,test_accuracy:0.2533783783783784
epochs:20,loss:0.4452012969483326,test_accuracy:0.33783783783783783
epochs:30,loss:0.4428102113599457,test_accuracy:0.30743243243243246
epochs:40,loss:0.43991951108548816,test_accuracy:0.32094594594594594
epochs:50,loss:0.4366099803506653,test_accuracy:0.3344594594594595
epochs:60,loss:0.43273574486054195,test_accuracy:0.3783783783783784
epochs:70,loss:0.4280262464050834,test_accuracy:0.4222972972972973
epochs:80,loss:0.4227234454506997,test_accuracy:0.44594594594594594
epochs:90,loss:0.41646655058200577,test_accuracy:0.46621621621621623
epochs:100,loss:0.40859995818705813,test_accuracy:0.4831081081081081
epochs:110,loss:0.3991420234540602,test_accuracy:0.5067567567567568
epochs:120,loss:0.38806232220141934,test_accuracy:0.5202702702702703
epochs:130,loss:0.3752889649102913,test_accuracy:0.5337837837837838
epochs:140,loss:0.36075885404393476,test_accurac

In [14]:
layer1.x.shape

(1500, 64, 1)

##### 