In [5]:
import network3
from network3 import (  # network, layer and activation building blocks used below
    ConvPoolLayer,
    FullyConnectedLayer,
    Network,
    ReLU,
    SoftmaxLayer,
)
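# Softmax maps a vector of scalars to a probability distribution: every output lies in (0, 1) and the outputs sum to 1. Used as the last layer of the network, it lets the output layer perform multi-class classification.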
# Mini-batch size shared by every Network(...) and SGD(...) call below.
mini_batch_size = 10
# Load the training / validation / test splits through network3's loader.
training_data, validation_data, test_data = network3.load_data_shared()

# Chapter 6 baseline: one fully connected hidden layer of 100 units feeding a
# 10-way softmax output layer. The 784 inputs equal 28 * 28, the image size
# that the convolutional networks further down declare in image_shape.
# NOTE(review): this network is only constructed; no net.SGD(...) call for it
# is visible in this notebook, so it appears to stay untrained.
net = Network(
    [
        FullyConnectedLayer(n_in=28 * 28, n_out=100),
        SoftmaxLayer(n_in=100, n_out=10),
    ],
    mini_batch_size,
)


# Chapter 6: put a convolutional-pooling layer in front of the hidden layer.
#   * 5x5 local receptive fields
#   * 20 feature maps (each convolution kernel extracts a different feature)
#   * 2x2 max-pooling (keeps the maximum of every 2x2 neighbourhood, which
#     preserves texture features well)
# The fully connected layer takes 20 * 12 * 12 inputs: 20 maps of 12x12, which
# is consistent with 28 - 5 + 1 = 24 after the convolution and 24 / 2 = 12
# after pooling.
net1 = Network(
    [
        ConvPoolLayer(
            image_shape=(mini_batch_size, 1, 28, 28),
            filter_shape=(20, 1, 5, 5),
            poolsize=(2, 2),
        ),
        FullyConnectedLayer(n_in=20 * 12 * 12, n_out=100),
        SoftmaxLayer(n_in=100, n_out=10),
    ],
    mini_batch_size,
)
net1.SGD(
    training_data,
    30,  # presumably the number of epochs -- confirm against network3
    mini_batch_size,
    0.1,  # presumably the learning rate -- confirm against network3
    validation_data,
    test_data,
)

# Chapter 6: insert a second convolutional-pooling layer into the previous
# example => better accuracy.
# The second layer reads the 20 feature maps of size 12x12 produced by the
# first one and emits 40 maps; the fully connected layer then takes
# 40 * 4 * 4 inputs, consistent with 12 - 5 + 1 = 8 after the convolution and
# 8 / 2 = 4 after pooling.
net2 = Network(
    [
        ConvPoolLayer(
            image_shape=(mini_batch_size, 1, 28, 28),
            filter_shape=(20, 1, 5, 5),
            poolsize=(2, 2),
        ),
        ConvPoolLayer(
            image_shape=(mini_batch_size, 20, 12, 12),
            filter_shape=(40, 20, 5, 5),
            poolsize=(2, 2),
        ),
        FullyConnectedLayer(n_in=40 * 4 * 4, n_out=100),
        SoftmaxLayer(n_in=100, n_out=10),
    ],
    mini_batch_size,
)
net2.SGD(
    training_data,
    30,  # presumably the number of epochs -- confirm against network3
    mini_batch_size,
    0.1,  # presumably the learning rate -- confirm against network3
    validation_data,
    test_data,
)


# Chapter 6: rectified linear units plus some L2 regularization (lmbda=0.1)
# => even better accuracy.
#
# Background on the activation functions:
#   * Sigmoid -- pros: output lies in (0, 1), it is monotonic and continuous,
#     suits an output layer, and its derivative is easy to compute. Con: it
#     soft-saturates; once the input falls into the saturated region f'(x)
#     gets close to 0, which easily leads to vanishing gradients.
#   * tanh -- also soft-saturates, but because its output is centred on 0 it
#     converges faster than sigmoid.
#   * ReLU -- compared with sigmoid it is much cheaper to compute (biological
#     computation has a cost as well), and it still preserves the differences
#     between signals when the input is strong.
#
# ReLU is imported together with the other network3 names at the top of the
# cell, so every import lives in one place.
net3 = Network(
    [
        ConvPoolLayer(
            image_shape=(mini_batch_size, 1, 28, 28),
            filter_shape=(20, 1, 5, 5),
            poolsize=(2, 2),
            activation_fn=ReLU,
        ),
        ConvPoolLayer(
            image_shape=(mini_batch_size, 20, 12, 12),
            filter_shape=(40, 20, 5, 5),
            poolsize=(2, 2),
            activation_fn=ReLU,
        ),
        FullyConnectedLayer(n_in=40 * 4 * 4, n_out=100, activation_fn=ReLU),
        SoftmaxLayer(n_in=100, n_out=10),
    ],
    mini_batch_size,
)
net3.SGD(
    training_data,
    30,  # presumably the number of epochs -- confirm against network3
    mini_batch_size,
    0.03,  # presumably the learning rate (lower than the 0.1 used above)
    validation_data,
    test_data,
    lmbda=0.1,  # L2 regularization strength
)
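# A convolution is essentially a combination of additions and multiplications. Training amounts to learning the filters of every convolutional layer so that each one extracts a particular feature; choosing convolution kernels is choosing which features of the data to pick out.
# The layers before the fully connected layer extract features; the fully connected layer performs the classification.
# Backpropagation steps: (1) run the feed-forward pass and compute the output-layer activations; (2) compute the residual (error) of every output unit; (3) compute the residuals of the remaining nodes; (4) compute the partial derivatives.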



Training mini-batch number 0
Training mini-batch number 1000
Training mini-batch number 2000
Training mini-batch number 3000
Training mini-batch number 4000
Epoch 0: validation accuracy 94.17%
This is the best validation accuracy to date.
The corresponding test accuracy is 93.62%
Training mini-batch number 5000
Training mini-batch number 6000
Training mini-batch number 7000
Training mini-batch number 8000
Training mini-batch number 9000
Epoch 1: validation accuracy 96.39%
This is the best validation accuracy to date.
The corresponding test accuracy is 96.04%
Training mini-batch number 10000
Training mini-batch number 11000
Training mini-batch number 12000
Training mini-batch number 13000
Training mini-batch number 14000
Epoch 2: validation accuracy 97.21%
This is the best validation accuracy to date.
The corresponding test accuracy is 96.88%
Training mini-batch number 15000
Training mini-batch number 16000
Training mini-batch number 17000
Training mini-batch number 18000
Training mini-

In [7]:
# Print the default repr of each network object built above, one per line.
for network in (net, net1, net2, net3):
    print(network)

<network3.Network object at 0x7f7db7840790>
<network3.Network object at 0x7f7dbe785450>
<network3.Network object at 0x7f7dbe6dc050>
<network3.Network object at 0x7f7db6a25b10>
