## 1.LeNet模型

In [3]:
import gluonbook as gb
import mxnet as mx
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn
import time

In [4]:
net = nn.Sequential()
# Convolution stage 1: six 5x5 feature maps with sigmoid activation,
# followed by 2x2 max pooling with stride 2.
net.add(nn.Conv2D(channels=6, kernel_size=5, activation='sigmoid'),
        nn.MaxPool2D(pool_size=2, strides=2))
# Convolution stage 2: sixteen 5x5 feature maps, same pooling.
net.add(nn.Conv2D(channels=16, kernel_size=5, activation='sigmoid'),
        nn.MaxPool2D(pool_size=2, strides=2))
# Fully connected head ending in 10 outputs. By default Dense converts input of
# shape (batch size, channels, height, width) into
# (batch size, channels * height * width) before applying its weights.
net.add(nn.Dense(120, activation='sigmoid'),
        nn.Dense(84, activation='sigmoid'),
        nn.Dense(10))

In [5]:
'''
Build a single-channel data sample whose height and width are both 28, then
run the forward computation layer by layer to inspect each layer's output shape.
'''
X = nd.random.uniform(shape = (1, 1, 28, 28))
net.initialize()
for layer in net:
    # Feed the previous layer's output into the current layer.
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)

conv0 output shape:	 (1, 6, 24, 24)
pool0 output shape:	 (1, 6, 12, 12)
conv1 output shape:	 (1, 16, 8, 8)
pool1 output shape:	 (1, 16, 4, 4)
dense0 output shape:	 (1, 120)
dense1 output shape:	 (1, 84)
dense2 output shape:	 (1, 10)


## 2.获取数据和训练

In [6]:
# Mini-batch size; also passed to train_ch5 in the training cell.
batch_size = 256
# Training and test batch iterators from the gluonbook Fashion-MNIST helper.
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size = batch_size)

In [7]:
def try_gpu4():
    """Pick the device context to run on.

    Tries to create an NDArray on gpu(0); if that succeeds gpu(0) is used,
    otherwise the CPU context is used instead.

    Returns:
        The GPU context when the probe allocation succeeds, else ``mx.cpu()``.
    """
    try:
        gpu_ctx = mx.gpu()
        # Probe allocation: an MXNetError here means the GPU cannot be used.
        nd.zeros((1, ), ctx=gpu_ctx)
    except mx.base.MXNetError:
        return mx.cpu()
    return gpu_ctx

ctx = try_gpu4()
ctx

cpu(0)

In [None]:
def accuracy(y_hat, y):
    """Fraction of samples whose highest-scoring class matches the label.

    Defined here because the gluonbook module has no ``accuracy`` attribute.

    Args:
        y_hat: per-class scores, one row per sample (arg-max taken over axis 1).
        y: class labels, one per sample; cast to float32 before comparison.

    Returns:
        The mean of the element-wise matches, converted with ``asscalar()``.
    """
    predicted = y_hat.argmax(axis=1)
    labels = y.astype('float32')
    matches = predicted == labels
    return matches.mean().asscalar()

In [12]:
def evaluate_accuracy(data_iter, net, ctx):
    """Return the classification accuracy of ``net`` over every sample in ``data_iter``.

    Correct predictions and samples are counted across all batches and divided
    once at the end. Averaging per-batch accuracies instead would give a
    smaller final batch the same weight as a full one and skew the result.

    Args:
        data_iter: iterable yielding ``(X, y)`` batches of features and labels.
            It does not need to support ``len()``.
        net: callable mapping a feature batch to per-class scores (the
            predicted class is the arg-max over axis 1).
        ctx: device context; every batch is copied to it with
            ``as_in_context`` (e.g. gpu(0)) before the forward pass.

    Returns:
        The fraction of correctly classified samples, or ``nan`` when
        ``data_iter`` yields no samples.
    """
    num_correct, num_samples = 0, 0
    for X, y in data_iter:
        # If ctx is a GPU this copies the batch onto it.
        X = X.as_in_context(ctx)
        y = y.as_in_context(ctx).astype('float32')
        # The running total becomes an array after the first batch; it is
        # converted to a Python scalar only once, after the loop.
        num_correct += (net(X).argmax(axis=1) == y).sum()
        # assumes one label per sample, so y.size is the batch's sample count
        num_samples += y.size
    if num_samples == 0:
        # Accuracy is undefined without samples; avoid dividing by zero.
        return float('nan')
    return num_correct.asscalar() / num_samples

In [14]:
def train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs):
    """Train ``net`` with softmax cross-entropy loss and print per-epoch statistics.

    Training loss and accuracy are accumulated per sample and divided by the
    number of samples seen in the epoch, so a smaller final batch is weighted
    by its true size rather than counting as much as a full batch.

    Args:
        net: the Gluon network to train.
        train_iter: iterable yielding ``(X, y)`` training batches.
        test_iter: iterable of test batches, passed to ``evaluate_accuracy``.
        batch_size: value passed to ``trainer.step`` on every batch.
        trainer: the ``gluon.Trainer`` that updates the parameters of ``net``.
        ctx: device context each batch is copied to before the forward pass.
        num_epochs: number of passes over ``train_iter``.

    Raises:
        ZeroDivisionError: if ``train_iter`` yields no samples in an epoch.
    """
    print('training on', ctx)
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            X, y = X.as_in_context(ctx), y.as_in_context(ctx)
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y)
            l.backward()
            trainer.step(batch_size)
            y = y.astype('float32')
            # presumably l holds one loss value per sample (Gluon losses keep
            # the batch axis), so summing it gives the batch's total loss
            train_l_sum += l.sum().asscalar()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            # assumes one label per sample, so y.size is the batch's sample count
            n += y.size
        # The reported epoch time includes this evaluation pass.
        test_acc = evaluate_accuracy(test_iter, net, ctx)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec' % (epoch + 1, train_l_sum / n,
                                 train_acc_sum / n,
                                 test_acc, time.time() - start))

In [16]:
# Learning rate for SGD and the number of passes over the training set.
lr, num_epochs = 0.9, 10
# The network was already initialised for the shape check, so force a
# re-initialisation, this time on ctx and with the Xavier initialiser.
net.initialize(force_reinit = True, ctx = ctx, init = init.Xavier())
# SGD trainer over all parameters of the network.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)

training on cpu(0)
epoch 1, loss 2.3194, train acc 0.101, test acc 0.099, time 13.1 sec
epoch 2, loss 2.2690, train acc 0.127, test acc 0.417, time 13.3 sec
epoch 3, loss 1.1754, train acc 0.529, test acc 0.670, time 12.5 sec
epoch 4, loss 0.7969, train acc 0.688, test acc 0.733, time 12.5 sec
epoch 5, loss 0.6778, train acc 0.733, test acc 0.751, time 12.6 sec
epoch 6, loss 0.6149, train acc 0.759, test acc 0.788, time 12.7 sec
epoch 7, loss 0.5648, train acc 0.777, test acc 0.793, time 12.6 sec
epoch 8, loss 0.5285, train acc 0.794, test acc 0.815, time 12.9 sec
epoch 9, loss 0.4929, train acc 0.808, test acc 0.806, time 12.8 sec
epoch 10, loss 0.4652, train acc 0.822, test acc 0.834, time 13.0 sec
