## 1.AlexNet

In [8]:
import gluonbook as gb
from mxnet import gluon, init, nd
from mxnet.gluon import data as gdata, nn
import os
import sys

In [9]:
# AlexNet-style network, assembled stage by stage. Layers are created in the
# same order as they are stacked.
net = nn.Sequential()

# Stage 1: a large 11 x 11 window to capture objects, with stride 4 to sharply
# reduce the output height and width. The number of output channels (96) is
# also much larger than in LeNet.
net.add(nn.Conv2D(96, kernel_size=11, strides=4, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2))

# Stage 2: a smaller window; padding 2 keeps the output height/width equal to
# the input's, and the number of output channels grows.
net.add(nn.Conv2D(256, kernel_size=5, padding=2, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2))

# Stage 3: three consecutive convolutions with an even smaller window. All but
# the last one increase the number of output channels further, and no pooling
# follows the first two, so height and width are only reduced after the third.
net.add(nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'),
        nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'),
        nn.Conv2D(256, kernel_size=3, padding=1, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2))

# Classifier: the fully connected layers have several times more outputs than
# LeNet's, so each is followed by dropout to mitigate overfitting.
net.add(nn.Dense(4096, activation='relu'), nn.Dropout(0.5),
        nn.Dense(4096, activation='relu'), nn.Dropout(0.5))

# Output layer: 10 classes because Fashion-MNIST is used here, rather than the
# 1000 classes of the original paper.
net.add(nn.Dense(10))

In [10]:
# Sanity check: feed a random tensor of shape (1, 1, 224, 224) through the
# network one layer at a time and print the output shape after each layer.
X = nd.random.uniform(shape=(1, 1, 224, 224))
net.initialize()
for blk in net:
    X = blk(X)  # X now holds the output of the layer just applied
    print(blk.name, 'output shape:\t', X.shape)

conv5 output shape:	 (1, 96, 54, 54)
pool3 output shape:	 (1, 96, 26, 26)
conv6 output shape:	 (1, 256, 26, 26)
pool4 output shape:	 (1, 256, 12, 12)
conv7 output shape:	 (1, 384, 12, 12)
conv8 output shape:	 (1, 384, 12, 12)
conv9 output shape:	 (1, 256, 12, 12)
pool5 output shape:	 (1, 256, 5, 5)
dense3 output shape:	 (1, 4096)
dropout2 output shape:	 (1, 4096)
dense4 output shape:	 (1, 4096)
dropout3 output shape:	 (1, 4096)
dense5 output shape:	 (1, 10)


## 2.读取数据

In [16]:
def load_data_fashion_mnist(batch_size, resize = None, root = os.path.join(
        '~', '.mxnet', 'datasets', 'fashion-mnist')):
    """Build the Fashion-MNIST training and test data loaders.

    Args:
        batch_size: Batch size passed to both DataLoaders.
        resize: Optional size handed to ``transforms.Resize``. When falsy,
            no resize step is added and images are only converted by
            ``ToTensor``.
        root: Dataset directory; a leading '~' is expanded to the user's
            home directory.

    Returns:
        A ``(train_iter, test_iter)`` tuple of ``DataLoader`` objects. The
        training loader shuffles; the test loader does not.
    """
    data_dir = os.path.expanduser(root)
    transforms = gdata.vision.transforms

    # Preprocessing pipeline: optional resize first, then ToTensor.
    steps = [transforms.Resize(resize)] if resize else []
    steps.append(transforms.ToTensor())
    pipeline = transforms.Compose(steps)

    train_set = gdata.vision.FashionMNIST(root=data_dir, train=True)
    test_set = gdata.vision.FashionMNIST(root=data_dir, train=False)

    # No worker processes when sys.platform starts with 'win32'; 4 elsewhere.
    workers = 0 if sys.platform.startswith('win32') else 4

    def make_loader(dataset, shuffle):
        # The pipeline is applied to the first element of each sample only.
        return gdata.DataLoader(dataset.transform_first(pipeline), batch_size,
                                shuffle=shuffle, num_workers=workers)

    train_iter = make_loader(train_set, shuffle=True)
    test_iter = make_loader(test_set, shuffle=False)
    return train_iter, test_iter

In [17]:
batch_size = 128
# Resize images to 224 so they match the 224 x 224 input used in the
# shape check of the network above.
train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224)
# Bare last expression: display both loaders as the cell output.
(train_iter, test_iter)

(<mxnet.gluon.data.dataloader.DataLoader at 0x10b916fd0>,
 <mxnet.gluon.data.dataloader.DataLoader at 0x10b908d30>)

## 3.训练

In [None]:
# Training hyperparameters.
lr = 0.01
num_epochs = 5
# Device context picked by gluonbook (presumably a GPU when one is available,
# otherwise the CPU - confirm against gb.try_gpu).
ctx = gb.try_gpu()

# The network was already initialized for the shape check, so force a
# re-initialization on the chosen context, this time with Xavier init.
net.initialize(init=init.Xavier(), ctx=ctx, force_reinit=True)

# Plain SGD over all network parameters.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
gb.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)

training on cpu(0)
