### Fashion-MNIST数据集

In [1]:
%matplotlib inline
import d2lzh as d2l
from mxnet.gluon import data as gdata
import sys
import time

In [2]:
# Load the Fashion-MNIST training and test splits through Gluon's built-in
# dataset class; the `train` flag selects which split is returned.
mnist_train = gdata.vision.FashionMNIST(train=True)
mnist_test = gdata.vision.FashionMNIST(train=False)

Downloading C:\Users\Junhua\AppData\Roaming\mxnet\datasets\fashion-mnist\train-images-idx3-ubyte.gz from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/fashion-mnist/train-images-idx3-ubyte.gz...
Downloading C:\Users\Junhua\AppData\Roaming\mxnet\datasets\fashion-mnist\train-labels-idx1-ubyte.gz from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/fashion-mnist/train-labels-idx1-ubyte.gz...
Downloading C:\Users\Junhua\AppData\Roaming\mxnet\datasets\fashion-mnist\t10k-images-idx3-ubyte.gz from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/fashion-mnist/t10k-images-idx3-ubyte.gz...
Downloading C:\Users\Junhua\AppData\Roaming\mxnet\datasets\fashion-mnist\t10k-labels-idx1-ubyte.gz from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/fashion-mnist/t10k-labels-idx1-ubyte.gz...


In [3]:
# Training set: 60000 examples; test set: 10000 examples
len(mnist_train), len(mnist_test)

(60000, 10000)

In [7]:
# Indexing the dataset yields an (image, label) pair; take the first one.
feature, label = mnist_train[0]

In [8]:
# Inspect the image's shape and element type.
feature.shape, feature.dtype

((28, 28, 1), numpy.uint8)

In [9]:
# Inspect the label's value, its Python type and its dtype.
label, type(label), label.dtype

(2, numpy.int32, dtype('int32'))

In [10]:
# This function has been saved in the d2lzh package for later use
def get_fashion_mnist_labels(labels):
    """Translate numeric Fashion-MNIST class indices into text names.

    Args:
        labels: Iterable of class indices; each element is passed through
            ``int()`` before being used as a list index.

    Returns:
        A list with the text name of every index, in input order.
    """
    class_names = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    names = []
    for idx in labels:
        names.append(class_names[int(idx)])
    return names

In [12]:
# A function of this name has also been saved in the d2lzh package for later use
def show_fashion_mnist(images, labels):
    """Draw a single row of Fashion-MNIST images, each titled with its label.

    Args:
        images: Sequence of images; every element must support
            ``reshape((28, 28))`` followed by ``asnumpy()``.
        labels: Sequence of titles, one per image.

    Images and labels are paired with ``zip``, so surplus items on either
    side are silently ignored.
    """
    d2l.use_svg_display()
    # squeeze=False makes subplots always return a 2-D array of axes, so a
    # single image works too (by default a lone Axes object is returned,
    # which cannot be iterated). The leading _ is a value we do not use.
    _, figs = d2l.plt.subplots(1, len(images), figsize=(12, 12),
                               squeeze=False)
    # figs has shape (1, len(images)); iterate over its only row.
    for f, img, lbl in zip(figs[0], images, labels):
        f.imshow(img.reshape((28, 28)).asnumpy())
        f.set_title(lbl)
        # Hide both axes: tick marks carry no meaning for these images.
        f.axes.get_xaxis().set_visible(False)
        f.axes.get_yaxis().set_visible(False)

In [None]:
# Slice out the first nine training examples and plot them with their
# text labels.
X, y = mnist_train[0:9]
show_fashion_mnist(X, get_fashion_mnist_labels(y))

In [16]:
# Batch size
# NOTE(review): 1 looks like the value suggested by the exercise on reading
# performance rather than a typical setting; confirm it is intended.
batch_size = 1
# A ToTensor instance converts image data from uint8 to 32-bit floats and
# divides by 255 so that every pixel value lies between 0 and 1. It also
# moves the channel axis from the last dimension to the first, which is
# convenient for the convolutional networks introduced later.
transformer = gdata.vision.transforms.ToTensor()
# Extra worker processes for reading data are not yet supported on Windows;
# 0 means no additional process is used to speed up data loading.
num_workers = 0 if sys.platform.startswith('win') else 4
# transform_first applies the ToTensor transform to the first element of
# every sample, i.e. the image.
train_iter = gdata.DataLoader(
    mnist_train.transform_first(transformer),
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)
test_iter = gdata.DataLoader(
    mnist_test.transform_first(transformer),
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
)

In [17]:
# Time one full pass over the training iterator. The loop body does nothing
# with the batches, so the elapsed time is the cost of iterating the loader.
start = time.time()
for X, y in train_iter:
    continue
# The formatted elapsed time is the cell's displayed result.
'%.2f sec' % (time.time() - start)

'9.46 sec'

## 练习

* 减小`batch_size`（如到1）会影响读取性能吗？
* 非Windows用户请尝试修改`num_workers`来查看它对读取性能的影响。
* 查阅MXNet文档，`mxnet.gluon.data.vision`里还提供了哪些别的数据集？
* 查阅MXNet文档，`mxnet.gluon.data.vision.transforms`还提供了哪些别的变换方法？