In [1]:
%matplotlib inline
import d2lzh as d2l
from mxnet import nd
from mxnet.gluon import loss as gloss

### 获取和读取数据

In [3]:
# Mini-batch size; the same value is later passed to d2l.train_ch3.
batch_size = 256
# d2l helper that yields the training and test data iterators.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

### 定义模型参数

In [4]:
# Layer widths of the one-hidden-layer perceptron.
num_inputs = 784
num_outputs = 10
num_hiddens = 256

# Hidden layer: num_inputs -> num_hiddens, random weights and zero bias.
W1 = nd.random.normal(shape=(num_inputs, num_hiddens), scale=0.01)
b1 = nd.zeros(num_hiddens)

# Output layer: num_hiddens -> num_outputs, random weights and zero bias.
W2 = nd.random.normal(shape=(num_hiddens, num_outputs), scale=0.01)
b2 = nd.zeros(num_outputs)

# `net` reads these arrays as module-level globals; `params` is the list
# handed to the training routine.
params = [W1, b1, W2, b2]

# Attach a gradient buffer to every parameter so autograd can fill it in.
for param in params:
    param.attach_grad()

### 定义激活函数

In [5]:
def relu(X):
    """ReLU activation: element-wise maximum of ``X`` and the scalar 0.

    Negative entries become 0; all other entries are returned unchanged.
    """
    floor = 0
    activated = nd.maximum(X, floor)
    return activated

### 定义模型

In [6]:
def net(X):
    """Forward pass of the perceptron with a single hidden layer.

    Reads ``num_inputs``, ``W1``, ``b1``, ``W2`` and ``b2`` from the
    module-level globals at call time, so whichever values those names
    currently hold are the ones used.

    Returns the raw output-layer scores. No softmax is applied here; it is
    left to the loss function.
    """
    # Flatten every raw image into a vector of length num_inputs.
    flat = X.reshape((-1, num_inputs))
    # Hidden layer: linear map followed by the ReLU activation.
    hidden_linear = nd.dot(flat, W1) + b1
    hidden = relu(hidden_linear)
    # Output layer: linear map only.
    scores = nd.dot(hidden, W2) + b2
    return scores

### 定义损失函数

In [7]:
# For better numerical stability, use the Gluon loss that bundles the softmax
# operation together with the cross-entropy computation.
loss = gloss.SoftmaxCrossEntropyLoss()

### 训练模型

In [8]:
# Training hyper-parameters.
num_epochs = 5
lr = 0.5

# No Gluon trainer is used, so the parameter list and the learning rate have
# to be passed explicitly.
d2l.train_ch3(
    net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr
)

epoch 1, loss 0.7826, train acc 0.704, test acc 0.819
epoch 2, loss 0.4775, train acc 0.823, test acc 0.848
epoch 3, loss 0.4245, train acc 0.843, test acc 0.859
epoch 4, loss 0.3914, train acc 0.855, test acc 0.840
epoch 5, loss 0.3705, train acc 0.863, test acc 0.873


In [11]:
# Experiment: same one-hidden-layer model, but with 500 hidden units.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

num_inputs = 784
num_outputs = 10
num_hiddens = 500

# Hidden layer: num_inputs -> num_hiddens.
W1 = nd.random.normal(shape=(num_inputs, num_hiddens), scale=0.01)
b1 = nd.zeros(num_hiddens)

# Output layer: num_hiddens -> num_outputs.
W2 = nd.random.normal(shape=(num_hiddens, num_outputs), scale=0.01)
b2 = nd.zeros(num_outputs)

# `net` reads W1, b1, W2 and b2 as globals, so rebinding them here is what
# switches it over to the wider hidden layer.
params = [W1, b1, W2, b2]

# Attach a gradient buffer to every freshly created parameter.
for param in params:
    param.attach_grad()

loss = gloss.SoftmaxCrossEntropyLoss()
num_epochs = 5
lr = 0.5
d2l.train_ch3(
    net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr
)

epoch 1, loss 0.7944, train acc 0.700, test acc 0.818
epoch 2, loss 0.4820, train acc 0.821, test acc 0.850
epoch 3, loss 0.4281, train acc 0.842, test acc 0.853
epoch 4, loss 0.3907, train acc 0.856, test acc 0.864
epoch 5, loss 0.3676, train acc 0.864, test acc 0.853


In [14]:
def net(X):
    """Forward pass of the perceptron with two hidden layers.

    Reads ``num_inputs`` and the parameters ``W1``/``b1``, ``W2``/``b2`` and
    ``W3``/``b3`` from the module-level globals at call time. ``W3`` and
    ``b3`` are created in a later cell and must exist before the first call.

    This definition replaces the earlier one-hidden-layer ``net``; any
    training cell run after it uses this version.

    Returns the raw output-layer scores. No softmax is applied here; it is
    left to the loss function.
    """
    # Flatten every raw image into a vector of length num_inputs.
    H = X.reshape((-1, num_inputs))
    # Both hidden layers: linear map followed by the ReLU activation.
    for W, b in ((W1, b1), (W2, b2)):
        H = relu(nd.dot(H, W) + b)
    # Third (output) layer: linear map only.
    return nd.dot(H, W3) + b3

In [17]:
# Experiment: perceptron with two hidden layers (256 and 64 units).
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

num_inputs = 784
num_outputs = 10
num_hiddens1 = 256
num_hiddens2 = 64

# First hidden layer: num_inputs -> num_hiddens1.
W1 = nd.random.normal(shape=(num_inputs, num_hiddens1), scale=0.01)
b1 = nd.zeros(num_hiddens1)

# Second hidden layer: num_hiddens1 -> num_hiddens2.
W2 = nd.random.normal(shape=(num_hiddens1, num_hiddens2), scale=0.01)
b2 = nd.zeros(num_hiddens2)

# Output layer: num_hiddens2 -> num_outputs.
W3 = nd.random.normal(shape=(num_hiddens2, num_outputs), scale=0.01)
b3 = nd.zeros(num_outputs)

# The two-hidden-layer `net` reads all six arrays as module-level globals.
params = [W1, b1, W2, b2, W3, b3]

# Attach a gradient buffer to every parameter.
for param in params:
    param.attach_grad()

loss = gloss.SoftmaxCrossEntropyLoss()
num_epochs = 5
lr = 0.5
d2l.train_ch3(
    net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr
)

epoch 1, loss 1.2189, train acc 0.521, test acc 0.754
epoch 2, loss 0.6001, train acc 0.774, test acc 0.836
epoch 3, loss 0.4941, train acc 0.817, test acc 0.844
epoch 4, loss 0.4258, train acc 0.842, test acc 0.857
epoch 5, loss 0.4010, train acc 0.850, test acc 0.868
