In [1]:
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys
import d2lzh_pytorch as d2l

In [3]:
# 读取数据
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

### 定义模型

In [6]:
num_inputs = 784
num_outputs = 10

In [7]:
# 1
class LinearNet(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super(LinearNet, self).__init__()
        self.linear = nn.Linear(num_inputs, num_outputs)
    def forward(self, x):
        y = self.linear(x.view(x.shape[0], -1))
        return y

In [10]:
# 2
# 定义形状转换功能
class FlattenLayer(nn.Module):
    def __init__(self):
        super().__init__()
    def forward(self,x):
        return x.view(x.shape[0], -1)
# 模型2
from collections import OrderedDict

net = nn.Sequential(
    OrderedDict([('flatten', FlattenLayer()),
                 ('linear', nn.Linear(num_inputs, num_outputs))
                ])
             )

**初始化参数**

In [12]:
init.normal_(net.linear.weight, mean=0, std=0.01);
init.constant_(net.linear.bias, val=0);

**softmax与交叉熵损失函数**<br>
由于softmax与交叉熵损失函数分开定义可能会导致数值不稳定https://blog.csdn.net/Shingle_/article/details/81988628<br>
因此，pytorch提供了一个同时包含softmax计算以及交叉熵损失计算的函数，提升了数值稳定性

In [13]:
loss = nn.CrossEntropyLoss()

**定义优化算法**

In [15]:
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)

**训练模型**

In [17]:
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n

In [18]:
num_epochs = 10
for epoch in range(num_epochs):
    train_l_sum, train_acc_sum, n = 0, 0, 0
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y).sum()
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        train_l_sum += l.item()
        train_acc_sum += (y_hat.argmax(dim=1)==y).sum().item()
        n+=y.shape[0]
    test_acc = evaluate_accuracy(test_iter, net)
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

epoch 1, loss 0.0022, train acc 0.813, test acc 0.811
epoch 2, loss 0.0021, train acc 0.825, test acc 0.818
epoch 3, loss 0.0020, train acc 0.832, test acc 0.821
epoch 4, loss 0.0019, train acc 0.837, test acc 0.825
epoch 5, loss 0.0019, train acc 0.839, test acc 0.827
epoch 6, loss 0.0018, train acc 0.843, test acc 0.826
epoch 7, loss 0.0018, train acc 0.844, test acc 0.828
epoch 8, loss 0.0018, train acc 0.846, test acc 0.817
epoch 9, loss 0.0018, train acc 0.848, test acc 0.833
epoch 10, loss 0.0017, train acc 0.850, test acc 0.830
