In [3]:
import torch
from torch import nn
from torch.nn import init
import sys
import numpy as np
import d2lzh_pytorch as d2l
import torchvision
import torchvision.transforms as transforms

#### 加载数据

In [5]:
batch_size = 256
mnist_train = torchvision.datasets.FashionMNIST(root='./Datasets/FashionMNIST',
                                               train=True,
                                               transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(root='./Datasets/FashionMNIST',
                                               train=False,
                                               transform=transforms.ToTensor())
train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size,
                                        shuffle=True, num_workers=4)
test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size,
                                        shuffle=True, num_workers=4)

#### 定义模型

In [23]:
# model1
class MLP(nn.Module):
    def __init__(self, num_inputs, num_hiddens, num_outputs):
        super().__init__()
        self.linear1 = nn.Linear(num_inputs, num_hiddens)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(num_hiddens, num_outputs)
    def forward(self,x):
        x1 = x.view(x.shape[0], -1)
        y1 = self.linear1(x1)
        h1 = self.relu(y1)
        y = self.linear2(h1)
        return y

In [28]:
# model2
class FlattenLayer(nn.Module):
    def __init__(self):
        super().__init__()
    def forward(self,x):
        y = x.view(x.shape[0], -1)
        return y

net = nn.Sequential(
        FlattenLayer(),
        nn.Linear(num_inputs, num_hiddens),
        nn.ReLU(),
        nn.Linear(num_hiddens, num_outputs), 
        )

In [27]:
num_inputs, num_outputs, num_hiddens = 784, 10, 256
net = MLP(num_inputs, num_hiddens, num_outputs)
# 初始化参数
for params in net.parameters():
    init.normal_(params, mean=0, std=0.01)

#### 定义损失函数、优化器

In [18]:
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)

In [20]:
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n

In [21]:
num_epochs = 10
for epoch in range(num_epochs):
    train_l_sum, train_acc_sum, n = 0, 0, 0
    for X, y in train_iter:
        y_hat = net.forward(X)
        l = loss(y_hat, y).sum()
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        train_l_sum += l.item()
        train_acc_sum += (y_hat.argmax(dim=1)==y).sum().item()
        n+=y.shape[0]
    test_acc = evaluate_accuracy(test_iter, net)
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

epoch 1, loss 0.0019, train acc 0.826, test acc 0.780
epoch 2, loss 0.0016, train acc 0.844, test acc 0.802
epoch 3, loss 0.0015, train acc 0.855, test acc 0.818
epoch 4, loss 0.0014, train acc 0.865, test acc 0.829
epoch 5, loss 0.0014, train acc 0.872, test acc 0.853
epoch 6, loss 0.0013, train acc 0.877, test acc 0.857
epoch 7, loss 0.0013, train acc 0.882, test acc 0.843
epoch 8, loss 0.0012, train acc 0.886, test acc 0.799
epoch 9, loss 0.0012, train acc 0.888, test acc 0.864
epoch 10, loss 0.0011, train acc 0.893, test acc 0.871
