In [4]:
import torch 
from torch import nn
from torch.nn import init
import numpy as np
import sys
import torchvision
sys.path.append('..')
import d2lzh_pytorch as d2l


In [5]:
def load_data_fashion_mnist(batch_size, resize=None, root='~/Datasets/FashionMNIST', num_workers=0):
    """Download Fashion-MNIST if needed and return train/test data loaders.

    Args:
        batch_size: Number of samples per mini-batch, used for both loaders.
        resize: Optional size forwarded to ``torchvision.transforms.Resize``.
            When falsy, no resize step is added.
        root: Directory the dataset is downloaded to / read from.
        num_workers: Number of worker subprocesses each ``DataLoader`` uses.
            The default of 0 loads data in the main process, which is what
            this function previously hard-coded.

    Returns:
        A ``(train_iter, test_iter)`` tuple of ``torch.utils.data.DataLoader``
        objects. Only the training loader shuffles its samples.
    """
    # Resize (when requested) must run before ToTensor, matching the
    # order in which the steps are composed.
    steps = []
    if resize:
        steps.append(torchvision.transforms.Resize(size=resize))
    steps.append(torchvision.transforms.ToTensor())
    transform = torchvision.transforms.Compose(steps)

    mnist_train = torchvision.datasets.FashionMNIST(
        root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(
        root=root, train=False, download=True, transform=transform)

    train_iter = torch.utils.data.DataLoader(
        mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(
        mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_iter, test_iter


In [6]:
# Mini-batch size; reused below for the data loaders and the training call.
batch_size = 256
# Build the Fashion-MNIST training and test iterators (the dataset is
# downloaded on first use because the loader passes download=True).
train_iter, test_iter = load_data_fashion_mnist(batch_size)

In [7]:
# Each image is flattened to 28 * 28 = 784 features before the linear layer;
# the model emits one score per class (10 classes).
num_inputs = 28 * 28
num_outputs = 10

class LinearNet(nn.Module):
    """Single fully connected layer applied to flattened inputs.

    Args:
        num_inputs: Number of features per sample after flattening.
        num_outputs: Number of output scores per sample.
    """

    def __init__(self, num_inputs, num_outputs):
        super(LinearNet, self).__init__()
        self.linear = nn.Linear(num_inputs, num_outputs)

    def forward(self, x):
        """Flatten every dimension after the first and apply the linear layer.

        Args:
            x: Batched input, e.g. images of shape (batch, 1, 28, 28).

        Returns:
            Tensor of shape (batch, num_outputs).
        """
        # reshape (not view) so non-contiguous inputs, such as the result of
        # permute/transpose, are accepted instead of raising RuntimeError.
        flat = x.reshape(x.shape[0], -1)  # (batch, num_inputs)
        return self.linear(flat)

In [8]:
# Instantiate the class-based model; the bare `net` expression on the last
# line makes the notebook display its module structure.
net = LinearNet(num_inputs, num_outputs)
net

LinearNet(
  (linear): Linear(in_features=784, out_features=10, bias=True)
)

In [9]:
class FlattenLayer(nn.Module):
    """Module that keeps dimension 0 and merges all remaining dimensions."""

    def __init__(self):
        super(FlattenLayer, self).__init__()

    def forward(self, x):
        """Return ``x`` reshaped to ``(x.shape[0], -1)``.

        ``reshape`` is used instead of ``view`` so that inputs which are not
        contiguous in memory (for example after ``permute`` or ``transpose``)
        are flattened rather than raising ``RuntimeError``.
        """
        return x.reshape(x.shape[0], -1)

In [10]:
# Way 2: build the same model as an nn.Sequential with named sub-modules.
# Naming the layers lets later cells reach them as attributes (net.linear).
from collections import OrderedDict

net = nn.Sequential(OrderedDict([
    ('flatten', FlattenLayer()),
    ('linear', nn.Linear(num_inputs, num_outputs)),
]))

In [11]:
# Initialise the layer registered under the name 'linear':
# weights are drawn from a normal distribution (mean 0, std 0.01), bias is set to 0.
# The last call's return value is what the notebook displays as this cell's output.
init.normal_(net.linear.weight, mean=0, std=0.01)
init.constant_(net.linear.bias, val=0)

Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

In [12]:
# Cross-entropy on the network's raw scores. nn.CrossEntropyLoss applies
# log-softmax itself, which is why the model has no softmax layer.
loss = nn.CrossEntropyLoss()

In [13]:
# Plain SGD over every parameter of `net`, learning rate 0.1.
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)

In [14]:
# Train for 5 epochs with the d2l helper, which prints loss and train/test
# accuracy once per epoch.
# The two `None` arguments are presumably the params/lr slots for the helper's
# hand-written SGD path, unused here because an optimizer is passed — confirm
# against d2l.train_ch3.
# NOTE(review): the printed loss (~0.002-0.003) looks too small for a per-sample
# cross-entropy at ~83% accuracy; the helper may be dividing batch-mean losses
# by the number of samples — confirm how d2l.train_ch3 normalises it.
num_epochs = 5
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)

epoch 1, loss 0.0031, train acc 0.750, test acc 0.786
epoch 2, loss 0.0022, train acc 0.813, test acc 0.805
epoch 3, loss 0.0021, train acc 0.826, test acc 0.799
epoch 4, loss 0.0020, train acc 0.832, test acc 0.815
epoch 5, loss 0.0019, train acc 0.837, test acc 0.828
