# softmax回归的简洁实现

In [24]:
import torchvision
import torchvision.transforms as transforms
from torch import nn
from torch.nn import init
import torch
import torchvision
import numpy as np
import sys

## 获取和读取数据

In [25]:
def load_data_fashion_mnist(batch_size, root='../data/FashionMNIST'):
    """Build mini-batch iterators over the Fashion-MNIST train and test splits.

    Both splits are requested with ``download=True`` under ``root`` and are
    converted with ``transforms.ToTensor()``.

    Args:
        batch_size: Number of samples per mini-batch for both loaders.
        root: Directory where the dataset files are stored.

    Returns:
        A ``(train_iter, test_iter)`` tuple of ``DataLoader`` objects; only
        the training loader shuffles its samples.
    """
    to_tensor = transforms.ToTensor()
    fashion_mnist = torchvision.datasets.FashionMNIST
    train_set = fashion_mnist(root=root, train=True, download=True, transform=to_tensor)
    test_set = fashion_mnist(root=root, train=False, download=True, transform=to_tensor)

    # 0 means no extra worker processes are used to speed up data loading
    # (chosen on Windows); every other platform uses 4 workers.
    workers = 0 if sys.platform.startswith('win') else 4

    make_loader = torch.utils.data.DataLoader
    train_iter = make_loader(train_set, batch_size=batch_size, shuffle=True, num_workers=workers)
    test_iter = make_loader(test_set, batch_size=batch_size, shuffle=False, num_workers=workers)
    return train_iter, test_iter

In [26]:
# Mini-batch size shared by the training and test data loaders.
batch_size = 256
# Build the Fashion-MNIST iterators (the loader is called with download=True).
train_iter, test_iter = load_data_fashion_mnist(batch_size)

定义模型：构建网络（模型参数的初始化见下文“参数初始化”一节）

In [27]:
# Number of input features of the linear layer, i.e. the length of one
# flattened sample (presumably 28 * 28 Fashion-MNIST pixels — TODO confirm).
num_inputs = 784
# Number of output units of the linear layer (one score per class).
num_outputs = 10

class LinearNet(nn.Module):
    """Softmax-regression model: one linear layer over flattened inputs.

    The module outputs raw scores; no softmax is applied inside ``forward``.
    """

    def __init__(self, num_inputs, num_outputs):
        """Create the single ``nn.Linear(num_inputs, num_outputs)`` layer."""
        super().__init__()
        self.linear = nn.Linear(in_features=num_inputs, out_features=num_outputs)

    def forward(self, x):
        """Flatten every sample of ``x`` to one row and apply the linear layer."""
        batch = x.size(0)
        # Keep the batch dimension and collapse all remaining dimensions.
        flat = x.view(batch, -1)
        return self.linear(flat)
# Instantiate the model with the input/output sizes defined above.
net = LinearNet(num_inputs, num_outputs)

另一种实现方式（与上面的 LinearNet 等价：用 nn.Sequential 把展平层和线性层串联起来；运行该单元会覆盖上面定义的 net）

In [6]:
from collections import OrderedDict
class FlattenLayer(nn.Module):
    """Parameter-free layer that reshapes each sample into a single row."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return a view of ``x`` with shape ``(x.shape[0], -1)``."""
        batch = x.size(0)
        return x.view(batch, -1)
 
# Same model expressed as a pipeline of named sub-modules: 'flatten' reshapes
# each sample to one row and 'linear' maps it to the output scores, so the
# linear layer stays reachable as ``net.linear``.
net = nn.Sequential(OrderedDict(
    flatten=FlattenLayer(),
    linear=nn.Linear(num_inputs, num_outputs),
))

参数初始化

In [28]:
# Fill the linear layer's weights in place with samples from a normal
# distribution with mean 0 and standard deviation 0.01.
init.normal_(net.linear.weight, mean = 0,std = 0.01)
# Set every bias entry to 0 in place.
init.constant_(net.linear.bias, val= 0)

Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

使用同时包含 softmax 运算和交叉熵损失计算的 nn.CrossEntropyLoss 作为损失函数，并定义优化算法（学习率为 0.1 的小批量随机梯度下降）。

In [29]:
# Cross-entropy loss on the raw scores produced by the network;
# nn.CrossEntropyLoss applies the (log-)softmax itself.
loss = nn.CrossEntropyLoss()
# Stochastic gradient descent over all parameters of `net`, learning rate 0.1.
optimizer = torch.optim.SGD(net.parameters(),lr = 0.1)

训练

In [31]:
def evaluate_accuracy(data_iter, net):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    The prediction for a sample is the index of the largest score along
    dimension 1 of ``net(X)``. Evaluation runs with gradient tracking
    disabled; when ``net`` is an ``nn.Module`` it is switched to evaluation
    mode for the duration of the call and its previous training/evaluation
    mode is restored afterwards.

    Args:
        data_iter: Iterable yielding ``(X, y)`` batches, where ``y`` holds the
            target class index of every sample in ``X``.
        net: Callable mapping a batch ``X`` to a 2-D tensor of class scores.
            It may be an ``nn.Module`` or any plain callable.

    Returns:
        The accuracy as a Python float in ``[0, 1]``.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    is_module = isinstance(net, torch.nn.Module)
    was_training = is_module and net.training
    if is_module:
        # Layers such as dropout / batch norm behave differently in training
        # mode, so evaluate in eval mode.
        net.eval()

    acc_sum, n = 0.0, 0
    try:
        # No autograd graph is needed to count correct predictions.
        with torch.no_grad():
            for X, y in data_iter:
                acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
                n += y.shape[0]
    finally:
        if is_module:
            # Leave the network in the mode the caller had it in.
            net.train(was_training)
    return acc_sum / n

def sgd(params, lr, batch_size):
    """Perform one in-place mini-batch SGD step on every tensor in ``params``.

    Each parameter is decreased by ``lr * param.grad / batch_size``.

    Args:
        params: Iterable of tensors whose ``.grad`` has been populated.
        lr: Learning rate.
        batch_size: Divisor applied to the gradient.
    """
    # The division by batch_size is kept for consistency with the original
    # book, although it should not be needed: a loss computed with PyTorch is
    # usually already averaged over the batch dimension by default.
    for tensor in params:
        step = lr * tensor.grad / batch_size
        # Note: the parameter is modified through its .data attribute.
        tensor.data -= step


def train(net, train_iter, test_iter, loss, num_epochs, batch_size, params=None, lr=None, optimizer=None):
    """Train ``net`` for ``num_epochs`` epochs and print per-epoch statistics.

    After every epoch one line with the epoch number, the accumulated training
    loss divided by the number of training samples, the training accuracy and
    the test accuracy (via ``evaluate_accuracy``) is printed.

    Parameters are updated with ``optimizer`` when one is given; otherwise
    the ``sgd`` function is applied to ``params`` with learning rate ``lr``.

    Args:
        net: Callable model mapping a batch ``X`` to class scores.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` test batches.
        loss: Callable ``loss(y_hat, y)`` returning a tensor; its ``.sum()``
            is back-propagated.
        num_epochs: Number of passes over ``train_iter``.
        batch_size: Forwarded to ``sgd`` when no optimizer is used.
        params: Indexable collection of parameter tensors for the ``sgd``
            path; ignored when ``optimizer`` is given.
        lr: Learning rate for the ``sgd`` path.
        optimizer: Optional optimizer exposing ``zero_grad()`` and ``step()``.

    Raises:
        ValueError: If neither ``optimizer`` nor both ``params`` and ``lr``
            are provided.

    NOTE: the printed loss is ``sum(loss(...).sum()) / n``. With a loss that
    already averages over the batch (the default of ``nn.CrossEntropyLoss``)
    this is the mean batch loss divided once more by the sample count, so it
    is much smaller than the per-sample loss.
    """
    if optimizer is None and (params is None or lr is None):
        raise ValueError('either optimizer or both params and lr must be provided')

    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()

            # Reset the gradients accumulated by the previous step.
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()

            l.backward()
            # Previously optimizer.step() was called unconditionally, which
            # failed on the params/lr path where optimizer is None.
            if optimizer is None:
                sgd(params, lr, batch_size)
            else:
                optimizer.step()

            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f' % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
               

In [32]:
# Number of full passes over the training data.
num_epochs = 10
# Train with the optimizer defined above; params and lr are passed as None
# because they are only meant for the optimizer-free path.
train(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)

epoch 1, loss 0.0031, train acc 0.747, test acc 0.787
epoch 2, loss 0.0022, train acc 0.813, test acc 0.776
epoch 3, loss 0.0021, train acc 0.826, test acc 0.820
epoch 4, loss 0.0020, train acc 0.832, test acc 0.826
epoch 5, loss 0.0019, train acc 0.837, test acc 0.819
epoch 6, loss 0.0019, train acc 0.840, test acc 0.821
epoch 7, loss 0.0018, train acc 0.843, test acc 0.820
epoch 8, loss 0.0018, train acc 0.844, test acc 0.832
epoch 9, loss 0.0018, train acc 0.847, test acc 0.825
epoch 10, loss 0.0018, train acc 0.848, test acc 0.835
