# 3.7 softmax回归的简洁实现

In [1]:
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys
sys.path.append("..") 
import d2lzh_pytorch as d2l
from collections import OrderedDict

print(torch.__version__)

1.8.1


## 3.7.1 获取和读取数据

In [2]:
batch_size = 250
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
for X, y in train_iter:
    print(X.shape)
    break
print(train_iter)

len(mnist_train) 60000
len(mnist_test) 10000
torch.Size([250, 1, 28, 28])
<torch.utils.data.dataloader.DataLoader object at 0x7f8b07f50550>


In [3]:
# 28 x 28
num_inputs = 784
num_outputs = 10

class FlattenLayer(nn.Module):
    def __init__(self):
        # python2 写法 super(FlattenLayer, self).__init__()
        super().__init__()
    def forward(self, x): # x shape: (batch, *, *, ...)
        # print('x.shape', x.shape) torch.size([250, 1, 28, 28])
        # x.shape[0] == 250, x.shape[1] == 1, x.shape[2] == 28, x.shape[3] == 28
        # x 为 250 张 单通道的 28 x 28 像素的 tensor
        # 返回 size 为 250 x 784 的 tensor
        return x.view(x.shape[0], -1)


net = nn.Sequential(
        # FlattenLayer(),
        # nn.Linear(num_inputs, num_outputs)
        OrderedDict([
          ('flatten', FlattenLayer()),
          ('linear', nn.Linear(num_inputs, num_outputs))])
        )
print(net, '\n')
print(net.parameters(), '\n')
print(net.flatten, '\n')
print(net.linear, '\n')


Sequential(
  (flatten): FlattenLayer()
  (linear): Linear(in_features=784, out_features=10, bias=True)
) 

<generator object Module.parameters at 0x7f8b07f5f8e0> 

FlattenLayer() 

Linear(in_features=784, out_features=10, bias=True) 



In [4]:
# 初始化 linear 层权重与偏差
init.normal_(net.linear.weight, mean=0, std=0.01)
init.constant_(net.linear.bias, val=0)
print(net.linear)

Linear(in_features=784, out_features=10, bias=True)


## 3.7.3 softmax和交叉熵损失函数

In [5]:
# 交叉熵损失函数
loss = nn.CrossEntropyLoss()

## 3.7.4 定义优化算法

In [6]:
# 随机梯度下降算法优化器
optimizer = torch.optim.SGD(net.parameters(), lr=0.02)

## 3.7.5 训练模型

In [7]:
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    for epoch in range(num_epochs):
        # 初始化训练损失， 训练精度
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0


        # 遍历训练迭代器中的样本与标签, 循环次数 = 样本总数 / batch_size
        for X, y in train_iter:
            # y_hat 预测值
            y_hat = net(X)
            # torch.Size([250, 10])
            # print('y_hat.shape', y_hat.shape)
            # y.shape torch.Size([250])
            # print('y.shape', y.shape)



            # 计算损失
            l = loss(y_hat, y).sum()
            # print(l)
            # print(l.item())
            
            # 梯度清零
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()

            # 反向传播更新权重
            l.backward()

            if optimizer is None:
                sgd(params, lr, batch_size)
            else:
                optimizer.step()  # 启动优化算法
            
            # 训练损失总和标量 eg. l: tensor(0.5213, grad_fn=<SumBackward0>) l.item(): 0.5213115215301514
            train_l_sum += l.item() 

            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

def evaluate_accuracy(data_iter, net, device=None):
    if device is None and isinstance(net, torch.nn.Module):
        # 如果没指定 device 就使用 net 的 device
        device = list(net.parameters())[0].device 
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(net, torch.nn.Module):
                net.eval() # 评估模式, 这会关闭 dropout
                acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                net.train() # 改回训练模式
            else: # 自定义的模型, 3.13节之后不会用到, 不考虑GPU
                if('is_training' in net.__code__.co_varnames): # 如果有is_training这个参数
                    # 将is_training设置成False
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item() 
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item() 
            n += y.shape[0]
    return acc_sum / n



num_epochs = 10

train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)

epoch 1, loss 0.0046, train acc 0.674, test acc 0.718
epoch 2, loss 0.0031, train acc 0.760, test acc 0.759
epoch 3, loss 0.0027, train acc 0.784, test acc 0.780
epoch 4, loss 0.0026, train acc 0.797, test acc 0.790
epoch 5, loss 0.0024, train acc 0.805, test acc 0.797
epoch 6, loss 0.0024, train acc 0.811, test acc 0.803
epoch 7, loss 0.0023, train acc 0.816, test acc 0.806
epoch 8, loss 0.0022, train acc 0.819, test acc 0.809
epoch 9, loss 0.0022, train acc 0.822, test acc 0.811
epoch 10, loss 0.0022, train acc 0.825, test acc 0.814
