In [1]:
import torch
from IPython import display
from d2l import torch as d2l

### 3.6.1 初始化模型参数

In [25]:
num_inputs = 784
num_outputs = 10

W = torch.normal(0, 0.01, size = (num_inputs, num_outputs), requires_grad = True)  # 784 * 10
b = torch.zeros(num_outputs, requires_grad = True)  # 10

### 3.6.2 定义softmax操作

In [4]:
# 调用sum运算时，可以指定保持在原张量的轴数，而不折叠求和的维度
X = torch.tensor([[1,2,3], [4,5,6]])
X.sum(0, keepdim = True), X.sum(1, keepdim = True)

(tensor([[5, 7, 9]]),
 tensor([[ 6],
         [15]]))

In [5]:
def softmax(X):
    X_exp = torch.exp(X)
    partition = X_exp.sum(1, keepdim=True)
    return X_exp / partition  # 对一行的每个元素使用了广播机制

### 3.6.3 定义模型

In [18]:
def net(X):
    return softmax(torch.matmul(X.reshape((-1, W.shape[0])), W) + b)

### 3.6.4 定义损失函数

In [7]:
def cross_entropy(y_hat, y):
    return -torch.log(y_hat[range(len(y_hat)), y])

In [9]:
y = torch.tensor([0, 2])
y_hat = torch.tensor([[0.1,0.3,0.6], [0.3,0.2,0.5]])
y_hat[[0,1], y]

tensor([0.1000, 0.5000])

In [10]:
cross_entropy(y_hat, y)

tensor([2.3026, 0.6931])

### 3.6.5 分类精度

In [11]:
def accuracy(y_hat, y):
    '''计算预测正确的数量'''
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis = 1)  # 多分类问题，使用argmax函数获取最大值索引
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())

In [13]:
print(accuracy(y_hat, y) / len(y))

0.5


In [14]:
class Accumulator:
    ''' 在n个变量上累加 '''
    def __init__(self, n):
        self.data = [0.0] * n
    
    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]
    
    def reset(self):
        self.data = [0.0] * len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx]

In [15]:
def evaluate_accuracy(net, data_iter):
    '''计算在指定数据集上模型的精度'''
    if isinstance(net, torch.nn.Module):
        net.eval()   # 设置为评估模式, 关闭dropout
    metric = Accumulator(2)
    with torch.no_grad():
        for X, y in data_iter:
            metric.add(accuracy(net(X), y), y.numel())  # y.numel()返回y的元素个数
    return metric[0] / metric[1]

In [16]:
train_iter, test_iter = d2l.load_data_fashion_mnist(32, resize=64)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ../data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100.0%


Extracting ../data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ../data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ../data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100.0%


Extracting ../data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ../data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ../data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100.0%


Extracting ../data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ../data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ../data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100.0%

Extracting ../data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/FashionMNIST/raw






In [29]:
# 定义一个简单的测试数据迭代器
test_data = [(torch.randn(16, 28, 28), torch.randint(0, 10, (16,))) for _ in range(10)]  # 10个批次，每个批次16个样本
test_iter = iter(test_data)

evaluate_accuracy(net, test_iter)

0.1125

### 3.6.6 训练

In [30]:
def train_epoch_ch3(net, train_iter, loss, updater):
    '''训练模型一轮'''
    if isinstance(net, torch.nn.Module):
        net.train()
    # 训练损失总和、训练准确度总和、样本数
    metric = Accumulator(3)
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.optimizer):
            # 使用PyTorch内置的优化器和损失函数
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        else:
            # 使用自定义的优化器和损失函数
            l.sum().backward()
            updater(X.shape[0])
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # 返回训练损失和训练准确度
    return metric[0] / metric[2], metric[1] / metric[2]

In [31]:
def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    ''' 训练模型 '''
    animator = Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0.3, 0.9],
                        legend=['train loss', 'train acc', 'test acc'])
    for epoch in range(num_epochs):
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        animator.add(epoch+1, train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics
    assert train_loss < 0.01, train_loss  # 条件检查，训练误差不应该太大
    assert train_acc <= 1 and train_acc > 0.7, train_acc  # 条件检查，训练精度应该在0.7到1之间
    assert test_acc <= 1 and test_acc > 0.7, test_acc  # 条件检查，测试精度应该在0.7到1之间

In [32]:
lr = 0.1  # 学习率

def updater(batch_size):
    return d2l.sgd([W, b], lr, batch_size)

In [34]:
num_epochs = 10

train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, updater)

NameError: name 'Animator' is not defined