In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# 1. Hyperparameters
BATCH_SIZE = 16
EPOCHS = 20
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 2. Preprocessing: convert images to tensors, then normalize the single
#    channel with mean 0.1307 and std 0.3081.
pipeline = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# 3. Download (if missing) and load the MNIST splits into the "data" directory
train_set = datasets.MNIST("data", train=True, download=True, transform=pipeline)
test_set = datasets.MNIST("data", train=False, download=True, transform=pipeline)  # held-out test split

train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation metrics do not depend on sample order, so the test split is
# iterated in its natural order; this keeps batches reproducible between runs.
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

# 4. Model definition
class Digit(nn.Module):
    """Small CNN classifier: two convolutions followed by two linear layers.

    The shape notes in ``forward`` assume 1x28x28 inputs, which is the size
    ``fc1`` is dimensioned for (20 channels * 10 * 10 after ``conv2``).
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=3)
        # Fully connected head mapping flattened features to 10 class scores.
        self.fc1 = nn.Linear(in_features=20 * 10 * 10, out_features=500)
        self.fc2 = nn.Linear(in_features=500, out_features=10)

    def forward(self, x):
        """Return per-class log-probabilities, shape [B, 10], for batch ``x``."""
        batch = x.size(0)

        # [B,1,28,28] -> conv1 -> [B,10,24,24] -> 2x2 max-pool -> [B,10,12,12]
        features = F.relu(self.conv1(x))
        features = F.max_pool2d(features, kernel_size=2, stride=2)

        # [B,10,12,12] -> conv2 -> [B,20,10,10]
        features = F.relu(self.conv2(features))

        # Flatten to [B,2000], then fc1 -> [B,500] -> fc2 -> [B,10]
        hidden = F.relu(self.fc1(features.view(batch, -1)))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

# 5. Build the network, move it to the selected device, and attach an Adam
#    optimizer with the library's default hyperparameters.
model = Digit()
model.to(DEVICE)  # Module.to moves parameters in place and returns the module
optimizer = optim.Adam(params=model.parameters())

# 6. Training function
def train_model(model, device, train_loader, optimizer, epoch):
    """Run one optimization pass over every batch in ``train_loader``.

    Each batch is moved to ``device``, gradients are cleared, the NLL loss of
    the model output is backpropagated and the optimizer takes one step.
    The loss of every 100th batch (starting with batch 0) is printed, tagged
    with ``epoch``.

    Args:
        model: Module whose output is passed directly to ``F.nll_loss``.
        device: Device the batches are moved to.
        train_loader: Iterable of ``(data, target)`` batches.
        optimizer: Optimizer updating ``model``'s parameters.
        epoch: Epoch number, used only in the progress message.
    """
    model.train()
    for step, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        batch_loss = F.nll_loss(model(inputs), labels)  # model emits log-probabilities
        batch_loss.backward()
        optimizer.step()

        # Only report on every 100th batch to keep the log readable.
        if step % 100:
            continue
        print(f"Epoch {epoch} | Batch {step} | Loss: {batch_loss.item():.6f}")

# 7. 测试函数
def test_model(model, device, test_loader):
    model.eval()
    test_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target).item()  # 使用 nll_loss
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()
    
    test_loss /= len(test_loader)  # 除以批次数量而非总样本数
    accuracy = 100.0 * correct / len(test_loader.dataset)
    print(f"Test Loss: {test_loss:.4f} | Accuracy: {accuracy:.2f}%\n")

# 8. Main loop: one training pass followed by one evaluation pass per epoch
for epoch in range(1, EPOCHS + 1):
    train_model(
        model=model,
        device=DEVICE,
        train_loader=train_loader,
        optimizer=optimizer,
        epoch=epoch,
    )
    test_model(model=model, device=DEVICE, test_loader=test_loader)

Epoch 1 | Batch 0 | Loss: 2.296793
Epoch 1 | Batch 100 | Loss: 0.390136
Epoch 1 | Batch 200 | Loss: 0.553724
Epoch 1 | Batch 300 | Loss: 0.123437
Epoch 1 | Batch 400 | Loss: 0.022159
Epoch 1 | Batch 500 | Loss: 0.006714
Epoch 1 | Batch 600 | Loss: 0.083356
Epoch 1 | Batch 700 | Loss: 0.144991
Epoch 1 | Batch 800 | Loss: 0.045870
Epoch 1 | Batch 900 | Loss: 0.006900
Epoch 1 | Batch 1000 | Loss: 0.067014
Epoch 1 | Batch 1100 | Loss: 0.006246
Epoch 1 | Batch 1200 | Loss: 0.004457
Epoch 1 | Batch 1300 | Loss: 0.209896
Epoch 1 | Batch 1400 | Loss: 0.053329
Epoch 1 | Batch 1500 | Loss: 0.005773
Epoch 1 | Batch 1600 | Loss: 0.006275
Epoch 1 | Batch 1700 | Loss: 0.007195
Epoch 1 | Batch 1800 | Loss: 0.535523
