In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import numpy as np
import random

# Seed every random number generator the notebook relies on so that
# repeated runs start from the same state.
seed = 666
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
use_cuda = torch.cuda.is_available()
if use_cuda:
    torch.cuda.manual_seed(seed)

# Ask cuDNN for deterministic behaviour and turn its benchmark mode off.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if use_cuda else torch.device("cpu")
print(f'Using device: {device}')

# 定义MLP模型
class MLP(nn.Module):
    """Multi-layer perceptron with two hidden layers.

    Layer order: Linear -> ReLU -> Dropout(0.5) -> Linear -> ReLU -> Linear.
    The final layer has no activation, so ``forward`` returns raw scores
    (one per output unit) rather than probabilities.

    Args:
        input_size: Number of features per sample after flattening.
        hidden_size1: Width of the first hidden layer.
        hidden_size2: Width of the second hidden layer.
        output_size: Number of output units.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super().__init__()
        # Remembered so forward() can flatten to the configured width.
        self.input_size = input_size
        self.fc1 = nn.Linear(input_size, hidden_size1)    # first hidden layer
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)  # second hidden layer
        self.fc3 = nn.Linear(hidden_size2, output_size)   # output layer
        self.dropout = nn.Dropout(0.5)  # active only in train() mode

    def forward(self, x):
        """Flatten ``x`` to ``(-1, input_size)`` and return the output scores.

        Args:
            x: Tensor whose element count is a multiple of ``input_size``,
                e.g. a ``(batch, 1, 28, 28)`` image batch when
                ``input_size`` is 784.

        Returns:
            Tensor of shape ``(rows, output_size)`` where ``rows`` is the
            number of ``input_size``-wide rows obtained by flattening.
        """
        # Flatten using the configured feature count instead of a hard-coded
        # 28*28, so the module works for any `input_size`. `reshape` (unlike
        # `view`) also accepts non-contiguous inputs such as permuted tensors.
        x = x.reshape(-1, self.input_size)
        x = F.relu(self.fc1(x))  # first hidden layer + ReLU
        x = self.dropout(x)      # dropout is applied after the first layer only
        x = F.relu(self.fc2(x))  # second hidden layer + ReLU
        x = self.fc3(x)          # output layer, no activation
        return x

# Hyperparameters
input_size = 784        # 28x28 images flattened to 784 features
hidden_size1 = 256      # units in the first hidden layer
hidden_size2 = 128      # units in the second hidden layer
output_size = 10        # one output per digit class (0-9)
learning_rate = 0.001
batch_size = 64
num_epochs = 10
validation_split = 0.1  # fraction of the training data held out for validation

# Preprocessing: convert to a tensor, then normalize with mean 0.1307 and std 0.3081.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load the MNIST training split (downloaded into ./data if it is missing).
# Kept under its own name so the raw dataset is not overwritten by the subset below.
full_train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)

# Split into training and validation subsets.
# The validation size is rounded instead of truncating a float product:
# int((1 - split) * n) can come out one short (e.g. int((1 - 0.8) * 10) == 1).
# The training subset takes the remainder, so the two sizes always sum to n.
val_size = round(validation_split * len(full_train_dataset))
train_size = len(full_train_dataset) - val_size
# random_split draws from torch's global RNG, so the split follows the seed set earlier.
train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size])

print(train_size,val_size)

# Data loaders: shuffle the training batches, keep validation order fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

# Model, loss function and optimizer.
model = MLP(input_size, hidden_size1, hidden_size2, output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Learning-rate schedule: multiply the learning rate by 0.5 every 5 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

Using device: cuda
54000 6000


In [6]:
for epoch in range(num_epochs):
    # ---- Training phase ----
    model.train()  # training mode: dropout is active
    running_loss = 0.0   # sum of per-sample losses seen this epoch
    train_samples = 0    # number of samples seen this epoch
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)  # move the batch to the selected device

        # Forward pass
        outputs = model(data)
        loss = criterion(outputs, target)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # `loss` is a batch mean (assumes `criterion` uses mean reduction, the
        # nn.CrossEntropyLoss default). Weight it by the batch size so a short
        # final batch does not count as much as a full one.
        batch_n = target.size(0)
        running_loss += loss.item() * batch_n
        train_samples += batch_n

    # Advance the learning-rate schedule once per epoch, after the optimizer steps.
    scheduler.step()

    # Per-sample mean training loss. It is measured with dropout active, so it
    # is not directly comparable to the validation loss below.
    print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {running_loss/train_samples:.4f}')

    # ---- Validation phase ----
    model.eval()  # evaluation mode: dropout is disabled (this does not itself turn off gradients)
    val_loss = 0.0   # sum of per-sample losses over the validation set
    correct = 0
    total = 0
    with torch.no_grad():  # no gradient tracking needed for evaluation
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)

            # Forward pass
            outputs = model(data)
            loss = criterion(outputs, target)
            batch_n = target.size(0)
            val_loss += loss.item() * batch_n

            # Accuracy: the predicted class is the index of the largest score.
            predicted = outputs.argmax(dim=1)
            total += batch_n
            correct += (predicted == target).sum().item()

    val_accuracy = 100 * correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Validation Loss: {val_loss/total:.4f}, Validation Accuracy: {val_accuracy:.2f}%')


Epoch [1/10], Training Loss: 0.3362
Epoch [1/10], Validation Loss: 0.1749, Validation Accuracy: 94.45%
Epoch [2/10], Training Loss: 0.1843
Epoch [2/10], Validation Loss: 0.1390, Validation Accuracy: 95.62%
Epoch [3/10], Training Loss: 0.1517
Epoch [3/10], Validation Loss: 0.1146, Validation Accuracy: 96.52%
Epoch [4/10], Training Loss: 0.1338
Epoch [4/10], Validation Loss: 0.1168, Validation Accuracy: 96.40%
Epoch [5/10], Training Loss: 0.1232
Epoch [5/10], Validation Loss: 0.1155, Validation Accuracy: 96.57%
Epoch [6/10], Training Loss: 0.0885
Epoch [6/10], Validation Loss: 0.0862, Validation Accuracy: 97.40%
Epoch [7/10], Training Loss: 0.0793
Epoch [7/10], Validation Loss: 0.0837, Validation Accuracy: 97.47%
Epoch [8/10], Training Loss: 0.0766
Epoch [8/10], Validation Loss: 0.0816, Validation Accuracy: 97.45%
Epoch [9/10], Training Loss: 0.0723
Epoch [9/10], Validation Loss: 0.0836, Validation Accuracy: 97.52%
Epoch [10/10], Training Loss: 0.0710
Epoch [10/10], Validation Loss: 0.07

In [4]:
# Final evaluation on the MNIST test split, using the same preprocessing.
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

model.eval()  # evaluation mode: dropout is disabled
test_correct, test_total = 0, 0
with torch.no_grad():  # inference only, no gradient tracking
    for data, target in test_loader:
        data = data.to(device)
        target = target.to(device)
        outputs = model(data)
        # Predicted class is the index of the largest score in each row.
        predicted = outputs.argmax(dim=1)
        test_correct += (predicted == target).sum().item()
        test_total += target.size(0)

test_accuracy = 100 * test_correct / test_total
print(f'Test Accuracy: {test_accuracy:.2f}%')

Test Accuracy: 97.87%
