# task
1. 使用 PyTorch 搭建神经网络模型，实现对 KMNIST 数据集的训练。

https://pytorch.org/vision/stable/generated/torchvision.datasets.KMNIST.html#torchvision.datasets.KMNIST

image.png

2. 尝试调整模型结构（变更神经元数量，增加隐藏层）来提升模型预测的准确率

3. 调试超参数，观察学习率和批次大小对训练的影响。

使用pytorch 搭建网络模型，实现KMNIST数据集的训练

搭建神经网络模型

1. 导入包 
2. 加载数据集
3. 定义模型 
4. 定义损失函数和优化器
5. 训练模型



In [None]:
# Imports
import torch
import torch.nn as nn
import torchvision
from torchvision.datasets import KMNIST
# Fix: `torchvision.transforms.V2` is not a module (the v2 namespace is
# lowercase), so the original line failed on import. The classic transforms
# namespace provides the same `ToTensor` used below.
from torchvision.transforms import ToTensor
import torch.optim as optim
from torch.utils.data import DataLoader

In [None]:
# Load the KMNIST training and test splits; files are downloaded into ./data
# when missing, and every image is converted with ToTensor().
train_data, test_data = (
    KMNIST(root='./data', train=is_train, download=True, transform=ToTensor())
    for is_train in (True, False)
)

In [None]:
# Hyperparameters: optimizer learning rate, number of training epochs, and
# mini-batch size used by the data loaders.
LR, epochs, BATCH_SIZE = 1e-3, 20, 256


In [None]:
# Wrap the datasets in data loaders so they are served in mini-batches.
# Training batches are reshuffled every epoch.
train_dl = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation does not depend on sample order, so the test loader is not
# shuffled; this keeps test runs reproducible.
test_dl = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Model definition: three hidden blocks, each Linear -> BatchNorm1d -> ReLU ->
# Dropout, followed by a final Linear layer with 10 outputs.
model = nn.Sequential(
    *(
        layer
        # (input features, output features, dropout probability) per block
        for n_in, n_out, drop_p in (
            (784, 512, 0.4),
            (512, 256, 0.3),
            (256, 128, 0.2),
        )
        for layer in (
            nn.Linear(n_in, n_out),
            nn.BatchNorm1d(n_out),
            nn.ReLU(),
            nn.Dropout(drop_p),
        )
    ),
    nn.Linear(128, 10),
)

In [None]:
# Loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
# Fix: the method is `parameters()`; the misspelled `parmeters()` raised
# AttributeError before training could start.
optimizer = optim.Adam(model.parameters(), lr=LR)
 

In [None]:
for epoch in range(epochs):
    # The model contains Dropout and BatchNorm1d layers, so it must be in
    # training mode here (also matters if this cell is re-run after testing).
    model.train()
    # Iterate over the training mini-batches
    for data, target in train_dl:
        # Forward pass on images flattened to 784 features
        output = model(data.reshape(-1, 784))
        # Compute the loss
        loss = loss_fn(output, target)
        # Backward pass
        optimizer.zero_grad()  # reset all parameter gradients
        loss.backward()     # compute gradients (stored in param.grad)
        optimizer.step()    # update the parameters

    # Note: this reports the loss of the last mini-batch of the epoch only
    print(f'Epoch:{epoch} Loss: {loss.item()}')

# Testing

# Fix: switch to evaluation mode so Dropout is disabled and BatchNorm uses its
# running statistics; without this the reported accuracy is distorted.
model.eval()
correct = 0
total = 0
with torch.no_grad():  # no gradient tracking during evaluation
    for data, target in test_dl:
        output = model(data.reshape(-1, 784))
        _, predicted = torch.max(output, 1)  # per-row maximum value and its index
        total += target.size(0)  # size(0) is equivalent to shape[0]
        correct += (predicted == target).sum().item()

print(f'Accuracy: {correct/total*100}%')

In [None]:
# 导包
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import KMNIST
from torchvision.transforms import Compose, ToTensor, Normalize
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

# Preprocessing: convert each image to a tensor, then normalize with
# mean 0.5 and std 0.5 (maps values into the [-1, 1] range).
to_tensor = ToTensor()
normalize = Normalize((0.5,), (0.5,))
transform = Compose([to_tensor, normalize])

# Load the training and test splits, downloading into ./data when missing.
train_data, test_data = [
    KMNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
]

# Hyperparameter settings
BATCH_SIZE: int = 256   # samples per mini-batch
LR: float = 1e-3        # initial learning rate passed to the optimizer
EPOCHS: int = 20        # number of passes over the training set

# Create the data loaders: training batches are shuffled, test batches are not.
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False)

# Refined model architecture
class AdvancedNet(nn.Module):
    """Fully connected classifier mapping 784 input features to 10 outputs.

    The network consists of three hidden blocks (Linear -> BatchNorm1d ->
    ReLU -> Dropout) with 512, 256 and 128 units, followed by a final
    Linear layer producing 10 values per sample.
    """

    def __init__(self):
        super().__init__()
        # Kept as a single Sequential named `model` so state-dict keys
        # ("model.0.weight", ...) stay compatible with saved checkpoints.
        self.model = nn.Sequential(
            nn.Linear(784, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.4),

            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.2),

            nn.Linear(128, 10)
        )

    def forward(self, x):
        """Flatten ``x`` to rows of 784 features and run the network.

        Args:
            x: Tensor whose total element count is a multiple of 784.

        Returns:
            Tensor with one row of 10 values per flattened sample.
        """
        # `reshape` behaves like `view` for contiguous tensors but also
        # accepts non-contiguous input (e.g. transposed or sliced batches),
        # where `view` raises a RuntimeError.
        return self.model(x.reshape(-1, 784))

# Model and loss
model = AdvancedNet()
criterion = nn.CrossEntropyLoss()

# Adam optimizer with weight decay; StepLR multiplies the learning rate by
# 0.5 every 5 scheduler steps.
optimizer = optim.Adam(params=model.parameters(), lr=LR, weight_decay=1e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=5, gamma=0.5)

# Training history: per-epoch mean training loss, per-epoch test accuracy,
# and the best accuracy seen so far.
train_loss, test_acc = [], []
best_acc = 0.0

# Training loop: one optimization pass and one evaluation pass per epoch
for epoch in range(EPOCHS):
    # ---- optimization pass over the training set ----
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Advance the learning-rate schedule once per epoch
    scheduler.step()

    # ---- evaluation pass over the test set ----
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            # Predicted class = index of the largest output in each row
            predicted = torch.max(outputs, dim=1).indices
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    # Record the mean batch loss and the accuracy (in percent) for this epoch
    accuracy = 100 * correct / total
    train_loss.append(running_loss/len(train_loader))
    test_acc.append(accuracy)

    # Checkpoint the weights whenever the accuracy improves
    if accuracy > best_acc:
        best_acc = accuracy
        torch.save(model.state_dict(), 'best_model.pth')

    print(f'Epoch [{epoch+1}/{EPOCHS}] Loss: {train_loss[-1]:.4f} Acc: {accuracy:.2f}%')

# Visualize the training history: loss on the left, accuracy on the right
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12,5))
loss_ax.plot(train_loss, label='Training Loss')
loss_ax.legend()
acc_ax.plot(test_acc, label='Test Accuracy')
acc_ax.legend()
# Save the figure to disk before showing it
fig.savefig('training_result.png')
plt.show()

# Reload the best checkpoint saved during training and evaluate it
model.load_state_dict(torch.load('best_model.pth'))
model.eval()
correct = total = 0
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        # Predicted class = index of the largest output in each row
        predicted = torch.max(outputs, dim=1).indices
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

print(f'Final Test Accuracy: {100*correct/total:.2f}%')