In [1]:
import numpy as np
import matplotlib.pyplot as plt
import idx2numpy
from tqdm import tqdm

import torch
from torch import nn
from torch import optim
from torch.utils.data import TensorDataset, DataLoader

In [2]:
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"device: {device}")

device: cuda


In [3]:
class MNIST(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            nn.Linear(64 * 7 * 7, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 10),
        )

        for m in self.net.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        return self.net(x)

In [4]:
def one_hot(labels, num_classes):
    # one-hot 编码
    one_hot_labels = np.zeros((labels.shape[0], num_classes))
    for i in range(labels.shape[0]):
        one_hot_labels[i, labels[i]] = 1
    return one_hot_labels

In [5]:
datapath = "../data/MNIST/train-images.idx3-ubyte"
labelpath = "../data/MNIST/train-labels.idx1-ubyte"
modelpath = "../model/task2/MNIST.pth"

data = idx2numpy.convert_from_file(datapath)    # (60000, 28, 28)
data = np.expand_dims(data, axis=1)             # 添加通道维度 -> (60000, 1, 28, 28)
data = torch.from_numpy(data).float()

label = idx2numpy.convert_from_file(labelpath)
one_hot_labels = one_hot(label, 10)
one_hot_labels = torch.from_numpy(one_hot_labels).float()


  data = torch.from_numpy(data).float()


In [None]:
valid_datapath = "../valid/MNIST/t10k-images.idx3-ubyte"
valid_labelpath = "../valid/MNIST/t10k-labels.idx1-ubyte"

valid_data = idx2numpy.convert_from_file(valid_datapath)
valid_data = np.expand_dims(valid_data, axis=1)
valid_data = torch.from_numpy(valid_data).float()

valid_label = idx2numpy.convert_from_file(valid_labelpath)
valid_one_hot_labels = one_hot(valid_label, 10)
valid_one_hot_labels = torch.from_numpy(valid_one_hot_labels).float()

In [None]:
train_dataset = TensorDataset(data, one_hot_labels)
valid_dataset = TensorDataset(valid_data, valid_one_hot_labels)

In [9]:
epochs = 100            # 训练轮数

batch_size = 128        # 批大小
inital_lr = 0.001       # 初始学习率
lr_patience = 10        # 学习率衰减的耐心
lr_decay = 0.5          # 学习率衰减系数

best_accuracy = 0.0     # 最佳准确率

In [None]:
train_loader = DataLoader(train_dataset, batch_size, True)
valid_loader = DataLoader(valid_dataset, batch_size, False)

model = MNIST().to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=inital_lr, weight_decay=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=lr_decay, patience=lr_patience)

In [None]:
pbar = tqdm(range(epochs), desc="Training")
for i in pbar:
    model.train()
    running_loss = 0.0
    test_loss = 0.0
    accuracy = 0.0
    for x, y in train_loader:
        # 加载进GPU
        x = x.to(device)
        y = y.to(device)
        # 清空梯度
        optimizer.zero_grad()
        # 前向传播
        output = model(x)
        # 计算损失
        loss = loss_func(output, y)
        running_loss += loss.item()
        # 反向传播
        loss.backward()
        # 更新参数
        optimizer.step()

    # 计算验证集损失和准确率
    model.eval()
    with torch.no_grad():
        for x, y in valid_loader:
            x = x.to(device)
            y = y.to(device)
            # 预测
            pred = model(x)
            # 计算损失
            loss = loss_func(pred, y)
            test_loss += loss.item()
            # 计算准确率
            accuracy += torch.sum(torch.argmax(pred, dim=1) == torch.argmax(y, dim=1)).item()

    running_loss /= len(train_loader)
    test_loss /= len(valid_loader)
    accuracy /= len(valid_loader.dataset)
    scheduler.step(test_loss)  # 更新学习率

    if accuracy > best_accuracy:
        best_accuracy = accuracy
        torch.save(model.state_dict(), modelpath)

    pbar.set_postfix(
        loss=running_loss,
        test_loss=test_loss,
        accuracy=f"{accuracy*100:.2f}%",
        best_accuracy=f"{best_accuracy*100:.2f}%",
        lr=optimizer.param_groups[0]['lr'],
    )

In [None]:
print("architecture:", model)
print("param", sum(p.numel() for p in model.parameters()))
print("savepath:", modelpath)
print("best_accuracy:", best_accuracy)

In [11]:
# For interview
interview_data_path = "../valid/MNIST/t10k-images.idx3-ubyte"
interview_label_path = "../valid/MNIST/t10k-labels.idx1-ubyte"

interview_data = idx2numpy.convert_from_file(interview_data_path)
interview_data = np.expand_dims(interview_data, axis=1)
interview_data = torch.from_numpy(interview_data).float().to(device)

interview_label = idx2numpy.convert_from_file(interview_label_path)
interview_labels = one_hot(interview_label, 10)
interview_labels = torch.from_numpy(interview_labels).float().to(device)

In [12]:
# For interview
model.load_state_dict(torch.load(modelpath))

model.eval()
with torch.no_grad():
    pred = model(interview_data)
    pred_labels = torch.argmax(pred, dim=1)
    true_labels = torch.argmax(interview_labels, dim=1)
    accuracy = torch.sum(pred_labels == true_labels).item() / len(interview_data)
    print("Interview Accuracy: {:.2f}%".format(accuracy * 100))

Interview Accuracy: 99.31%
