In [None]:


# Step 1: 导入必要的库
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import wandb # 用于可视化实验结果,别用matplotlib了！在此之前，先命令行登录wandb
import swanlab # 如果wandb对你来说获取困难
import os
from config import swanlab_api_key
# wandb.login(key = os.environ['wandb_api_key']) # 替换为你的wandb API key,如果你没保存为环境变量
swanlab.login(api_key = swanlab_api_key)

ModuleNotFoundError: No module named 'config'

In [2]:
# Step 2: 数据预处理和加载 MNIST 数据集
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

# 下载训练数据和测试数据
trainset = torchvision.datasets.MNIST(root='../data', train=True, download=True, transform=transform)
testset = torchvision.datasets.MNIST(root='../data', train=False, download=True, transform=transform)

# 创建 DataLoader，batch_size 可调整
batch_size = 64  # 初始 batch_size，后续可以调整
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False)

# 此处会下载数据集，存在./data

In [3]:
# Step 3: 定义一个简单的全连接神经网络模型
class SimpleNN(nn.Module):
    def __init__(self, model_size='small'):
        super(SimpleNN, self).__init__()
        
        # 根据模型大小选择不同的隐藏层大小
        if model_size == 'small':
            self.fc1 = nn.Linear(28*28, 128)
            self.fc2 = nn.Linear(128, 64)
        elif model_size == 'medium':
            self.fc1 = nn.Linear(28*28, 256)
            self.fc2 = nn.Linear(256, 128)
        elif model_size == 'large':
            self.fc1 = nn.Linear(28*28, 512)
            self.fc2 = nn.Linear(512, 256)
        
        self.fc3 = nn.Linear(64, 10)  # 输出层，10 类
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = x.view(-1, 28*28)  # 展平输入
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [5]:

# Step 4: 定义训练函数
def train_model(model, trainloader, optimizer, criterion, device,num_epochs=10):
    model.train()
    train_loss = []
    global_step = 0  # 用于记录全局步数
    for epoch in range(num_epochs):
        running_loss = 0.0
        for images, labels in trainloader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            global_step += 1  # 每处理一个 batch，步数加一
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            # wandb.log({"step_loss": loss.item()},step=global_step)
            swanlab.log({"step_loss": loss.item()},step=global_step)
        train_loss.append(running_loss / len(trainloader))
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(trainloader):.4f}')
        # wandb.log({"epoch": epoch+1, "loss": running_loss / len(trainloader)})
        swanlab.log({"epoch": epoch+1, "loss": running_loss / len(trainloader)})
    
    return train_loss

# Step 5: 定义测试函数
def test_model(model, testloader,device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Accuracy of the model on the test set: {100 * correct / total:.2f}%')
    # wandb.log({"accuracy": 100 * correct / total})
    swanlab.log({"accuracy": 100 * correct / total})  # 如果使用 swanlab


In [6]:

# Step 6: 设置设备
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Step 7: 执行不同设置的实验
# 设置不同的 batch_size, learning_rate 和 model_size
batch_size = 4 #[16, 64, 128]  测试不同的 batch size
learning_rate = 1e-2 #[0.01, 0.001, 0.0001]  测试不同的 learning rate
model_size = 'small' #['small', 'medium', 'large']  测试不同的 model size
swanlab.login()


Output()

In [7]:

# Step 8: 结果存储
results = {}
# wandb.login()
print(f"Training with batch_size={batch_size}, learning_rate={learning_rate}, model_size={model_size}")
# wandb.init(project="mnist_experiment", config={
#     "batch_size": batch_size,
#     "learning_rate": learning_rate,
#     "model_size": model_size
# })
swanlab.init(project="mnist_experiment", config={
    "batch_size": batch_size,
    "learning_rate": learning_rate,
    "model_size": model_size
})
# 创建模型和优化器
model = SimpleNN(model_size=model_size).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# 训练模型
train_loss = train_model(model, trainloader, optimizer, criterion, device,num_epochs=5)

# 测试模型
test_model(model, testloader,device)



Training with batch_size=4, learning_rate=0.01, model_size=small


Output()

Epoch [1/5], Loss: 0.4033
Epoch [2/5], Loss: 0.2808
Epoch [3/5], Loss: 0.2673
Epoch [4/5], Loss: 0.2521
Epoch [5/5], Loss: 0.2520
Accuracy of the model on the test set: 93.71%
