# 导入PyTorch库

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# 数据集加载

使用[MNIST数据集](https://www.kaggle.com/datasets/hojjatk/mnist-dataset)作为训练和测试样本（训练集60000张图像，测试集10000张图像）

In [2]:
# Preprocessing pipeline: convert each image to a tensor, then normalize its
# single channel with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# MNIST train and test splits, rooted at ./data with downloading enabled.
trainset = torchvision.datasets.MNIST(
    root='./data', train=True, download=True, transform=transform
)
testset = torchvision.datasets.MNIST(
    root='./data', train=False, download=True, transform=transform
)

# Mini-batches of 32 samples; only the training loader shuffles.
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=32, shuffle=True
)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=32, shuffle=False
)

# Report how many samples each split holds.
print(f'Train set size: {len(trainset)}')
print(f'Test set size: {len(testset)}')


Train set size: 60000
Test set size: 10000


# 神经网络定义

In [7]:
# A small fully connected classifier.
class SimpleNN(nn.Module):
    """Three-layer fully connected network with ReLU after the first two layers.

    The layer attributes keep the names ``fc1``, ``fc2`` and ``fc3`` and are
    created in that order.

    Args:
        input_size: Number of features per sample once flattened
            (default ``28 * 28``).
        hidden1_size: Width of the first hidden layer (default 128).
        hidden2_size: Width of the second hidden layer (default 64).
        num_classes: Number of output values per sample (default 10).
    """

    def __init__(self, input_size=28 * 28, hidden1_size=128, hidden2_size=64,
                 num_classes=10):
        super().__init__()
        # Remembered so forward() flattens to the width fc1 expects.
        self.input_size = input_size
        self.fc1 = nn.Linear(input_size, hidden1_size)    # input -> hidden 1
        self.fc2 = nn.Linear(hidden1_size, hidden2_size)  # hidden 1 -> hidden 2
        self.fc3 = nn.Linear(hidden2_size, num_classes)   # hidden 2 -> output

    def forward(self, x):
        """Flatten ``x`` to rows of ``input_size`` features and run the layers.

        Returns the raw output of ``fc3``; no activation is applied to it.
        """
        # reshape (rather than view) also accepts non-contiguous inputs such
        # as permuted or sliced tensors, copying only when it has to.
        x = x.reshape(-1, self.input_size)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

# Seed PyTorch's global random number generator before building the model so
# the initial parameters printed below are the same on every fresh run.
torch.manual_seed(42)

net = SimpleNN()
print(net)

# Show the initial value of every weight and bias tensor.
print("Default initialized weights and biases:")
for name, param in net.named_parameters():
    # Weights and biases are reported identically, so one branch covers both.
    if 'weight' in name or 'bias' in name:
        print(f'{name} initialized as:\n{param.data}\n')


SimpleNN(
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=10, bias=True)
)
Default initialized weights and biases:
fc1.weight initialized as:
tensor([[ 0.0207, -0.0045,  0.0141,  ...,  0.0060,  0.0299, -0.0118],
        [-0.0219, -0.0326,  0.0286,  ..., -0.0280, -0.0178,  0.0306],
        [-0.0071, -0.0327,  0.0153,  ...,  0.0099,  0.0126, -0.0348],
        ...,
        [-0.0208, -0.0355,  0.0034,  ...,  0.0041,  0.0107,  0.0049],
        [-0.0188,  0.0219,  0.0192,  ..., -0.0016, -0.0178, -0.0255],
        [-0.0318, -0.0277, -0.0231,  ...,  0.0186, -0.0181,  0.0257]])

fc1.bias initialized as:
tensor([ 1.5798e-02,  2.5667e-02,  3.1254e-03,  1.8888e-02, -2.2549e-02,
        -1.0482e-02,  1.1059e-03, -3.2768e-02, -3.4180e-02, -2.6660e-02,
        -1.2232e-02,  3.0705e-02, -1.8898e-02, -1.5001e-02, -1.0096e-02,
         2.6169e-03, -1.3435e-02, -2.9629e-02, -7.2478e-03,

# 定义损失函数和优化器

In [4]:
# Loss: cross-entropy between the network outputs and the integer labels.
criterion = nn.CrossEntropyLoss()

# Optimizer: SGD over all of net's parameters, learning rate 0.01, momentum 0.9.
optimizer = optim.SGD(
    params=net.parameters(),
    lr=0.01,
    momentum=0.9,
)

# 训练神经网络

In [5]:
# Train the network.
NUM_EPOCHS = 5      # number of full passes over the training set
LOG_INTERVAL = 200  # print the average loss every this many mini-batches

# Put the model in training mode explicitly, so this cell does not depend on
# whatever mode an earlier cell left it in.
net.train()

for epoch in range(1, NUM_EPOCHS + 1):
    running_loss = 0.0
    # Steps are numbered from 1 so the printed batch number is the step itself.
    for step, (inputs, labels) in enumerate(trainloader, start=1):
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass and loss.
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Backward pass, then parameter update.
        loss.backward()
        optimizer.step()

        # Accumulate the loss and report its mean over the last LOG_INTERVAL
        # mini-batches; a trailing partial window at the end of an epoch is
        # dropped when running_loss is reset for the next epoch.
        running_loss += loss.item()
        if step % LOG_INTERVAL == 0:
            print(f'[Epoch {epoch}, Batch {step}] loss: {running_loss / LOG_INTERVAL:.3f}')
            running_loss = 0.0

print('Finished Training')

[Epoch 1, Batch 200] loss: 0.988
[Epoch 1, Batch 400] loss: 0.421
[Epoch 1, Batch 600] loss: 0.348
[Epoch 1, Batch 800] loss: 0.303
[Epoch 1, Batch 1000] loss: 0.264
[Epoch 1, Batch 1200] loss: 0.256
[Epoch 1, Batch 1400] loss: 0.239
[Epoch 1, Batch 1600] loss: 0.207
[Epoch 1, Batch 1800] loss: 0.201
[Epoch 2, Batch 200] loss: 0.167
[Epoch 2, Batch 400] loss: 0.180
[Epoch 2, Batch 600] loss: 0.173
[Epoch 2, Batch 800] loss: 0.143
[Epoch 2, Batch 1000] loss: 0.152
[Epoch 2, Batch 1200] loss: 0.153
[Epoch 2, Batch 1400] loss: 0.152
[Epoch 2, Batch 1600] loss: 0.133
[Epoch 2, Batch 1800] loss: 0.140
[Epoch 3, Batch 200] loss: 0.116
[Epoch 3, Batch 400] loss: 0.123
[Epoch 3, Batch 600] loss: 0.112
[Epoch 3, Batch 800] loss: 0.112
[Epoch 3, Batch 1000] loss: 0.122
[Epoch 3, Batch 1200] loss: 0.113
[Epoch 3, Batch 1400] loss: 0.114
[Epoch 3, Batch 1600] loss: 0.117
[Epoch 3, Batch 1800] loss: 0.110
[Epoch 4, Batch 200] loss: 0.098
[Epoch 4, Batch 400] loss: 0.091
[Epoch 4, Batch 600] loss: 0

# 测试训练效果

In [6]:
# Evaluate the trained network on the test set.
# Switch to evaluation mode; this matters for layers that behave differently
# during training, and keeps the cell correct if such layers are added later.
net.eval()

correct = 0
total = 0
with torch.no_grad():  # gradients are not needed for evaluation
    for images, labels in testloader:
        outputs = net(images)
        # Predicted class = index of the largest output in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Report the number of images actually evaluated rather than a fixed count.
print(f'Accuracy of the network on the {total} test images: {100 * correct / total}%')

Accuracy of the network on the 10000 test images: 97.27%
