In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


In [2]:
# 定义数据预处理，包括转换为Tensor并标准化
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# 下载并加载训练集，设定批量大小为64，打乱数据
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# 下载并加载测试集，设定批量大小为64，不打乱数据
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)


In [3]:
# 定义卷积神经网络模型
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # 定义第一个卷积层，输入通道为1，输出通道为32，卷积核大小为3x3
        # 输入: [batch_size, 1, 28, 28]
        # 输出: [batch_size, 32, 26, 26] (因为卷积核大小为3x3, 步幅为1, 无填充)
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        
        # 定义第一个最大池化层，池化窗口大小为2x2
        # 输入: [batch_size, 32, 26, 26]
        # 输出: [batch_size, 32, 13, 13] (因为池化窗口大小为2x2, 步幅为2)
        self.pool1 = nn.MaxPool2d(2)
        
        # 定义第二个卷积层，输入通道为32，输出通道为64，卷积核大小为3x3
        # 输入: [batch_size, 32, 13, 13]
        # 输出: [batch_size, 64, 11, 11] (因为卷积核大小为3x3, 步幅为1, 无填充)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        
        # 定义第二个最大池化层，池化窗口大小为2x2
        # 输入: [batch_size, 64, 11, 11]
        # 输出: [batch_size, 64, 5, 5] (因为池化窗口大小为2x2, 步幅为2)
        self.pool2 = nn.MaxPool2d(2)
        
        # 定义 dropout 层，丢弃概率为0.25
        self.dropout1 = nn.Dropout2d(0.25)
        
        # 定义第一个全连接层，将输入特征数 64*5*5 转换为128
        # 输入: [batch_size, 64*5*5]
        # 输出: [batch_size, 128]
        self.fc1 = nn.Linear(64 * 5 * 5, 128)
        
        # 定义 dropout 层，丢弃概率为0.5
        self.dropout2 = nn.Dropout(0.5)
        
        # 定义第二个全连接层，将输入特征数 128 转换为10（10个类别）
        # 输入: [batch_size, 128]
        # 输出: [batch_size, 10]
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)  # 第一个卷积层
        x = torch.relu(x)  # ReLU 激活函数
        x = self.pool1(x)  # 第一个最大池化层
        x = self.conv2(x)  # 第二个卷积层
        x = torch.relu(x)  # ReLU 激活函数
        x = self.pool2(x)  # 第二个最大池化层
        x = self.dropout1(x)  # Dropout 层
        x = x.view(x.size(0), -1)  # 将特征展平成一维向量
        x = self.fc1(x)  # 第一个全连接层
        x = torch.relu(x)  # ReLU 激活函数
        x = self.dropout2(x)  # Dropout 层
        x = self.fc2(x)  # 第二个全连接层
        return x

net = Net().to(device)  # 实例化网络并移到 GPU
print(net)  # 打印网络结构

Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout1): Dropout2d(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=1600, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)


In [4]:
# 定义损失函数为交叉熵损失
criterion = nn.CrossEntropyLoss()

# 定义优化器为随机梯度下降，学习率为0.01，动量为0.9
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)


In [5]:
# 训练模型
for epoch in range(10):  # 训练10个epoch
    running_loss = 0.0  # 初始化损失值
    for i, data in enumerate(trainloader, 0):  # 遍历训练数据集
        inputs, labels = data  # 获取输入数据和标签
        inputs, labels = inputs.to(device), labels.to(device)  # 将数据移动到GPU上
        
        optimizer.zero_grad()  # 将梯度缓存清零
        outputs = net(inputs)  # 前向传播
        loss = criterion(outputs, labels)  # 计算损失
        loss.backward()  # 反向传播计算梯度
        optimizer.step()  # 更新参数
        running_loss += loss.item()  # 累积损失

    print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 100}")  # 打印平均损失
    running_loss = 0.0  # 重置损失值

print('Finished Training')  # 训练完成


[1, 938] loss: 3.361268589682877
[2, 938] loss: 1.0603206931520253
[3, 938] loss: 0.81486511511961
[4, 938] loss: 0.6786605882644653
[5, 938] loss: 0.5897748069348745
[6, 938] loss: 0.5474595459620468
[7, 938] loss: 0.4845461610052735
[8, 938] loss: 0.4427096894546412
[9, 938] loss: 0.4074727630848065
[10, 938] loss: 0.3852352365129627
Finished Training


In [9]:
# 测试模型
correct = 0  # 初始化正确预测数
total = 0  # 初始化总数
with torch.no_grad():  # 禁用梯度计算
    for data in testloader:  # 遍历测试数据集
        inputs, labels = data  # 获取输入数据和标签
        print(f"labels: {labels}")
        inputs, labels = inputs.to(device), labels.to(device)  # 将数据移动到GPU上
        outputs = net(inputs)  # 前向传播
        _, predicted = torch.max(outputs.data, 1)  # 获取预测结果
        print(f"predicted: {predicted}")
        total += labels.size(0)  # 更新总数
        correct += (predicted == labels).sum().item()  # 更新正确预测数

print(f'Accuracy of the network on the 10000 test inputs: {100 * correct / total}%')  # 打印测试集上的准确率

labels: tensor([7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4, 9, 6, 6, 5,
        4, 0, 7, 4, 0, 1, 3, 1, 3, 4, 7, 2, 7, 1, 2, 1, 1, 7, 4, 2, 3, 5, 1, 2,
        4, 4, 6, 3, 5, 5, 6, 0, 4, 1, 9, 5, 7, 8, 9, 3])
predicted: tensor([7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1, 5, 9, 7, 5, 4, 9, 6, 6, 5,
        4, 0, 7, 4, 0, 1, 3, 1, 3, 4, 7, 2, 7, 1, 2, 1, 1, 7, 4, 2, 3, 5, 1, 2,
        4, 4, 6, 3, 5, 5, 6, 0, 4, 1, 9, 5, 7, 8, 5, 3], device='cuda:0')
labels: tensor([7, 4, 6, 4, 3, 0, 7, 0, 2, 9, 1, 7, 3, 2, 9, 7, 7, 6, 2, 7, 8, 4, 7, 3,
        6, 1, 3, 6, 9, 3, 1, 4, 1, 7, 6, 9, 6, 0, 5, 4, 9, 9, 2, 1, 9, 4, 8, 7,
        3, 9, 7, 4, 4, 4, 9, 2, 5, 4, 7, 6, 7, 9, 0, 5])
predicted: tensor([7, 4, 6, 4, 3, 0, 7, 0, 2, 9, 1, 7, 3, 2, 9, 7, 7, 6, 2, 7, 8, 4, 7, 3,
        6, 1, 3, 6, 9, 3, 1, 4, 1, 7, 6, 9, 6, 0, 5, 4, 9, 9, 2, 6, 9, 4, 8, 7,
        3, 9, 7, 4, 4, 4, 9, 2, 5, 4, 7, 6, 7, 9, 0, 5], device='cuda:0')
labels: tensor([8, 5, 6, 6, 5, 7, 8, 1, 0, 1, 6, 4, 6, 7, 3,

In [11]:
a = torch.randn(4, 4)
print(f"a: {a}")
_, predicted = torch.max(a, 1)
print(f"predicted: {predicted}")
print(f"-: {_}")

a: tensor([[-0.7635,  0.2213,  0.5510,  0.3903],
        [ 2.3035, -1.3630,  1.9605, -1.1413],
        [ 0.9520, -0.0111,  0.0792, -1.1584],
        [ 0.6448, -0.5473, -1.0915, -0.8756]])
predicted: tensor([2, 0, 0, 0])
-: tensor([0.5510, 2.3035, 0.9520, 0.6448])
