This notebook trains a neural network to classify the MNIST dataset.

In [None]:
# import MNIST dataset
import torch
from torchvision import datasets, transforms

# Seed the global RNG so that weight initialisation and batch shuffling are
# repeatable across kernel restarts (as far as the compute backend allows).
# Without a seed the accuracies printed below differ on every run.
SEED = 0
torch.manual_seed(SEED)

# Define transforms to apply to the data: convert each image to a tensor,
# then normalise its single channel with mean 0.5 and std 0.5.
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,))])

# Load the dataset (downloaded into MNIST_data/ when not already present)
trainset = datasets.MNIST('MNIST_data/', download=True, train=True, transform=transform)
testset = datasets.MNIST('MNIST_data/', download=True, train=False, transform=transform)

# MPS acceleration support: use the "mps" backend when available, else the CPU
device_mps = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# Define the data loaders. The training loader gets its own seeded generator so
# the shuffle order does not depend on how much other code has consumed the
# global RNG; the test loader is not shuffled and needs none.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True,
                                          generator=torch.Generator().manual_seed(SEED))
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

# Define the model
from torch import nn

class Net(nn.Module):
    """Fully connected classifier: 784 -> 512 -> 256 -> 10, ReLU after the first two layers."""

    def __init__(self):
        # nn.Module must be initialised first so that the layers assigned
        # below are registered as submodules (and their parameters are
        # returned by .parameters()).
        super().__init__()
        self.fc1 = nn.Linear(784, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 10)

    def forward(self, x):
        """Flatten the input to rows of 784 values and return the 10 output scores per row.

        Args:
            x: tensor whose elements can be regrouped into rows of 784 values.

        Returns:
            Tensor with one row of 10 values per input row; no activation is
            applied after the last layer.
        """
        # Flatten the 2-D image data into 1-D vectors. reshape() gives the same
        # result as view() for contiguous input but also accepts
        # non-contiguous tensors (e.g. transposed or sliced batches), on which
        # view() raises a RuntimeError.
        x = x.reshape(-1, 784)
        x = nn.functional.relu(self.fc1(x))  # fc1 is the input layer, ReLU is the activation
        x = nn.functional.relu(self.fc2(x))  # fc2 is the hidden layer, ReLU is the activation
        x = self.fc3(x)  # fc3 is the output layer
        return x

# Instantiate the network and move its parameters to the selected device
model = Net().to(device_mps)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Report the mean training loss every `log_every` mini-batches. The previous
# interval of 2000 was never reached: with 64 images per batch an MNIST epoch
# has roughly 938 mini-batches, so no loss line was ever printed.
log_every = 200

# Train the model
for epoch in range(10):
    # Explicit training mode; a no-op for the current layers, but keeps the
    # loop correct if dropout / batch-norm layers are added later.
    model.train()
    running_loss = 0.0  # loss accumulated since the last report
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device_mps), labels.to(device_mps)

        optimizer.zero_grad()  # reset gradients from the previous step

        outputs = model(inputs)  # forward pass
        loss = criterion(outputs, labels)  # compute the loss
        loss.backward()  # backward pass
        optimizer.step()  # update the parameters

        running_loss += loss.item()  # accumulate the loss
        if i % log_every == log_every - 1:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / log_every))
            running_loss = 0.0

    # Evaluate on the test set after every epoch
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for data in testloader:
            images, labels = data
            images, labels = images.to(device_mps), labels.to(device_mps)
            outputs = model(images)
            # Index of the largest score per row is the predicted class
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

Accuracy of the network on the 10000 test images: 88 %
Accuracy of the network on the 10000 test images: 90 %
Accuracy of the network on the 10000 test images: 91 %
Accuracy of the network on the 10000 test images: 92 %
Accuracy of the network on the 10000 test images: 93 %
Accuracy of the network on the 10000 test images: 93 %
Accuracy of the network on the 10000 test images: 93 %
Accuracy of the network on the 10000 test images: 94 %
Accuracy of the network on the 10000 test images: 94 %
Accuracy of the network on the 10000 test images: 95 %


In neural networks, one of the most important tasks is interpreting the meaning of the intermediate layers, which is called feature visualization.