In [37]:
import torch
import numpy as np
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import time

In [46]:
def load_data():
    # 加载数据MNIST, 训练集和测试集(train=True/False), 转换为张量(附带归一化 /255), 下载数据
    train_data = datasets.MNIST(root='data', train=True, transform=transforms.ToTensor(), download=True)
    test_data = datasets.MNIST(root='data', train=False, transform=transforms.ToTensor(), download=True)
    
    # 数据加载器 DataLoader, 用于将数据分批次加载, shuffle=True表示打乱数据
    train_loader = DataLoader(dataset=train_data, batch_size=256, shuffle=True)
    test_loader = DataLoader(dataset=test_data, batch_size=256, shuffle=False)
    return train_loader, test_loader

In [47]:
class MLP(torch.nn.Module):
    """
        
    """
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = torch.nn.Linear(28*28, 256)
        self.fc2 = torch.nn.Linear(256, 10)
        self.relu = torch.nn.ReLU()
        self.softmax = torch.nn.Softmax(dim=1)
        
    def forward(self, x):
        x = x.view(-1, 28*28)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.softmax(x)
        return x

In [48]:
def train(model, train_loader, optimizer, criterion, device):
    model.train()
    for i, (images, labels) in enumerate(train_loader):
        # 记录训练时间
        start_time = time.time()
        # 优化器梯度清零
        optimizer.zero_grad()
        # 数据加载到设备
        images, labels = images.to(device), labels.to(device)
        # 前向传播
        outputs = model(images)
        # 计算损失, 反向传播, 更新参数
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # 打印训练信息
        if (i+1) % 200 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Time: {:.4f}'.format(epoch+1, num_epochs, i+1, len(train_loader), loss.item(), time.time()-start_time))

In [49]:
def test(model, test_loader, device):
    model.eval()
    correct = 0
    total = 0
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))

In [50]:
# 开始训练
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("device: ", device)
train_loader, test_loader = load_data()
model = MLP().to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epochs = 3

for epoch in range(num_epochs):
    train(model, train_loader, optimizer, criterion, device)
    test(model, test_loader, device)

device:  cuda
Epoch [1/3], Step [200/235], Loss: 1.5699, Time: 0.0000
Accuracy of the network on the 10000 test images: 92.1 %
Epoch [2/3], Step [200/235], Loss: 1.5404, Time: 0.0000
Accuracy of the network on the 10000 test images: 93.86 %
Epoch [3/3], Step [200/235], Loss: 1.5274, Time: 0.0156
Accuracy of the network on the 10000 test images: 94.71 %
