In [1]:
import numpy as np
from tqdm import tqdm
import torch
import torch.nn.functional as F
from torch import nn, optim
from torch.nn import init, Parameter
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from logger import MeterLogger
import time

准备数据和定义超参数

In [2]:
# Training hyperparameters shared by the cells below.
batch_size = 128
learning_rate = 0.01
num_epoches = 10
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
# MNIST train/test splits read from a local directory. download=False on the
# training split means the files must already be present under that root.
train_dataset = datasets.MNIST(
    root='../_data/mnist',
    train=True,
    transform=transforms.ToTensor(),
    download=False,
)
test_dataset = datasets.MNIST(
    root='../_data/mnist',
    train=False,
    transform=transforms.ToTensor(),
)

# Only the training batches are shuffled; the test order stays fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [4]:
class CNN(nn.Module):
    """Two strided 5x5 convolutions followed by two fully connected layers.

    The flattened feature size (800 = 50 * 4 * 4) is hard-coded for
    single-channel 28x28 inputs. The forward pass returns 10 raw scores per
    sample; no softmax is applied here.
    """

    @staticmethod
    def _he_init(layer):
        """Kaiming-normal weights and zero bias for ``layer``; returns ``layer``."""
        init.kaiming_normal_(layer.weight)
        init.zeros_(layer.bias)
        return layer

    def __init__(self):
        super().__init__()
        # Every layer is re-initialised right after it is built, so random
        # numbers are drawn in the order: build, init, build, init, ...
        # 1 x 28 x 28 -> 50 x 12 x 12, since floor((28 - 5) / 2) + 1 = 12
        self.conv2d1 = self._he_init(nn.Conv2d(1, 50, 5, stride=2))
        # 50 x 12 x 12 -> 50 x 4 x 4, since floor((12 - 5) / 2) + 1 = 4
        self.conv2d2 = self._he_init(nn.Conv2d(50, 50, 5, stride=2))
        # 800 input features = channels * height * width = 50 * 4 * 4
        self.linear1 = self._he_init(nn.Linear(800, 512))
        self.linear2 = self._he_init(nn.Linear(512, 10))

    def forward(self, x):
        """Map a batch of images to per-class scores."""
        feature_maps = F.relu(self.conv2d1(x))
        feature_maps = F.relu(self.conv2d2(feature_maps))
        # Collapse everything into rows of 800 features for the linear layers.
        flat = feature_maps.reshape(-1, 800)
        hidden = F.relu(self.linear1(flat))
        return self.linear2(hidden)

In [5]:
# Build the network, then move its parameters to the selected device.
model = CNN()
model = model.to(device)

In [6]:
# Cross-entropy loss applied to the raw class scores produced by the model.
criterion = nn.CrossEntropyLoss()
# SGD with momentum and weight decay over every parameter of the model.
optimizer = optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=0.0005,
)

In [7]:
def train(model, train_loader, criterion, optimizer, device):
    """Run one training epoch over ``train_loader``.

    Args:
        model: network to optimise; it is switched to training mode.
        train_loader: iterable yielding ``(sample, label)`` batches.
        criterion: callable mapping ``(output, label)`` to a scalar loss tensor.
        optimizer: optimizer that updates the parameters of ``model``.
        device: device every batch is moved to before the forward pass.

    Returns:
        tuple: ``(train_loss, train_acc)`` as plain Python floats.
        ``train_loss`` is the sum over batches of ``loss * batch_size`` and
        ``train_acc`` is the number of correctly classified samples. Neither
        is normalised; the caller divides by the dataset size.
    """
    model.train()
    train_loss = 0.
    train_acc = 0.
    for sample, label in tqdm(train_loader, desc='  - (Training)  '):
        sample, label = sample.to(device), label.to(device)
        # forward propagation
        out = model(sample)
        loss = criterion(out, label)
        # Accumulate a Python number, not the tensor: adding `loss` itself
        # would keep every batch's autograd history alive for the whole
        # epoch and hand a graph-attached tensor back to the caller.
        train_loss += loss.item() * label.size(0)
        _, pred = torch.max(out, 1)
        train_acc += (pred == label).sum().item()
        # backward propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return train_loss, train_acc

In [8]:
begin = time.time()
# One slot per epoch for the mean training loss and the training accuracy.
train_loss = np.zeros(num_epoches)
train_acc = np.zeros(num_epoches)
for epoch in range(num_epoches):
    loss, acc = train(model, train_loader, criterion, optimizer, device)
    # train() returns totals for the epoch; normalise by the dataset size.
    train_loss[epoch] = loss / len(train_dataset)
    train_acc[epoch] = acc / len(train_dataset)
print(f"Total training time: {(time.time()-begin)/60:.2f} mins.")

  - (Training)  : 100%|██████████| 469/469 [00:04<00:00, 96.40it/s]
  - (Training)  : 100%|██████████| 469/469 [00:04<00:00, 97.20it/s]
  - (Training)  : 100%|██████████| 469/469 [00:04<00:00, 97.68it/s]
  - (Training)  : 100%|██████████| 469/469 [00:04<00:00, 97.22it/s]
  - (Training)  : 100%|██████████| 469/469 [00:06<00:00, 72.79it/s]
  - (Training)  : 100%|██████████| 469/469 [00:06<00:00, 74.25it/s]
  - (Training)  : 100%|██████████| 469/469 [00:05<00:00, 84.76it/s]
  - (Training)  : 100%|██████████| 469/469 [00:06<00:00, 71.19it/s]
  - (Training)  : 100%|██████████| 469/469 [00:06<00:00, 74.01it/s]
  - (Training)  : 100%|██████████| 469/469 [00:06<00:00, 73.72it/s]

Total training time: 0.95 mins.



