2021 Kosuke Mori and Takahiro Shinozaki @ Tokyo Tech

A quick introduction to image recognition with a neural network, using the MNIST dataset

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import matplotlib.pyplot as plt

# Check if GPU is available

In [None]:
# Select the compute device once: the GPU when CUDA is usable, otherwise the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('CUDA（GPU）is available' if device == 'cuda' else 'CUDA（GPU）is not available')

# Load the MNIST dataset

In [None]:
# Open both MNIST splits, stored under ./data (downloaded when requested and
# not yet present). Every image goes through ToTensor() when it is accessed.
dataset_traindev = torchvision.datasets.MNIST(
    root='./data',
    train=True,  # the training split
    download=True,
    transform=transforms.ToTensor(),
)
dataset_test = torchvision.datasets.MNIST(
    root='./data',
    train=False,  # the held-out test split
    download=True,
    transform=transforms.ToTensor(),
)

# Display data

In [None]:
# Look at the first sample to see how an image tensor is laid out.
img, label = dataset_traindev[0]
c, h, w = img.shape  # unpacked as channels, height, width
print(f'Image data info: C={c}, H={h}, W={w} ({img.size()})')

In [None]:
# Show one example image for each digit 0-9 in a 2x5 grid.
fig = plt.figure(figsize=(10, 5))

# Walk the dataset a single time, remembering the first image seen for each
# digit, and stop as soon as all ten digits have been found. Restarting the
# scan from index 0 for every digit would load and transform the same leading
# samples up to ten times.
first_image = {}
for j in range(len(dataset_traindev)):
    img, label = dataset_traindev[j]
    first_image.setdefault(int(label), img)
    if len(first_image) == 10:
        break

for i in range(10):
    if i not in first_image:
        # This digit never occurs in the dataset: leave its cell empty
        # instead of drawing some unrelated sample in its place.
        continue
    ax = fig.add_subplot(2, 5, i + 1)
    ax.axis('off')
    ax.set_title('Number:' + str(i))
    # squeeze(0) removes the leading dimension so imshow receives a 2-D image
    ax.imshow(first_image[i].squeeze(0), cmap='gray')

fig.subplots_adjust(wspace=0.1, hspace=0.1)
fig.suptitle('Image examples in MNIST', fontsize=18)
plt.show()

# Prepare training, development, and test sets

In [None]:
TrainRate = 0.8   # fraction of dataset_traindev used for training
BatchSize = 512   # mini-batch size shared by all three loaders

# Randomly divide the train/dev pool: TrainRate of it for training, the
# remainder for development.
num_train = int(TrainRate * len(dataset_traindev))
num_val = len(dataset_traindev) - num_train
dataset_train, dataset_dev = random_split(dataset_traindev, [num_train, num_val])

# Mini-batch loaders. Only the training loader shuffles and drops the final
# incomplete batch; development and test data are read in order, in full.
loader_train = DataLoader(
    dataset_train,
    batch_size=BatchSize,
    shuffle=True,
    drop_last=True,
)
loader_dev = DataLoader(dataset_dev, batch_size=BatchSize)
loader_test = DataLoader(dataset_test, batch_size=BatchSize)

# Report the size of every subset.
for caption, subset in (
    ('# train samples:', dataset_train),
    ('# development samples:', dataset_dev),
    ('# test samples:', dataset_test),
):
    print(caption, len(subset))

# Define a neural network model

In [None]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier with two sigmoid hidden layers.

    The defaults reproduce the 784 -> 100 -> 20 -> 10 network used for MNIST;
    the sizes can be overridden to reuse the class on other inputs.

    Args:
        in_features: Length of each (flattened) input vector.
        hidden1: Number of units in the first hidden layer.
        hidden2: Number of units in the second hidden layer.
        num_classes: Number of output scores produced per sample.
    """

    def __init__(self, in_features: int = 28 * 28, hidden1: int = 100,
                 hidden2: int = 20, num_classes: int = 10):
        super().__init__()
        self.layer1 = nn.Linear(in_features, hidden1)
        self.layer2 = nn.Linear(hidden1, hidden2)
        self.layer_out = nn.Linear(hidden2, num_classes)

    def forward(self, z: torch.Tensor) -> torch.Tensor:
        """Return the pre-softmax output scores for the input batch ``z``."""
        # torch.sigmoid is the functional form; no activation module has to
        # be constructed on every call.
        z = torch.sigmoid(self.layer1(z))
        z = torch.sigmoid(self.layer2(z))
        # No softmax here: nn.CrossEntropyLoss expects the pre-softmax output.
        return self.layer_out(z)

In [None]:
# Build the network, then move its parameters to the selected device
# (nn.Module.to() modifies the module in place).
model = NeuralNetwork()
model.to(device)

# Dump the freshly initialised parameters and confirm where they are stored.
params = model.state_dict()
print('params =', params)
print(params['layer1.weight'].device)

# Prepare an optimizer and a loss function

In [None]:
# Loss function: cross-entropy, fed with the model's pre-softmax outputs.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over all model parameters.
# Plain SGD is kept as an alternative to experiment with:
# optimizer = optim.SGD(model.parameters(), lr=0.02)
optimizer = optim.Adam(model.parameters(), lr=5e-3)

# Define a step of batch processing

In [None]:
def train_step(x, y):
    """Run one parameter update on a single mini-batch.

    Relies on the module-level ``model``, ``optimizer`` and ``criterion``.

    Args:
        x: Batch of inputs handed to ``model``.
        y: Labels for the batch; they are given to ``criterion`` and compared
            with the arg-max of the model outputs.

    Returns:
        Tuple ``(loss, num_correct)`` as plain Python numbers: the batch loss
        and the count of correctly classified samples.
    """
    model.train()  # switch to training mode

    optimizer.zero_grad()  # discard gradients from the previous step
    logits = model(x)  # forward propagation
    batch_loss = criterion(logits, y)
    batch_loss.backward()  # back-propagate to obtain the gradients
    optimizer.step()  # update the network parameters

    with torch.no_grad():  # the bookkeeping below needs no gradients
        # NOTE: softmax does not change which class has the largest score, so
        # the inference results would be the same without it.
        probs = torch.softmax(logits, dim=1)
        hits = (probs.argmax(dim=1) == y).sum()

    return batch_loss.item(), hits.item()

def test_step(x, y):
    """Evaluate one mini-batch without updating the model.

    Relies on the module-level ``model`` and ``criterion``.

    Args:
        x: Batch of inputs handed to ``model``.
        y: Labels for the batch; they are given to ``criterion`` and compared
            with the arg-max of the model outputs.

    Returns:
        Tuple ``(loss, num_correct)`` as plain Python numbers: the batch loss
        and the count of correctly classified samples.
    """
    model.eval()  # set evaluation mode

    # Nothing is back-propagated during evaluation, so run the forward pass
    # and the loss under no_grad as well; otherwise an autograd graph is
    # built and kept alive for every evaluation batch for no benefit.
    with torch.no_grad():
        out = model(x)
        loss = criterion(out, y)

        # NOTE: If we omit this softmax operation, the inference results will be the same
        out = nn.Softmax(1)(out)  # apply softmax function for the model outputs
        num_crr = (y == torch.argmax(out, 1)).sum()

    return (loss.item(), num_crr.item())

# Train the model

In [None]:
NumEpocs = 20
# Per-epoch history, used afterwards to draw the learning curves.
log = {'train_loss': [], 'train_acc': [], 'dev_loss': [], 'dev_acc': []}


def _run_epoch(loader, step_fn):
    """Pass every mini-batch of ``loader`` to ``step_fn``.

    Returns the per-sample average loss and the accuracy over the whole pass.
    """
    loss_total = 0.0
    num_crr_total = 0.0
    num_sample = 0
    for x, y in loader:
        batch = x.size(0)
        # Flatten each image into one vector and move the batch to the device.
        x = x.view(batch, -1).to(device)
        y = y.to(device)
        loss, num_crr = step_fn(x, y)
        # step_fn reports a per-batch loss; weight it by the batch size so the
        # final division yields a per-sample average.
        loss_total += batch * loss
        num_crr_total += num_crr
        num_sample += len(y)
    return loss_total / num_sample, num_crr_total / num_sample


for epoch in range(NumEpocs):
    # One pass of parameter updates on the training set, then one
    # evaluation-only pass on the development set.
    train_loss_avg, train_acc_avg = _run_epoch(loader_train, train_step)
    dev_loss_avg, dev_acc_avg = _run_epoch(loader_dev, test_step)

    for key, value in (
        ('train_loss', train_loss_avg),
        ('dev_loss', dev_loss_avg),
        ('train_acc', train_acc_avg),
        ('dev_acc', dev_acc_avg),
    ):
        log[key].append(value)

    print(
        f'Epoch {epoch+1}/{NumEpocs},'
        f' train_loss: {train_loss_avg:.5f}, train_acc: {train_acc_avg:.5f},'
        f' dev_loss: {dev_loss_avg:.5f}, dev_acc: {dev_acc_avg:.5f}'
    )

print('Done training')

# Display the learning curve

In [None]:
# Draw the loss curve (left) and the accuracy curve (right), each showing the
# training and development values recorded per epoch.
fig = plt.figure(figsize=(10, 4))
epochs = range(1, NumEpocs+1)

panels = (
    # (subplot index, training key, development key, title, y-label, legend position)
    (1, 'train_loss', 'dev_loss', 'Loss curve', 'Loss', 'upper right'),
    (2, 'train_acc', 'dev_acc', 'Accuracy curve', 'Accuracy', 'lower right'),
)
for position, train_key, dev_key, title, ylabel, legend_loc in panels:
    ax = fig.add_subplot(1, 2, position)
    ax.plot(epochs, log[train_key], label='Training')
    ax.plot(epochs, log[dev_key], label='Development')
    ax.set_title(title, fontsize=16)
    ax.set_xlabel('Epochs', fontsize=14)
    ax.set_ylabel(ylabel, fontsize=14)
    ax.set_xlim(1, NumEpocs)
    if position == 1:
        # Only the loss panel has its lower y-limit pinned to zero.
        ax.set_ylim(0,)
    ax.grid(linestyle='--')
    ax.legend(loc=legend_loc)

fig.subplots_adjust(wspace=0.3)
plt.show()

# Evaluate the trained model on the test set

In [None]:
# Measure classification accuracy of the trained model on the test set.
model.eval()  # set evaluation mode
test_num_crr_total = 0.0
num_test_sample = 0
for x, y in loader_test:
    batch = x.size(0)
    # Flatten each image into one vector and move the batch to the device.
    x = x.view(batch, -1).to(device)
    y = y.to(device)
    _, num_crr = test_step(x, y)  # the loss is not needed here
    test_num_crr_total += num_crr
    num_test_sample += len(y)
test_acc = test_num_crr_total / num_test_sample
print('test_acc =', test_acc)