# Setting up the environment in Colab

In [1]:
# install ignite

!pip install pytorch-ignite



In [2]:
# Mount Google Drive at /content/drive so that files stored there are
# reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import os

# Make the project folder on the mounted Drive the current directory, so the
# relative references used later in this notebook (main.py, ./model_*.pt) are
# resolved against it.
# NOTE(review): this absolute path matches one particular Drive layout —
# adjust it before running the notebook elsewhere.
work_dir = '/content/drive/MyDrive/Colab/05_classification_models/'
os.chdir(work_dir)

In [4]:
import torch
import torch.nn

In [5]:
import sys
import numpy as np
import matplotlib.pyplot as plt

from mnist_classification.data_loader import load_mnist

# MNIST classification with FC model

In [6]:
# Run main.py as a script to train the fully connected ("fc") classifier.
# --model_fn names the checkpoint file that the "Test FC model" section loads
# (presumably main.py writes the checkpoint there — confirm in main.py).
!python main.py --model_fn model_fc_1.pt --model fc

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)
Train: 48000
Valid: 12000
Test: 10000
Epoch 1 - |param|=46.375 |g_param|=1.394 loss=0.2232 accuracy=0.9343
Validation - loss=0.1453 accuracy=0.9582 best_loss=inf
Epoch 2 - |param|=47.201 |g_param|=0.951 loss=0.1011 accuracy=0.9709
Validation - loss=0.1034 accuracy=0.9668 best_loss=0.1453
Epoch 3 - |param|=47.928 |g_param|=0.845 loss=0.0763 accuracy=0.9769
Validation - loss=0.0868 accuracy=0.9761 best_loss=0.1034
Epoch 4 - |param|=48.594 |g_param|=0.741 loss=0.0564 accuracy=0.9836
Validation - loss=0.1038 accuracy=0.9699 best_loss=0.0868
Epoch 5 - |param|=49.273 |g_param|=0.706 loss=0.0556 accuracy=0.9832
Validation - loss=0.0800 accuracy=0.9763 best_loss=0.0868
Epoch 6 - |param|=49.921 |g_param|=0.600 loss=0.0381 accuracy=0.9875
Validation - loss=0.1027 accuracy=0.9711 best_loss=0.0800
Epoch 7 - |param|=50.543 |g_param|=0.558 loss=0.0368 accuracy=0.9887
Validation - loss=0.0766 accuracy=0.9769 best_loss=0.0800
Epoch 8 

# Test FC model

In [7]:
# Checkpoint file name that was passed as --model_fn to the FC training command.
model_fn = "./model_fc_1.pt"

In [8]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [9]:
def load(fn, device):
    """Read a saved checkpoint and return its config and model entries.

    Args:
        fn: Path of the checkpoint file, passed straight to ``torch.load``.
        device: Forwarded to ``torch.load`` as ``map_location``.

    Returns:
        A ``(config, model)`` tuple taken from the ``'config'`` and ``'model'``
        keys of the loaded object. Callers in this notebook use the second
        element as a state dict.

    Raises:
        KeyError: If the loaded object lacks a ``'config'`` or ``'model'`` key.
    """
    # NOTE(review): torch.load unpickles the file, so only open checkpoints
    # from a trusted source. On PyTorch releases where weights_only defaults
    # to True, a non-tensor config object may be rejected — confirm against
    # the installed version.
    checkpoint = torch.load(fn, map_location=device)

    config, weights = checkpoint['config'], checkpoint['model']
    return config, weights

In [10]:
def plot(x, y_hat):
    """Show every sample in ``x`` as an image and print its predicted class.

    Args:
        x: Tensor whose first dimension indexes samples; each sample is
            reshaped to 28x28 before being drawn.
        y_hat: Per-sample scores; the argmax over the last dimension of
            ``y_hat[i]`` is printed as the prediction for sample ``i``.
    """
    for idx in range(x.size(0)):
        # Move the sample off the autograd graph and onto the CPU before
        # handing it to NumPy.
        pixels = x[idx].detach().cpu()
        img = np.array(pixels, dtype='float').reshape(28, 28)

        plt.imshow(img, cmap='gray')
        plt.show()

        predicted = torch.argmax(y_hat[idx], dim=-1)
        print("Predict:", int(predicted))

In [11]:
def test(model, x, y, to_be_shown=True):
    model.eval()
    
    with torch.no_grad():
        y_hat = model(x)

        correct_cnt = (y.squeeze() == torch.argmax(y_hat, dim=-1)).sum()
        total_cnt = float(x.size(0))
        
        accuracy = correct_cnt / total_cnt
        print("Accuracy: %.4f" % accuracy)
        
        if to_be_shown:
            plot(x, y_hat)

In [12]:
from main import get_model

# Read the saved training config and the model weights from the checkpoint.
train_config, state_dict = load(model_fn, device)

# Build a model from the saved config, move it to the selected device, then
# restore the weights. get_model comes from main.py; presumably it rebuilds
# the architecture that was trained — confirm in main.py.
model = get_model(train_config).to(device)
model.load_state_dict(state_dict)

print(model)

FullyConnectedClassifier(
  (layers): Sequential(
    (0): Linear(in_features=784, out_features=500, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): BatchNorm1d(500, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Linear(in_features=500, out_features=400, bias=True)
    (4): LeakyReLU(negative_slope=0.01)
    (5): BatchNorm1d(400, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Linear(in_features=400, out_features=300, bias=True)
    (7): LeakyReLU(negative_slope=0.01)
    (8): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): Linear(in_features=300, out_features=200, bias=True)
    (10): LeakyReLU(negative_slope=0.01)
    (11): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): Linear(in_features=200, out_features=100, bias=True)
    (13): LeakyReLU(negative_slope=0.01)
    (14): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_runni

In [13]:
# Load MNIST test set.
# flatten is requested only when the saved config says the model is 'fc'.
x, y = load_mnist(is_train=False, flatten=(train_config.model == 'fc'))
x, y = x.to(device), y.to(device)

# Print the test accuracy without drawing the individual samples.
test(model, x, y, to_be_shown=False)

Accuracy: 0.9803


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


# MNIST classification with CNN model

In [14]:
# Run main.py as a script to train the convolutional ("cnn") classifier.
# --model_fn names the checkpoint file that the "Test CNN model" section loads
# (presumably main.py writes the checkpoint there — confirm in main.py).
!python main.py --model_fn model_cnn_1.pt --model cnn

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)
Train: 48000
Valid: 12000
Test: 10000
Epoch 1 - |param|=53.722 |g_param|=1.518 loss=0.1489 accuracy=0.9562
Validation - loss=0.0616 accuracy=0.9818 best_loss=inf
Epoch 2 - |param|=54.330 |g_param|=0.788 loss=0.0592 accuracy=0.9832
Validation - loss=0.0694 accuracy=0.9827 best_loss=0.0616
Epoch 3 - |param|=54.818 |g_param|=0.629 loss=0.0379 accuracy=0.9891
Validation - loss=0.0452 accuracy=0.9869 best_loss=0.0616
Epoch 4 - |param|=55.278 |g_param|=0.540 loss=0.0276 accuracy=0.9915
Validation - loss=0.0534 accuracy=0.9875 best_loss=0.0452
Epoch 5 - |param|=55.726 |g_param|=0.442 loss=0.0208 accuracy=0.9937
Validation - loss=0.0351 accuracy=0.9885 best_loss=0.0452
Epoch 6 - |param|=56.194 |g_param|=0.450 loss=0.0229 accuracy=0.9934
Validation - loss=0.0317 accuracy=0.9926 best_loss=0.0351
Epoch 7 - |param|=56.741 |g_param|=0.387 loss=0.0186 accuracy=0.9948
Validation - loss=0.0288 accuracy=0.9927 best_loss=0.0317
Epoch 8 

# Test CNN model

In [15]:
# Checkpoint file name that was passed as --model_fn to the CNN training command.
model_fn = "./model_cnn_1.pt"

# Read the saved training config and the model weights from the checkpoint.
train_config, state_dict = load(model_fn, device)

# Build a model from the saved config, move it to the selected device, then
# restore the weights. This rebinds `model`, replacing the one loaded earlier.
model = get_model(train_config).to(device)
model.load_state_dict(state_dict)

print(model)

ConvolutionalClassifier(
  (blocks): Sequential(
    (0): ConvolutionBlock(
      (layers): Sequential(
        (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): ReLU()
        (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        (4): ReLU()
        (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): ConvolutionBlock(
      (layers): Sequential(
        (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): ReLU()
        (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        (4): ReLU()
        (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): ConvolutionBlock(
      (layers): Seq

In [16]:
# Load MNIST test set.
# flatten is requested only when the saved config says the model is 'fc'.
x, y = load_mnist(is_train=False, flatten=(train_config.model == 'fc'))
x, y = x.to(device), y.to(device)

# Print the test accuracy without drawing the individual samples.
test(model, x, y, to_be_shown=False)

Accuracy: 0.9904


# MNIST classification with RNN (LSTM) model

In [17]:
# Run main.py as a script to train the "rnn" classifier with --hidden_size 28.
# The model printed in the "Test RNN model" section is a bidirectional LSTM.
# --model_fn names the checkpoint file that section loads (presumably main.py
# writes the checkpoint there — confirm in main.py).
!python main.py --model_fn model_lstm_1.pt --model rnn --hidden_size 28


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)
Train: 48000
Valid: 12000
Test: 10000
Epoch 1 - |param|=32.112 |g_param|=1.367 loss=0.7420 accuracy=0.7556
Validation - loss=0.4217 accuracy=0.8623 best_loss=inf
Epoch 2 - |param|=33.729 |g_param|=1.788 loss=0.2694 accuracy=0.9187
Validation - loss=0.2283 accuracy=0.9363 best_loss=0.4217
Epoch 3 - |param|=34.570 |g_param|=1.530 loss=0.1774 accuracy=0.9448
Validation - loss=0.1220 accuracy=0.9583 best_loss=0.2283
Epoch 4 - |param|=35.158 |g_param|=1.375 loss=0.1449 accuracy=0.9563
Validation - loss=0.1398 accuracy=0.9612 best_loss=0.1220
Epoch 5 - |param|=35.628 |g_param|=1.257 loss=0.1137 accuracy=0.9652
Validation - loss=0.1087 accuracy=0.9651 best_loss=0.1220
Epoch 6 - |param|=36.045 |g_param|=1.143 loss=0.1019 accuracy=0.9706
Validation - loss=0.0972 accuracy=0.9733 best_loss=0.1087
Epoch 7 - |param|=36.443 |g_param|=1.167 loss=0.0921 accuracy=0.9716
Validation - loss=0.0661 accuracy=0.9785 best_loss=0.0972
Epoch 8 

# Test RNN model

In [18]:
# Checkpoint file name that was passed as --model_fn to the RNN training command.
model_fn = "./model_lstm_1.pt"

# Read the saved training config and the model weights from the checkpoint.
train_config, state_dict = load(model_fn, device)

# Build a model from the saved config, move it to the selected device, then
# restore the weights. This rebinds `model`, replacing the one loaded earlier.
model = get_model(train_config).to(device)
model.load_state_dict(state_dict)

print(model)

SequenceClassifier(
  (rnn): LSTM(28, 28, num_layers=4, batch_first=True, dropout=0.2, bidirectional=True)
  (layers): Sequential(
    (0): ReLU()
    (1): BatchNorm1d(56, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Linear(in_features=56, out_features=10, bias=True)
    (3): LogSoftmax(dim=-1)
  )
)


In [19]:
# Load MNIST test set.
# flatten is requested only when the saved config says the model is 'fc'.
x, y = load_mnist(is_train=False, flatten=(train_config.model == 'fc'))
x, y = x.to(device), y.to(device)

# Print the test accuracy without drawing the individual samples.
test(model, x, y, to_be_shown=False)

Accuracy: 0.9826
