In [None]:
# For Colab users
# Environment switch read by later cells: True on Google Colab, False for a local run.
colab_env = True

if colab_env:
    import sys
    from google.colab import drive

    # Mount Google Drive, forcing a remount if it is already mounted.
    drive.mount('/content/drive', force_remount=True)

    # Put the project folder first on the import path so that the notebook's
    # local modules (data_utils, models) can be imported from it.
    sys.path.insert(0, '/content/drive/MyDrive/숨고/주재영님(cnn-rnn)')

Mounted at /content/drive


In [None]:
import os

# Work from the project folder on Google Drive. That path only exists on Colab,
# so local runs (colab_env = False) keep their current working directory
# instead of failing with FileNotFoundError.
if colab_env:
    os.chdir('/content/drive/MyDrive/숨고/주재영님(cnn-rnn)')

In [None]:
%cd /content/drive/MyDrive/숨고/주재영님(cnn-rnn)/data

!unzip -qq "/content/drive/MyDrive/숨고/주재영님(cnn-rnn)/data/data.zip"

In [None]:
import os
import numpy as np
import torch
import torch.nn as nn
import random
import argparse
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from data_utils import Mydataset, Mytensordataset, collate_fn
from models import ConvLSTM

## (Optional) Sample Visualization
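This optional cell displays one training sample (its image frames) together with its ground-truth labels. It requires the additional `matplotlib` package and only runs when `colab_env` is `False`.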

In [None]:
# Just for reference: see actual samples
if not colab_env:
    import matplotlib.pyplot as plt

    # Display name -> class index (kept as a string): 'A(a)' -> '0', ..., 'Z(z)' -> '25'.
    alphabet = {}
    for class_id in range(26):
        upper = chr(ord('A') + class_id)
        alphabet[f'{upper}({upper.lower()})'] = str(class_id)
    class_names = list(alphabet.keys())

    # Load one training sample together with its label row and label length.
    idx = 10
    sample = np.load(f'./data/emnist/train/numpy/{idx}.npy')
    sample_target = np.loadtxt('./data/emnist/train/label.txt')[idx]
    sample_target_length = np.loadtxt('./data/emnist/train/label_length.txt')[idx]

    # Show the first ten frames side by side, without tick labels.
    plt.figure(figsize=(10,10))
    for frame_no in range(1, 11):
        plt.subplot(1, 10, frame_no)
        ax = plt.gca()
        ax.axes.xaxis.set_ticklabels([])
        ax.axes.yaxis.set_ticklabels([])
        plt.imshow(sample[frame_no - 1], cmap='gray')
    plt.show()

    # Only the first `sample_target_length` entries of the label row are printed:
    # first as class ids, then as letters.
    label_ids = [int(sample_target[pos].item()) for pos in range(int(sample_target_length))]
    print("sorted label: ", end=' ')
    for label_id in label_ids:
        print(label_id, end=' ')
    print()
    label_str = '(' + ''.join(" " + class_names[label_id] for label_id in label_ids) + " )"
    print(label_str)

In [None]:
# Use 0th GPU for training
# Local runs only: make CUDA device 0 the current device, so the later `.cuda()`
# calls place the model and batches on it. On Colab the current device is left as is.
if not colab_env:
    torch.cuda.set_device(0)

In [None]:
# fix random seed to increase reproducibility
# NOTE: Do not modify here!
# Fixed problem sizes; later passed to ConvLSTM as input_length / output_length / num_classes.
INPUT_LENGTH = 10
OUTPUT_LENGTH = 25
NUM_CLASSES = 26

# Seed every random number generator used in this notebook
# (PyTorch CPU and CUDA, NumPy, Python's `random`).
random_seed = 7
torch.manual_seed(random_seed)
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so setting it
# here presumably only affects processes launched afterwards — confirm if hash order matters.
os.environ['PYTHONHASHSEED'] = str(random_seed)
np.random.seed(random_seed)
random.seed(random_seed)
torch.cuda.manual_seed(random_seed)

# Turn off the cuDNN auto-tuner and request deterministic cuDNN algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
# Set the cuBLAS workspace configuration for this kernel process. PyTorch's
# reproducibility notes list ':16:8' as one of the settings that make cuBLAS deterministic.
%env CUBLAS_WORKSPACE_CONFIG=:16:8

def seed_worker(worker_seed):
    """Seed NumPy and Python's `random` inside a DataLoader worker process.

    Passed as `worker_init_fn` to the DataLoaders built later in this notebook.

    NOTE(review): DataLoader calls `worker_init_fn` with the worker id
    (0 .. num_workers - 1), so despite its name `worker_seed` receives that id
    rather than a value derived from `random_seed`. Left unchanged because this
    cell is marked "Do not modify".
    """
    np.random.seed(worker_seed)
    random.seed(worker_seed)

# Number of worker processes used by each DataLoader.
num_workers = 8

env: CUBLAS_WORKSPACE_CONFIG=:16:8


In [None]:
# NOTE: you can modify mean and std for normalization
# One value per channel; a single entry, i.e. one (grayscale) channel.
# NOTE(review): these look like the commonly used MNIST statistics — confirm they suit this data.
norm_mean = (0.1307,)
norm_std = (0.3081,)

# Convert each image to a tensor, then standardize it with the statistics above.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

In [None]:
# NOTE: you can freely modify or add training hyperparameters
print_interval = 15     # train() prints loss/accuracy every `print_interval` steps
max_epoch = 20          # last epoch number train() runs up to
batch_size = 256        # batch size of both DataLoaders
learning_rate = 0.0005  # learning rate given to the Adam optimizer

In [None]:
# NOTE: modify path for your setting

if colab_env:
    # Option 2: use Mytensordataset (For Colab users) — reads pre-packed .pt files
    train_ds = Mytensordataset(
        './data/emnist/Colab/train/img.pt',
        './data/emnist/Colab/train/label.pt',
        './data/emnist/Colab/train/label_length.pt',
        True,
        transform=transform,
    )
    valid_ds = Mytensordataset(
        './data/emnist/Colab/valid/img.pt',
        './data/emnist/Colab/valid/label.pt',
        './data/emnist/Colab/valid/label_length.pt',
        False,
        transform=transform,
    )
else:
    # Option 1: use Mydataset (For local users) — reads the numpy folder and text label files
    # NOTE(review): the training set omits the 4th positional argument that the
    # validation set passes as False, so it relies on the constructor default — confirm.
    train_ds = Mydataset(
        './data/emnist/train/numpy',
        './data/emnist/train/label.txt',
        './data/emnist/train/label_length.txt',
        transform=transform,
    )
    valid_ds = Mydataset(
        './data/emnist/valid/numpy',
        './data/emnist/valid/label.txt',
        './data/emnist/valid/label_length.txt',
        False,
        transform=transform,
    )

# Both loaders share the same settings (note that the validation loader shuffles too).
loader_options = dict(
    batch_size=batch_size,
    num_workers=num_workers,
    worker_init_fn=seed_worker,
    collate_fn=collate_fn,
    shuffle=True,
)
train_dl = DataLoader(train_ds, **loader_options)
valid_dl = DataLoader(valid_ds, **loader_options)



In [None]:
# You can add or modify your ConvLSTM's hyperparameter (keys and values)
# These entries are forwarded to the ConvLSTM constructor as keyword arguments.
# NOTE(review): the printed model shows `LSTM(256, 512, dropout=0.1)`; if that is a
# torch.nn.LSTM, dropout only acts between stacked layers and does nothing with
# rnn_num_layers=1 — confirm against models.py.
kwargs = dict(
    cnn_input_dim=1,
    cnn_hidden_size=256,
    rnn_hidden_size=512,
    rnn_num_layers=1,
    rnn_dropout=0.1,
    num_classes=NUM_CLASSES,
)

In [None]:
# ConvLSTM learning
# Build the model from the fixed sequence lengths plus the tunable `kwargs`, and move it to the GPU.
model = ConvLSTM(input_length=INPUT_LENGTH, output_length=OUTPUT_LENGTH, **kwargs).cuda()
print(model)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Targets equal to 26 are ignored by the loss; the validation code also filters out
# label 26, so it is presumably the padding label (one past the last class id, 25).
loss_func = nn.CrossEntropyLoss(ignore_index=26)
# NOTE: you can define additional components



ConvLSTM(
  (conv): CustomCNN(
    (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU()
    (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu2): ReLU()
    (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu3): ReLU()
    (maxpool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (fc): Linear(in_features=576, out_features=256, bias=True)
  )
  (embedding): Embedding(28, 256)
  (lstm_model): LSTM(
    (lstm): LSTM(256, 512, dropout=0.1)
    (fc_in): Linear(in_features=512, out_features=512, bias=True)
    (fc_out): Linear(in_features=512, out_features=26, bias=True)
  )
)


In [None]:
def _count_correct(pred, label, pad_index):
    """Return (n_correct, n_scored) over the positions whose label is not `pad_index`."""
    keep = label != pad_index
    n_correct = int((pred[keep] == label[keep]).sum().item())
    return n_correct, int(keep.sum().item())


def _evaluate_accuracy(model, data_loader, pad_index):
    """Return the accuracy (in percent) of `model` on `data_loader`, ignoring padded labels."""
    n_samples = 0
    n_correct = 0
    model.eval()
    with torch.no_grad():
        for img, label, _ in data_loader:
            outputs = model(img.cuda())
            # The labels stay on the CPU, so bring the predictions back before comparing.
            pred = outputs.argmax(dim=1).cpu()
            correct, scored = _count_correct(pred, label, pad_index)
            n_correct += correct
            n_samples += scored
    # An empty loader (or one with only padded labels) scores 0 instead of dividing by zero.
    return 100 * (n_correct / n_samples) if n_samples else 0.0


def train(model, optimizer, loss_func, max_epoch, train_dl, valid_dl, load_path=None, save_path='./model.pt', pad_index=26):
    """Train `model`, validate after every epoch and checkpoint the best model.

    Args:
        model: network called as `model((img, label))` while training and as
            `model(img)` while validating; both calls must return per-position
            class scores with the classes on dimension 1.
        optimizer: optimizer that updates `model`'s parameters.
        loss_func: loss called as `loss_func(outputs, label)`.
        max_epoch: number of the last epoch to run (epochs are counted from 1).
        train_dl: iterable of `(img, label, label_length)` training batches.
        valid_dl: iterable of `(img, label, label_length)` validation batches.
        load_path: optional checkpoint written by this function; when given,
            model/optimizer state, epoch counter and best accuracy are restored
            and training resumes after the stored epoch.
        save_path: where the best checkpoint is written.
        pad_index: label value marking padded positions; these positions are
            excluded from both the training and the validation accuracy.

    Reads the notebook-level `print_interval` to decide how often to log.
    The checkpoint holds only state dicts and plain Python numbers
    (`epoch` as int, `best_acc` as float).
    """
    # Load your states
    loaded_epoch = 0
    best_valid_accuracy = 0
    if load_path is not None:
        state = torch.load(load_path)
        model.load_state_dict(state["model"])
        optimizer.load_state_dict(state["optimizer"])
        loaded_epoch = int(state["epoch"])
        # -1 is treated as "no accuracy recorded yet".
        if state["best_acc"] != -1:
            best_valid_accuracy = float(state["best_acc"])

    for epoch in range(loaded_epoch, max_epoch):
        n_samples = 0
        n_correct = 0
        model.train()
        for step, (img, label, _) in enumerate(train_dl):
            # (Bxinput_seq, C=1, H, W), (Bxoutput_seq), (B)
            img = img.cuda()
            label = label.cuda()
            outputs = model((img, label))

            optimizer.zero_grad()
            loss = loss_func(outputs, label)
            loss.backward()
            optimizer.step()

            # Padded positions can never be predicted correctly, so counting them
            # would cap the reported accuracy; score real labels only.
            correct, scored = _count_correct(outputs.argmax(-1), label, pad_index)
            n_correct += correct
            n_samples += scored
            if (step + 1) % print_interval == 0:
                print('epoch:', epoch + 1, 'step:', step + 1, 'loss:', loss.item(), 'accuracy:', 100 * (n_correct / max(n_samples, 1)))

        valid_accuracy = _evaluate_accuracy(model, valid_dl, pad_index)
        if valid_accuracy > best_valid_accuracy:
            print("New best valid accuracy, saving model")
            # Update the best score first so the checkpoint records the accuracy
            # of the weights it contains, not the previous best.
            best_valid_accuracy = valid_accuracy
            # Save your states
            state = {
                "model": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "epoch": epoch + 1,
                "best_acc": best_valid_accuracy
            }
            torch.save(state, save_path)
        print('Valid epoch: %d, Valid accuracy: %.2f, Best valid accuracy: %.2f' % (epoch + 1, valid_accuracy, best_valid_accuracy))

In [None]:
# Train from scratch (load_path = None); point load_path at a saved checkpoint to resume.
# The best model found during training is written to './model_ConvLSTM.pt'.
load_path = None
train(model, optimizer, loss_func, max_epoch, train_dl, valid_dl, load_path=load_path, save_path='./model_ConvLSTM.pt')

epoch: 1 step: 15 loss: 0.08599603921175003 accuracy: 67.92083333333333
epoch: 1 step: 30 loss: 0.09113630652427673 accuracy: 67.93489583333333
epoch: 1 step: 45 loss: 0.08300435543060303 accuracy: 68.02048611111111
epoch: 1 step: 60 loss: 0.08599334955215454 accuracy: 68.03125
epoch: 1 step: 75 loss: 0.09326621145009995 accuracy: 68.02416666666666
epoch: 1 step: 90 loss: 0.08836139738559723 accuracy: 68.02274305555555
epoch: 1 step: 105 loss: 0.09679275006055832 accuracy: 68.01830357142858
epoch: 1 step: 120 loss: 0.08444685488939285 accuracy: 68.02174479166668
epoch: 1 step: 135 loss: 0.08561353385448456 accuracy: 68.02060185185185
epoch: 1 step: 150 loss: 0.09435335546731949 accuracy: 67.99374999999999
epoch: 1 step: 165 loss: 0.09997058659791946 accuracy: 67.98068181818182
epoch: 1 step: 180 loss: 0.10024190694093704 accuracy: 67.97864583333333
epoch: 1 step: 195 loss: 0.09382077306509018 accuracy: 67.97524038461539
New best valid accuracy, saving model
Valid epoch: 1, Valid accura

In [None]:
def eval(valid_dl, load_path):
    """Load a checkpoint into the notebook-level `model` and print its validation accuracy.

    Args:
        valid_dl: iterable of `(img, label, label_length)` batches.
        load_path: checkpoint file whose "model" entry holds the state dict.

    Positions whose label equals 26 are left out of the score. Nothing is
    returned; the accuracy (in percent) is printed.
    """
    checkpoint = torch.load(load_path)
    model.load_state_dict(checkpoint["model"])

    total = 0
    hits = 0
    with torch.no_grad():
        model.eval()
        for batch in valid_dl:
            img, label, label_length = batch
            logits = model(img.cuda())
            # Index positions of the labels that are scored (everything except 26).
            keep = torch.where(label != 26)
            predicted = logits.cpu().data.numpy().argmax(axis=1)
            target = label.data.numpy()
            total += target[keep].shape[0]
            hits += (predicted[keep] == target[keep]).astype(float).sum()
        print('Valid accuracy: %.2f' % (100 * (hits / total)))

In [None]:
# load and evaluate ConvLSTM model
# Reload the checkpoint written by the training cell and print its validation accuracy.
load_path = './model_ConvLSTM.pt'
eval(valid_dl, load_path)