In [1]:
# *------- Basic setup -------*
import numpy as np
import pandas as pd
import os, random, time
import copy
from tqdm.notebook import tqdm
from multiprocessing import cpu_count
import matplotlib.pyplot as plt
from PIL import Image

# *------- torch -------*
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import torchvision
from torchsummary import summary

from torchvision import transforms, models

# *------- albumentations -------*
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

# *------- sklearn -------*
from sklearn.model_selection import train_test_split

## Configuration

In [2]:
def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Args:
    - seed (int): value handed to Python's ``random``, NumPy, and the
      PyTorch CPU / CUDA generators; also exported as ``PYTHONHASHSEED``.

    Returns:
    - None
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Each of these takes the seed as its only argument, so they can be
    # applied uniformly.
    seeders = (
        np.random.seed,
        random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

# Reproducibility: seed all RNGs once, before anything stochastic runs.
SEED = 42
set_seed(SEED)

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters and data-loading settings used throughout the notebook.
IMG_SIZE = (64, 64)        # (height, width) fed to the network
BATCH_SIZE = 64
LEARNING_RATE = 0.001
# NOTE(review): must equal the number of distinct labels in the CSV — confirm.
NUM_CLASSES = 256
NUM_EPOCHS = 20
NUM_CPU = cpu_count()      # used as the DataLoader worker count

print(DEVICE)

cpu


## Split Dataset

In [4]:
def split_dataset(csv_path, train_save_path, valid_save_path, test_size=0.2, random_seed=SEED):
    """
    Split a label CSV into train and validation CSV files.

    The input is read as a header-less two-column file (``path``, ``label``)
    and split with stratification on ``label``. Both outputs are written
    without header and without index, i.e. in the same layout as the input.

    Args:
    - csv_path: path of the input CSV file
    - train_save_path: where the training split is written
    - valid_save_path: where the validation split is written
    - test_size: fraction of rows assigned to the validation split (default: 0.2)
    - random_seed: seed passed to the splitter (default: SEED)

    Returns:
    - None
    """
    column_names = ['path', 'label']
    full_df = pd.read_csv(csv_path, header=None, names=column_names)

    # train_test_split returns (train, validation) in that order.
    splits = train_test_split(
        full_df,
        test_size=test_size,
        random_state=random_seed,
        stratify=full_df['label'],
    )

    destinations = (train_save_path, valid_save_path)
    for part_df, destination in zip(splits, destinations):
        part_df.to_csv(destination, index=False, header=False)


In [38]:
# Write the train / validation label files next to the full label map.
split_dataset(
    csv_path='image-data/labels-map.csv',
    train_save_path='image-data/train-labels.csv',
    valid_save_path='image-data/valid-labels.csv',
)

## Dataset

In [8]:
class KoreanHandwritingDataset(Dataset):
    """
    Dataset that loads Korean handwriting images listed in a CSV file.

    Attributes:
    - dataset (DataFrame): rows read from the CSV, with columns 'path' and 'label'.
    - root_dir (str): base directory that the CSV's image paths are joined onto.
    - transform (callable, optional): transform applied to each sample dict
      (for example, resizing / tensor conversion / augmentation).
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Parameters:
        - csv_file (str): path of a header-less CSV with two columns: image path, label.
        - root_dir (str): directory that contains the image files.
        - transform (callable, optional): callable taking and returning a sample dict.
        """
        self.dataset = pd.read_csv(csv_file, header=None, names=['path', 'label'])
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """
        Return the number of samples.

        Returns:
        - int: number of rows in the CSV.
        """
        return len(self.dataset)

    def __getitem__(self, idx):
        """
        Return the sample at the given index.

        Parameters:
        - idx (int): positional index of the sample.

        Returns:
        - dict: {'image': RGB PIL image, 'label': label value from the CSV},
          or whatever ``transform`` returns for that dict when a transform is set.
        """
        row = self.dataset.iloc[idx]
        img_path = os.path.join(self.root_dir, row['path'])

        # Images are opened as RGB even when stored as grayscale, so the sample
        # has three channels. convert() returns a new, fully loaded image, so
        # the file handle can be closed right away.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")

        # Keep the label exactly as stored in the CSV. Numeric encoding is the
        # transform's job; encoding it here as well would convert it twice.
        label = row['label']

        sample = {'image': image, 'label': label}

        if self.transform is not None:
            sample = self.transform(sample)

        return sample

In [9]:
from torchvision.transforms import Resize
from torchvision.transforms.functional import to_tensor

class ToTensor(object):
    """Resize a sample's 'image' and convert 'image' and 'label' to PyTorch tensors."""

    def __init__(self, output_size=(64, 64)):
        """
        Args:
        - output_size (int or tuple): target size handed to torchvision's Resize.
        """
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size
        self.resizer = Resize(self.output_size)

    def __call__(self, sample):
        """
        Convert the 'image' and 'label' entries of a sample to tensors.

        Args:
        - sample (dict): dict with 'image' and 'label' keys. 'label' may be a
          single character or an already-encoded integer code.

        Returns:
        - dict: {'image': float tensor, 'label': int64 scalar tensor}
        """
        image, label = sample['image'], sample['label']

        # Resize first, then convert; to_tensor already scales pixel values
        # to the [0, 1] range.
        image = to_tensor(self.resizer(image))

        # A character label becomes its Unicode code point. An integer label is
        # passed through unchanged, so a dataset that already encoded the label
        # does not make ord() fail on an int.
        # NOTE(review): code points are large (e.g. 44032 for '가') and are not
        # class indices; nn.CrossEntropyLoss needs targets in [0, num_classes).
        # Confirm that labels are mapped to indices before training.
        if isinstance(label, str):
            label = ord(label)
        label = torch.tensor(int(label), dtype=torch.long)

        return {
            'image': image,
            'label': label
        }

In [10]:
# Label files written by split_dataset, and the directory holding the images.
TRAIN_CSV_FILE = "./image-data/train-labels.csv"
VALID_CSV_FILE = "./image-data/valid-labels.csv"
ROOT_DIR = "./image-data/hangul-images"

# Each split reads its own CSV, so validation is measured on held-out rows
# rather than on the training rows.
train_ds = KoreanHandwritingDataset(TRAIN_CSV_FILE, ROOT_DIR, transforms.Compose([ToTensor(IMG_SIZE)]))
valid_ds = KoreanHandwritingDataset(VALID_CSV_FILE, ROOT_DIR, transforms.Compose([ToTensor(IMG_SIZE)]))

train_size = len(train_ds)
valid_size = len(valid_ds)
print(train_size, valid_size)

# Only the training loader shuffles; validation order does not matter.
train_loader = DataLoader(train_ds, batch_size = BATCH_SIZE, shuffle=True, num_workers = NUM_CPU)
valid_loader = DataLoader(valid_ds, batch_size = BATCH_SIZE, shuffle=False, num_workers = NUM_CPU)

In [16]:
# pretrained=False: build ResNet-34 without loading pretrained weights.
model = torchvision.models.resnet34(pretrained = False)

# Number of input features of the final fully connected layer.
num_ftrs = model.fc.in_features

# Replace the classification head so it outputs NUM_CLASSES scores.
model.fc = torch.nn.Linear(num_ftrs, NUM_CLASSES)
model = model.to(DEVICE)

# Loss and optimizer (LEARNING_RATE comes from the configuration cell).
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)


## model summary
summary(model, (3, IMG_SIZE[0], IMG_SIZE[1]), BATCH_SIZE)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [64, 64, 32, 32]           9,408
       BatchNorm2d-2           [64, 64, 32, 32]             128
              ReLU-3           [64, 64, 32, 32]               0
         MaxPool2d-4           [64, 64, 16, 16]               0
            Conv2d-5           [64, 64, 16, 16]          36,864
       BatchNorm2d-6           [64, 64, 16, 16]             128
              ReLU-7           [64, 64, 16, 16]               0
            Conv2d-8           [64, 64, 16, 16]          36,864
       BatchNorm2d-9           [64, 64, 16, 16]             128
             ReLU-10           [64, 64, 16, 16]               0
       BasicBlock-11           [64, 64, 16, 16]               0
           Conv2d-12           [64, 64, 16, 16]          36,864
      BatchNorm2d-13           [64, 64, 16, 16]             128
             ReLU-14           [64, 64,

In [None]:
# function to train model
def train_model(model, criterion, optimizer, num_epochs, train_loader, val_loader):
    """Train ``model`` and return it with its best-validation-accuracy weights loaded.

    Each epoch runs one training pass over ``train_loader`` and one evaluation
    pass over ``val_loader``, printing loss and accuracy for both. The weights
    of the epoch with the highest validation accuracy are restored at the end.

    Args:
    - model (nn.Module): network to train; batches are moved to the device of
      its first parameter (CPU when it has no parameters).
    - criterion (callable): loss taking ``(outputs, labels)``.
    - optimizer (torch.optim.Optimizer): optimizer over ``model``'s parameters.
    - num_epochs (int): number of epochs to run.
    - train_loader (iterable): yields training batches.
    - val_loader (iterable): yields validation batches.

    Batches may be dicts with 'image' and 'label' keys or ``(inputs, labels)``
    pairs.

    Returns:
    - nn.Module: the same ``model`` object, with the best weights loaded.
    """
    since = time.time()

    # Follow the model's own device instead of a notebook-level global, so the
    # function works for any model that has already been placed on a device.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    best_model = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))

        # train
        epoch_loss, epoch_acc = _run_epoch(model, criterion, train_loader, device, optimizer)
        print('Train Loss: {:.4f} Train Acc: {:.4f}'.format(epoch_loss, epoch_acc))

        # validate (no optimizer -> evaluation mode, no gradients)
        epoch_loss, epoch_acc = _run_epoch(model, criterion, val_loader, device)
        print('Val Loss: {:.4f} Val Acc: {:.4f}'.format(epoch_loss, epoch_acc))
        print('-' * 30)

        # Snapshot the weights whenever validation accuracy improves.
        if epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best Val Acc: {:.4f}'.format(best_acc))
    model.load_state_dict(best_model)
    return model


def _unpack_batch(batch):
    """Return ``(inputs, labels)`` from a dict batch or an ``(inputs, labels)`` pair."""
    if isinstance(batch, dict):
        # Iterating a dict yields its keys, so dict batches must be indexed.
        return batch['image'], batch['label']
    inputs, labels = batch
    return inputs, labels


def _run_epoch(model, criterion, loader, device, optimizer=None):
    """Run one pass over ``loader``; train when ``optimizer`` is given, else evaluate.

    Returns:
    - tuple(float, float): mean per-sample loss and accuracy over the samples
      seen. Both are 0.0 when the loader yields nothing.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    correct = 0
    seen = 0

    for batch in loader:
        inputs, labels = _unpack_batch(batch)
        inputs = inputs.to(device)
        labels = labels.to(device)

        if is_training:
            optimizer.zero_grad()

        with torch.set_grad_enabled(is_training):
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if is_training:
                loss.backward()
                optimizer.step()

        preds = outputs.argmax(dim=1)
        batch_count = inputs.size(0)
        # The loss is a batch mean, so weight it by batch size before summing.
        loss_sum += loss.item() * batch_count
        correct += (preds == labels).sum().item()
        seen += batch_count

    # Divide by the number of samples actually processed; guard empty loaders.
    denom = max(seen, 1)
    return loss_sum / denom, correct / denom

# Run training; the returned model carries the best-validation weights.
model = train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=NUM_EPOCHS,
    train_loader=train_loader,
    val_loader=valid_loader,
)

Epoch 1/50
