## Import Libraries

In [1]:
import os
import time
import copy


# 데이터 분석 라이브러리
import numpy as np
import pandas as pd

# 시각화 라이브러리
import matplotlib as mpl
import matplotlib.pyplot as plt

from PIL import Image

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
import timm

from __future__ import print_function, division
from torch.optim import lr_scheduler
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import tqdm
from sklearn.metrics import f1_score

cudnn.benchmark = True
plt.ion()   # interactive mode

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 
device

device(type='cuda', index=0)

## Bring csv file with path & label

In [4]:
traindata_path = pd.read_csv('train_data.csv')
valdata_path = pd.read_csv('val_data.csv')
testdata_path = pd.read_csv('test_data.csv')

In [5]:
print(len(traindata_path))

15126


## Hyper Parameter
Resize([224,224]) - https://www.programcreek.com/python/example/104834/torchvision.transforms.Resize

In [6]:
transform = transforms.Compose([
        transforms.Resize([224,224]),
#         transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
batch_size = 256
num_workers = 2

## Dataset

In [7]:
def readImage(path_data):
    path_to_image = []
    for path in path_data:
        img = Image.open(path)
        path_to_image.append(img)
    return path_to_image

In [8]:
class MyDataset(Dataset):
    def __init__(self, data, transform, train):
        self._repr_indent = 4
        self.data = data
        self.X = readImage(self.data['img_path'])
        self.y = self.data['label']
        self.transform = transform
        self.train = train
        self.classes = list(set(self.y))
    
    def __getitem__(self, idx):
        X, y = self.X[idx], None
#         X, y = self.X[idx], -1

        if self.transform:
            X = self.transform(X)
        if self.train:
            y = self.y[idx]

        return X,y
#         return torch.tensor(X), torch.tensor(y)
#         return X.clone().detach(), y.clone().detach()
    
    def __len__(self):
        return len(self.X)
    
    def __repr__(self):
        '''
        https://github.com/pytorch/vision/blob/master/torchvision/datasets/vision.py
        '''
        head = "My Custom Dataset : Mask Dataset"
        num_data = self._repr_indent*" " + "Number of datapoints: {}".format(self.__len__())
        num_classes = self._repr_indent*" " + "Number of classes: {}".format(len(self.classes))
        return '\n'.join([head, num_data, num_classes])

In [9]:
train_dataset = MyDataset(
    data = traindata_path,
    transform = transform,
    train = True
)
val_dataset = MyDataset(
    data = valdata_path,
    transform = transform,
    train = True
)
test_dataset = MyDataset(
    data = testdata_path,
    transform = transform,
    train = False
)

In [10]:
train_dataset

My Custom Dataset : Mask Dataset
    Number of datapoints: 15126
    Number of classes: 18

In [11]:
val_dataset

My Custom Dataset : Mask Dataset
    Number of datapoints: 3774
    Number of classes: 18

In [12]:
test_dataset

My Custom Dataset : Mask Dataset
    Number of datapoints: 12600
    Number of classes: 1

## DataLoader

In [13]:
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size = batch_size,
    num_workers = num_workers,
    shuffle = True,
    drop_last = False
)
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size = batch_size,
    num_workers = num_workers,
    shuffle = True,
    drop_last = False
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size = batch_size,
    num_workers = num_workers,
    shuffle = True,
    drop_last = False
)

In [14]:
dataloaders = {
    'train': train_loader,
    'val': val_loader,
    'test': test_loader
}

In [15]:
dataloaders

{'train': <torch.utils.data.dataloader.DataLoader at 0x7fe2d4af6310>,
 'val': <torch.utils.data.dataloader.DataLoader at 0x7fe2d4af6370>,
 'test': <torch.utils.data.dataloader.DataLoader at 0x7fe2d4af6460>}

## Model
model 생성 및 확인 - https://towardsdatascience.com/getting-started-with-pytorch-image-models-timm-a-practitioners-guide-4e77b4bf9055

In [16]:
timm.list_models('*vit*',pretrained=True)

['convit_base',
 'convit_small',
 'convit_tiny',
 'crossvit_9_240',
 'crossvit_9_dagger_240',
 'crossvit_15_240',
 'crossvit_15_dagger_240',
 'crossvit_15_dagger_408',
 'crossvit_18_240',
 'crossvit_18_dagger_240',
 'crossvit_18_dagger_408',
 'crossvit_base_240',
 'crossvit_small_240',
 'crossvit_tiny_240',
 'levit_128',
 'levit_128s',
 'levit_192',
 'levit_256',
 'levit_384',
 'vit_base_patch8_224',
 'vit_base_patch8_224_in21k',
 'vit_base_patch16_224',
 'vit_base_patch16_224_in21k',
 'vit_base_patch16_224_miil',
 'vit_base_patch16_224_miil_in21k',
 'vit_base_patch16_384',
 'vit_base_patch16_sam_224',
 'vit_base_patch32_224',
 'vit_base_patch32_224_in21k',
 'vit_base_patch32_384',
 'vit_base_patch32_sam_224',
 'vit_base_r50_s16_224_in21k',
 'vit_base_r50_s16_384',
 'vit_huge_patch14_224_in21k',
 'vit_large_patch16_224',
 'vit_large_patch16_224_in21k',
 'vit_large_patch16_384',
 'vit_large_patch32_224_in21k',
 'vit_large_patch32_384',
 'vit_large_r50_s32_224',
 'vit_large_r50_s32_224_i

In [17]:
model = timm.create_model('vit_small_patch16_224', pretrained=True, num_classes=18)

# num_ftrs = model.fc.in_features
# model.fc = nn.Linear(num_ftrs, 18)

model.get_classifier()

Linear(in_features=384, out_features=18, bias=True)

In [18]:
model(torch.randn(64,3,224,224)).shape

torch.Size([64, 18])

In [19]:
model = model.to(device)

loss_fn = torch.nn.CrossEntropyLoss() 

# Observe that all parameters are being optimized
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001) 

In [20]:
model.default_cfg

{'url': 'https://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz',
 'num_classes': 1000,
 'input_size': (3, 224, 224),
 'pool_size': None,
 'crop_pct': 0.9,
 'interpolation': 'bicubic',
 'fixed_input_size': True,
 'mean': (0.5, 0.5, 0.5),
 'std': (0.5, 0.5, 0.5),
 'first_conv': 'patch_embed.proj',
 'classifier': 'head',
 'architecture': 'vit_small_patch16_224'}

In [21]:
import torchsummary
torchsummary.summary(model,(3,224,224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 384, 14, 14]         295,296
          Identity-2             [-1, 196, 384]               0
        PatchEmbed-3             [-1, 196, 384]               0
           Dropout-4             [-1, 197, 384]               0
         LayerNorm-5             [-1, 197, 384]             768
            Linear-6            [-1, 197, 1152]         443,520
           Dropout-7          [-1, 6, 197, 197]               0
            Linear-8             [-1, 197, 384]         147,840
           Dropout-9             [-1, 197, 384]               0
        Attention-10             [-1, 197, 384]               0
         Identity-11             [-1, 197, 384]               0
        LayerNorm-12             [-1, 197, 384]             768
           Linear-13            [-1, 197, 1536]         591,360
             GELU-14            [-1, 19

## Train
transfer learning train model - https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html   
save checkpoint - https://tutorials.pytorch.kr/recipes/recipes/saving_and_loading_a_general_checkpoint.html   
f1 score - https://eunsukimme.github.io/ml/2019/10/21/Accuracy-Recall-Precision-F1-score/   

In [22]:
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [25]:
def train_model(model, loss_fn, optimizer, num_epochs=25):
    since = time.time()
    
#     best_model_wts = copy.deepcopy(model.state_dict()) 
    best_acc = 0.0

    for epoch in range(num_epochs):
        # Each epoch has a training and validation phase
        for phase in ["train", "val"]:
            running_loss = 0.
#             running_f1 = 0.
            running_acc = 0.
            if phase == "train":
                model.train() 
            elif phase == "val":
                model.eval() 

            for images, labels in dataloaders[phase]:
                images = images.to(device)
                labels = labels.to(device)

                optimizer.zero_grad() 

                with torch.set_grad_enabled(phase == "train"):
                    logits = model(images)
                    _, preds = torch.max(logits, 1)
                    loss = loss_fn(logits, labels)
#                     f1 = f1_score(labels, logits)

                    if phase == "train":
                        loss.backward() 
                        optimizer.step() 

                running_loss += loss.item() * images.size(0) 
#                 running_f1 += f1 * images.size(0)
                running_acc += torch.sum(preds == labels.data) 

            epoch_loss = running_loss / len(dataloaders[phase].dataset) 
#             epoch_f1 = running_f1 / len(dataloaders[phase].dataset)
            epoch_acc = running_acc / len(dataloaders[phase].dataset)

#             print('{} Loss: {:.4f} F1_Score:{:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_f1, epoch_acc))
            print('Epoch {}/{} - {} Loss: {:.4f} Acc: {:.4f}'.format(epoch, num_epochs-1, phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
#                 best_model_wts = copy.deepcopy(model.state_dict())
            torch.save({
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': epoch_loss
                }, f"./results/{epoch:03}_loss_{epoch_loss:4.2}.pt")
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
#     model.load_state_dict(best_model_wts)
    return model

In [26]:
model = train_model(model, loss_fn, optimizer, num_epochs=25)

Epoch 0/24 - train Loss: 1.5625 Acc: 0.5132
Epoch 0/24 - val Loss: 1.1337 Acc: 0.6592

Epoch 1/24 - train Loss: 1.1200 Acc: 0.6257
Epoch 1/24 - val Loss: 1.4338 Acc: 0.5318

Epoch 2/24 - train Loss: 0.8526 Acc: 0.7003
Epoch 2/24 - val Loss: 1.5129 Acc: 0.5074

Epoch 4/24 - train Loss: 0.4598 Acc: 0.8431
Epoch 4/24 - val Loss: 1.8374 Acc: 0.4730

Epoch 5/24 - train Loss: 0.3011 Acc: 0.9035
Epoch 5/24 - val Loss: 2.0665 Acc: 0.4671

Epoch 6/24 - train Loss: 0.1749 Acc: 0.9482
Epoch 6/24 - val Loss: 2.4244 Acc: 0.4422

Epoch 7/24 - train Loss: 0.1282 Acc: 0.9637
Epoch 7/24 - val Loss: 2.5813 Acc: 0.4436

Epoch 8/24 - train Loss: 0.1024 Acc: 0.9701
Epoch 8/24 - val Loss: 2.7437 Acc: 0.4255

Epoch 9/24 - train Loss: 0.1826 Acc: 0.9425
Epoch 9/24 - val Loss: 2.7532 Acc: 0.4149

Epoch 10/24 - train Loss: 0.1187 Acc: 0.9635
Epoch 10/24 - val Loss: 2.7033 Acc: 0.4491

Epoch 11/24 - train Loss: 0.0772 Acc: 0.9782
Epoch 11/24 - val Loss: 2.8808 Acc: 0.4568

Epoch 12/24 - train Loss: 0.1246 Acc: 0