# Import Libraries

In [21]:
import os
import time
import copy

# 데이터 분석 라이브러리
import numpy as np
import pandas as pd

# 시각화 라이브러리
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.image as img

from PIL import Image
from skimage.util import random_noise

In [22]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
import timm

from __future__ import print_function, division
from torch.optim import lr_scheduler
from torchvision import transforms, models, datasets
from torchvision.utils import save_image
from torch.utils.data import Dataset, DataLoader

from tqdm.notebook import tqdm
from sklearn.metrics import f1_score

cudnn.benchmark = True
plt.ion()   # interactive mode

In [23]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 
device

device(type='cuda', index=0)

In [24]:
IMAGE_PATH = "../input/data"
TRAIN_PATH = os.path.join(IMAGE_PATH, 'train')
TEST_PATH = os.path.join(IMAGE_PATH, 'eval')
TRAIN_IMAGE_PATH = os.path.join(TRAIN_PATH, 'images/')
TEST_IMAGE_PATH = os.path.join(TEST_PATH, 'images/')
TRAIN_CSV_PATH = os.path.join(TRAIN_PATH, 'train.csv')
TEST_CSV_PATH = os.path.join(TEST_PATH, 'info.csv')

DATASET_PATH = os.path.join(TRAIN_PATH, 'dataset')
DATASET_IMAGE_PATH = os.path.join(DATASET_PATH, 'train/')

In [25]:
print(TRAIN_PATH)
print(TRAIN_IMAGE_PATH)
print(TRAIN_CSV_PATH)

../input/data/train
../input/data/train/images/
../input/data/train/train.csv


# HyperParameter

In [26]:
image_width = 224
image_height = 224

In [27]:
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize([image_width,image_height]),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([      
        transforms.Resize([image_width,image_height]),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}
batch_size = 256
num_workers = 2

# Dataset

In [28]:
data_dir = '/opt/ml/input/data/train/dataset'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                        data_transforms[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

In [29]:
image_datasets

{'train': Dataset ImageFolder
     Number of datapoints: 15116
     Root location: /opt/ml/input/data/train/dataset/train
     StandardTransform
 Transform: Compose(
                Resize(size=[224, 224], interpolation=PIL.Image.BILINEAR)
                ToTensor()
                Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ),
 'val': Dataset ImageFolder
     Number of datapoints: 3784
     Root location: /opt/ml/input/data/train/dataset/val
     StandardTransform
 Transform: Compose(
                Resize(size=[224, 224], interpolation=PIL.Image.BILINEAR)
                ToTensor()
                Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            )}

# DataLoader

In [30]:
dataloaders = {x: torch.utils.data.DataLoader(
    image_datasets[x], 
    batch_size=batch_size, 
    shuffle=True, num_workers = num_workers
    )
    for x in ['train', 'val']
}
dataset_sizes = {
    x: len(image_datasets[x]) for x in ['train','val']
}

In [31]:
dataloaders

{'train': <torch.utils.data.dataloader.DataLoader at 0x7f4c0c116d00>,
 'val': <torch.utils.data.dataloader.DataLoader at 0x7f4c0c116a90>}

# Loss Function

In [32]:
class FocalLoss(nn.Module):
    def __init__(self, weight=None,
                 gamma=2., reduction='mean'):
        nn.Module.__init__(self)
        self.weight = weight
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, input_tensor, target_tensor):
        log_prob = F.log_softmax(input_tensor, dim=-1)
        prob = torch.exp(log_prob)
        return F.nll_loss(
            ((1 - prob) ** self.gamma) * log_prob,
            target_tensor,
            weight=self.weight,
            reduction=self.reduction
        )

# Model

In [33]:
model = timm.create_model('vit_small_patch16_224', pretrained=True, num_classes=18)
model.get_classifier()

Linear(in_features=384, out_features=18, bias=True)

In [34]:
model(torch.randn(64,3,224,224)).shape

torch.Size([64, 18])

In [35]:
model = model.to(device)

# loss_fn = FocalLoss()
loss_fn = torch.nn.CrossEntropyLoss() 

# Observe that all parameters are being optimized
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [36]:
model.default_cfg

{'url': 'https://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz',
 'num_classes': 1000,
 'input_size': (3, 224, 224),
 'pool_size': None,
 'crop_pct': 0.9,
 'interpolation': 'bicubic',
 'fixed_input_size': True,
 'mean': (0.5, 0.5, 0.5),
 'std': (0.5, 0.5, 0.5),
 'first_conv': 'patch_embed.proj',
 'classifier': 'head',
 'architecture': 'vit_small_patch16_224'}

In [37]:
import torchsummary
# torchsummary.summary(model,(3,224,224))

# Train

In [38]:
from sklearn.metrics import f1_score

# from torch.utils.tensorboard import SummaryWriter
# writer = SummaryWriter()

In [39]:
def train_model(model, loss_fn, optimizer, scheduler, num_epochs=25):
#     NUM_ACC = 2
    since = time.time()   
    best_acc = 0.0

    for epoch in range(num_epochs):
        # Each epoch has a training and validation phase
        for phase in ["train", "val"]:
            i=0
            running_loss = 0.
            running_acc = 0.
#             running_f1 = 0.
            if phase == "train":
                model.train() 
            elif phase == "val":
                model.eval() 
            optimizer.zero_grad() 

            for images, labels in tqdm(dataloaders[phase]):
#             for images, labels in dataloaders[phase]:
                images = images.to(device)
                labels = labels.to(device)

                with torch.set_grad_enabled(phase == "train"):
                    logits = model(images)
                    _, preds = torch.max(logits, 1)
                    loss = loss_fn(logits, labels)
#                     score = f1_score(labels.detach().cpu().numpy(), preds.detach().cpu().numpy(), average='macro')


                    if phase == "train":
                        loss.backward() 
                        optimizer.step()
#                         i+=1
#                         if i % NUM_ACC ==0: 
#                             optimizer.step() 
#                             optimizer.zero_grad() 

                running_loss += loss.item() * images.size(0) 
                running_acc += torch.sum(preds == labels.data)
#                 running_f1 += score * images.size(0) 
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / len(dataloaders[phase].dataset) 
            epoch_acc = running_acc / len(dataloaders[phase].dataset)
#             epoch_f1 = running_f1 / len(dataloaders[phase].dataset)
#             writer.add_scalar(f"Loss/{phase}",epoch_loss,epoch)
#             writer.add_scalar(f"Acc/{phase}",epoch_acc,epoch)
#             print('Epoch {}/{} - {} Loss: {:.4f} Acc: {:.4f} F1 {:.4f} '.format(epoch, num_epochs-1, phase, epoch_loss, epoch_acc, epoch_f1))
            print('Epoch {}/{} - {} Loss: {:.4f} Acc: {:.4f}'.format(epoch, num_epochs-1, phase, epoch_loss, epoch_acc))
            
            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
#                 torch.save({
#                     'epoch': epoch,
#                     'model_state_dict': model.state_dict(),
#                     'optimizer_state_dict': optimizer.state_dict(),
#                     'loss': epoch_loss
#                 }, f"./vit224_results/{epoch:03}_loss_{epoch_loss:4.2}.pt")
                
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    return model

In [40]:
model = train_model(model, loss_fn, optimizer, exp_lr_scheduler, num_epochs=50)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


Epoch 0/49 - train Loss: 1.9916 Acc: 0.3728


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Epoch 0/49 - val Loss: 2.2918 Acc: 0.2844



HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


Epoch 1/49 - train Loss: 2.4690 Acc: 0.2543


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Epoch 1/49 - val Loss: 2.8641 Acc: 0.1934



HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


Epoch 2/49 - train Loss: 2.6121 Acc: 0.2127


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Epoch 2/49 - val Loss: 2.6442 Acc: 0.1934



HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))




KeyboardInterrupt: 