## Dataset
- EDA를 진행하면서 동시에 csv를 만들었습니다.
- image와 label을 반환합니다.
- albumentations augmentation을 염두에 두고 설계했기 때문에 이미지를 opencv로 읽었습니다.

In [1]:
import os
import cv2
import numpy as np

from torch.utils.data import Dataset

In [2]:
class ArtPaintDataset(Dataset):
    """Dataset yielding ``{'image': ..., 'label': ...}`` samples from a DataFrame.

    Images are read with OpenCV and converted from BGR to RGB before the
    optional transform is applied.

    Args:
        df: DataFrame with an ``image_id`` column (passed as the path to
            ``cv2.imread``) and a ``label`` column.
        transform: Optional callable invoked as ``transform(image=array)``;
            its result is indexed with ``'image'`` (the albumentations
            calling convention).
    """

    def __init__(self, df, transform=None):
        super().__init__()
        # reset_index() gives a fresh 0..n-1 index so that the integer lookups
        # in __getitem__ work for subsets such as cross-validation folds.
        self.df = df.reset_index()
        self.image_id = self.df.image_id
        self.labels = self.df.label
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the DataFrame."""
        return len(self.df)

    def set_transform(self, transform):
        """Replace the transform applied in ``__getitem__``."""
        self.transform = transform

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``{'image': image, 'label': label}``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image at the stored path.
        """
        image_path = self.image_id[idx]
        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        label = self.labels[idx]

        if self.transform:
            # cvtColor already returned a fresh ndarray, so no extra copy is needed.
            image = self.transform(image=image)['image']

        return {'image' : image, 'label' : label}

## Import Lib

In [3]:
import os
import math
import timm
import yaml
import torch
import random
import numpy as np
import pandas as pd
import torch.nn as nn
import albumentations
import albumentations.pytorch

from tqdm.auto import tqdm
from madgrad import MADGRAD
from adamp import AdamP
from easydict import EasyDict
from prettyprinter import cpprint
from torchsummary import summary as summary_
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import _LRScheduler
from torch.optim.lr_scheduler import CosineAnnealingLR
from sklearn.model_selection import StratifiedKFold, train_test_split


## ConfigManager
- .yaml 파일을 사용하여 config를 관리했습니다.
- config를 바꿔가며 실험했습니다.
- 최종적으로 사용한 config는 다음과 같습니다.
---
```
base:
  seed: 77
  model_arc: 'nfnet_l0'
  num_classes: 7
  input_dir: './train/train.csv'
  output_dir: './results/'
  train_only: False
  image_size: 227
  cutmix_args:
    use_cutmix: False
    beta: 1.0
    cutmix_prob: 0.5
  train_args:
    num_epochs: 6
    train_batch_size: 32
    val_batch_size: 32
    max_lr: 0.0001
    min_lr: 0.00001
    cycle: 3
    gamma: 0.5
    weight_decay: 0.000001
    log_intervals: 10
    eval_metric: 'accuracy'    
    n_splits: 5
```

---

```
swin:
  seed: 777
  model_arc: 'swin_base_patch4_window7_224'
  num_classes: 7
  input_dir: './train/train.csv'
  output_dir: './results/'
  train_only: False
  image_size: 224
  cutmix_args:
    use_cutmix: True
    beta: 1.0
    cutmix_prob: 0.5
  train_args:
    num_epochs: 10
    train_batch_size: 16
    val_batch_size: 16
    max_lr: 0.0001
    min_lr: 0.00001
    cycle: 3
    gamma: 0.5
    weight_decay: 0.000001
    log_intervals: 10
    eval_metric: 'accuracy'    
    n_splits: 5
```


In [4]:
# Set Config
class YamlConfigManager:
    """Load one named section of a YAML file into an ``EasyDict``.

    The selected section is exposed as ``self.values`` so that settings can
    be read with attribute access (``cfg.values.train_args.max_lr``).

    Args:
        config_file_path: Path of the YAML file. When falsy, nothing is
            loaded and ``values`` stays empty.
        config_name: Top-level key of the YAML file to load.
    """

    def __init__(self, config_file_path='./config.yml', config_name='xception'):
        super().__init__()
        self.values = EasyDict()
        # Always define both attributes so reload() can be called safely even
        # when the manager was created without a file path.
        self.config_file_path = config_file_path
        self.config_name = config_name
        if config_file_path:
            self.reload()

    def reload(self):
        """Discard the current values and re-read the section from disk.

        Raises:
            KeyError: If the file does not contain a ``config_name`` section.
        """
        self.clear()
        if self.config_file_path:
            with open(self.config_file_path, 'r') as f:
                loaded = yaml.safe_load(f)
            if not isinstance(loaded, dict) or self.config_name not in loaded:
                raise KeyError(
                    f'config section {self.config_name!r} not found in {self.config_file_path!r}'
                )
            self.values.update(loaded[self.config_name])

    def clear(self):
        """Remove every loaded value."""
        self.values.clear()

    @staticmethod
    def _merge(target, source):
        """Recursively merge the mapping ``source`` into ``target`` in place."""
        for key, value in source.items():
            if isinstance(value, dict):
                # Create (or replace a non-mapping with) a nested mapping so
                # that new sub-sections can be added, not only existing ones.
                if not isinstance(target.get(key), dict):
                    target[key] = {}
                YamlConfigManager._merge(target[key], value)
            else:
                target[key] = value

    @staticmethod
    def _to_plain(obj):
        """Return ``obj`` with every mapping converted to a builtin ``dict``."""
        if isinstance(obj, dict):
            return {key: YamlConfigManager._to_plain(value) for key, value in obj.items()}
        if isinstance(obj, list):
            return [YamlConfigManager._to_plain(item) for item in obj]
        return obj

    def update(self, yml_dict):
        """Merge ``yml_dict`` into the current values.

        Nested dictionaries are merged key by key at any depth; other values
        overwrite the existing entry. Missing intermediate keys are created.
        """
        self._merge(self.values, yml_dict)

    def export(self, save_file_path):
        """Write the current values to ``save_file_path`` as YAML.

        Does nothing when ``save_file_path`` is falsy.
        """
        if save_file_path:
            with open(save_file_path, 'w') as f:
                # Convert nested EasyDict objects to builtin dicts as well;
                # dict(self.values) alone only unwraps the top level, and a
                # dict subclass is not dumped as a plain YAML mapping.
                yaml.dump(self._to_plain(self.values), f)

In [5]:
# Load the default section of the default config file once and expose the
# settings the notebook-level cells need as module constants.
cfg = YamlConfigManager()

_settings = cfg.values
_train_args = _settings.train_args

# General settings
SEED = _settings.seed
INPUT_DIR = _settings.input_dir
TRAIN_ONLY = _settings.train_only
IMAGE_SIZE = _settings.image_size

# Loader batch sizes
TRAIN_BATCH_SIZE = _train_args.train_batch_size
VAL_BATCH_SIZE = _train_args.val_batch_size

### SEED 고정

In [6]:
# Fix random seed
def seed_everything(seed: int = 42):
    """Seed the Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: Seed applied to ``random``, ``numpy.random``, ``torch`` (CPU and
            every CUDA device) and exported as ``PYTHONHASHSEED``.
    """
    random.seed(seed)
    np.random.seed(seed)
    # Only affects Python processes started after this point.
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # type: ignore
    torch.backends.cudnn.deterministic = True  # type: ignore
    # benchmark=True lets cuDNN auto-tune and pick algorithms per run, which
    # defeats the deterministic setting above; it must be off for reproducibility.
    torch.backends.cudnn.benchmark = False  # type: ignore

In [7]:
# Apply the seed read from the config before any data is loaded or any model is built.
seed_everything(SEED)

### Load Data
- 만들어둔 csv 파일을 불러왔습니다.

### Augmentation
- HorizontalFlip() : TTA를 염두에 두고 선택하였습니다.
- ToGray() : 그림 이미지라서 색조가 너무 다양할 것이라고 생각하여 gray scale에서도 feature를 잡을 수 있었으면 했습니다.
- Blur() : 그림 이미지 특성상 blurring된 효과가 많이 있을 것이라 생각하여 채택했습니다.
- Normalize RGB mean, std 값은 eda.ipynb를 통해 직접 계산했습니다.

In [8]:
# # Calculate mean and std

# # pixel count
# count = len(train_df) * 227 * 227

# # mean and std
# total_mean = psum / count
# total_var  = (psum_sq / count) - (total_mean ** 2)
# total_std  = torch.sqrt(total_var)

# # output
# print('mean: '  + str(total_mean))
# print('std:  '  + str(total_std))

In [9]:
# Full training table and its label column.
whole_df = pd.read_csv(INPUT_DIR)
whole_label = whole_df['label'].values


def _resize_normalize_to_tensor():
    """Build the resize / normalize / to-tensor steps shared by both pipelines."""
    return [
        albumentations.Resize(IMAGE_SIZE, IMAGE_SIZE),
        albumentations.Normalize(mean=(0.4569, 0.5074, 0.5557), std=(0.2888, 0.2743, 0.2829)),
        albumentations.pytorch.transforms.ToTensorV2(),
    ]


# Training: one randomly chosen augmentation, then the shared steps.
_random_augment = albumentations.OneOf([
    albumentations.HorizontalFlip(),
    albumentations.ToGray(),
    albumentations.Blur(),
])
train_transform = albumentations.Compose([_random_augment] + _resize_normalize_to_tensor())

# Validation: the shared steps only.
val_transform = albumentations.Compose(_resize_normalize_to_tensor())

## Model
- timm library를 사용했습니다.
- NFNet, EfficientNet
    - kaggle alien signal search 대회에 참가 중인데 nfnet과 efficientnet이 대체로 성능이 좋았습니다.
    - 첫 제출 당시에 NFNet을 사용했는데 1.0이 나와서 놀랐습니다. 
    - 문제가 쉬운 대회인 만큼 shake up이 심할거라 생각해서 다른 모델들도 테스트했지만 val score가 좋지 않았습니다.
    - LB score
        - efficientnet_b0 : `90.0`
        - nfnet_l0 : `100.0`
- Swin-transformer
    - naver boostcamp에서 competition 진행했었는데 detection 대회에서 swin-transformer를 사용해서 2등을 했습니다.
    - 당시에 성능이 좋았습니다.
    - 구조가 다른 모델끼리 앙상블할 경우에 generalized performance가 올라갈 것이라고 생각했습니다. 
    - LB score
        - swin-base-224-22k : `97.14`

In [10]:
class PretrainedModel(nn.Module):
    """Thin ``nn.Module`` wrapper around a pretrained timm model.

    Args:
        model_arc: Architecture name handed to ``timm.create_model``.
        num_classes: Number of output classes requested from timm.
    """

    def __init__(self, model_arc='resnet18d', num_classes=7):
        super().__init__()
        backbone = timm.create_model(model_arc, pretrained=True, num_classes=num_classes)
        self.net = backbone

    def forward(self, x):
        """Run ``x`` through the wrapped network and return its output."""
        return self.net(x)

### Utils
- 여러가지 유틸 함수들을 정의했습니다.

In [11]:
def get_dataloader(df, transform, batch_size, shuffle):
    """Wrap ``df`` in an ``ArtPaintDataset`` and return a ``DataLoader`` over it.

    Args:
        df: DataFrame passed to ``ArtPaintDataset``.
        transform: Transform passed to ``ArtPaintDataset``.
        batch_size: Batch size of the loader.
        shuffle: Whether the loader shuffles the samples.
    """
    return DataLoader(
        ArtPaintDataset(df=df, transform=transform),
        batch_size=batch_size,
        shuffle=shuffle,
    )

### CosineAnnealingWarmupRestarts
- gamma 비율로 감소하는 cosine annealing warmup restart scheduler가 lr 분석하기 제일 편했습니다.
- 경험에 의해서 선택했으며, wandb와 함께 사용할 경우 optimizer 분석에 매우 용이했습니다.

In [12]:
class CosineAnnealingWarmupRestarts(_LRScheduler):
    """Cosine annealing with linear warmup and restarts.

    Within a cycle the learning rate rises linearly from ``min_lr`` to the
    cycle's peak over ``warmup_steps`` steps and then follows a half cosine
    back down to ``min_lr``. The peak of cycle ``c`` is
    ``max_lr * gamma ** c``.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        first_cycle_steps (int): First cycle step size.
        cycle_mult(float): Cycle steps magnification. Default: 1.
        max_lr(float): First cycle's max learning rate. Default: 0.1.
        min_lr(float): Min learning rate. Default: 0.001.
        warmup_steps(int): Linear warmup step size. Default: 0.
        gamma(float): Decrease rate of max learning rate by cycle. Default: 1.
        last_epoch (int): The index of last epoch. Default: -1.
    """
    
    def __init__(self,
                 optimizer : torch.optim.Optimizer,
                 first_cycle_steps : int,
                 cycle_mult : float = 1.,
                 max_lr : float = 0.1,
                 min_lr : float = 0.001,
                 warmup_steps : int = 0,
                 gamma : float = 1.,
                 last_epoch : int = -1
        ):
        assert warmup_steps < first_cycle_steps
        
        # These attributes must exist before the base constructor runs,
        # because step() and get_lr() below read them.
        self.first_cycle_steps = first_cycle_steps # first cycle step size
        self.cycle_mult = cycle_mult # cycle steps magnification
        self.base_max_lr = max_lr # first max learning rate
        self.max_lr = max_lr # max learning rate in the current cycle
        self.min_lr = min_lr # min learning rate
        self.warmup_steps = warmup_steps # warmup step size
        self.gamma = gamma # decrease rate of max learning rate by cycle
        
        self.cur_cycle_steps = first_cycle_steps # first cycle step size
        self.cycle = 0 # cycle count
        self.step_in_cycle = last_epoch # step size of the current cycle
        
        super(CosineAnnealingWarmupRestarts, self).__init__(optimizer, last_epoch)
        
        # set learning rate min_lr
        self.init_lr()
    
    def init_lr(self):
        """Set every param group's lr, and ``base_lrs``, to ``min_lr``."""
        self.base_lrs = []
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = self.min_lr
            self.base_lrs.append(self.min_lr)
    
    def get_lr(self):
        """Return the learning rate of each param group for the current step."""
        if self.step_in_cycle == -1:
            return self.base_lrs
        elif self.step_in_cycle < self.warmup_steps:
            # Linear warmup from base_lr up to the current cycle's max_lr.
            return [(self.max_lr - base_lr)*self.step_in_cycle / self.warmup_steps + base_lr for base_lr in self.base_lrs]
        else:
            # Half-cosine decay from max_lr back to base_lr over the rest of the cycle.
            return [base_lr + (self.max_lr - base_lr) \
                    * (1 + math.cos(math.pi * (self.step_in_cycle-self.warmup_steps) \
                                    / (self.cur_cycle_steps - self.warmup_steps))) / 2
                    for base_lr in self.base_lrs]

    def step(self, epoch=None):
        """Advance the schedule by one step, or jump to step ``epoch``.

        Args:
            epoch: When ``None`` the schedule advances by one step. Otherwise
                the cycle index and the position inside the cycle are
                recomputed for that absolute step.
        """
        if epoch is None:
            epoch = self.last_epoch + 1
            self.step_in_cycle = self.step_in_cycle + 1
            if self.step_in_cycle >= self.cur_cycle_steps:
                # Restart: carry over the overshoot and size the next cycle.
                self.cycle += 1
                self.step_in_cycle = self.step_in_cycle - self.cur_cycle_steps
                self.cur_cycle_steps = int((self.cur_cycle_steps - self.warmup_steps) * self.cycle_mult) + self.warmup_steps
        else:
            if epoch >= self.first_cycle_steps:
                if self.cycle_mult == 1.:
                    self.step_in_cycle = epoch % self.first_cycle_steps
                    self.cycle = epoch // self.first_cycle_steps
                else:
                    # Invert the geometric series of cycle lengths to find the cycle index.
                    n = int(math.log((epoch / self.first_cycle_steps * (self.cycle_mult - 1) + 1), self.cycle_mult))
                    self.cycle = n
                    self.step_in_cycle = epoch - int(self.first_cycle_steps * (self.cycle_mult ** n - 1) / (self.cycle_mult - 1))
                    self.cur_cycle_steps = self.first_cycle_steps * self.cycle_mult ** (n)
            else:
                # Still inside the first cycle: reset the cycle counter too,
                # otherwise a jump back from a later cycle would keep the
                # gamma-decayed peak learning rate of that later cycle.
                self.cycle = 0
                self.cur_cycle_steps = self.first_cycle_steps
                self.step_in_cycle = epoch
                
        self.max_lr = self.base_max_lr * (self.gamma**self.cycle)
        self.last_epoch = math.floor(epoch)
        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group['lr'] = lr

### CutMix
- Robust한 모델을 만들기 위해 선택한 방법입니다.
- 쉬운 문제에서 사용할 경우 성능이 많이 좋아졌었던 경험이 있습니다.
- 학습을 더욱 복잡하게 만들어서 generalized performance의 향상을 노렸습니다.
- NFNet에서는 사용하지 않았고, Swin Transformer에만 사용해서 앙상블했습니다.

In [13]:
def rand_bbox(size, lam):
    """Sample a random box covering roughly ``1 - lam`` of the image area.

    Args:
        size: Shape of the image batch; only ``size[2]`` and ``size[3]`` are
            read. The names ``width`` / ``height`` below refer to those two
            dimensions in that order.
        lam: Mixing ratio; the box side lengths are ``sqrt(1 - lam)`` times
            the corresponding image dimension.

    Returns:
        ``(bbx1, bby1, bbx2, bby2)``: box bounds along ``size[2]`` (``bbx*``)
        and ``size[3]`` (``bby*``), clipped to the image.
    """
    width = size[2]
    height = size[3]
    cut_ratio = np.sqrt(1. - lam)
    # The np.int alias was removed in NumPy 1.24; the builtin int is equivalent.
    cut_width = int(width * cut_ratio)
    cut_height = int(height * cut_ratio)

    # uniform
    cx = np.random.randint(width)
    cy = np.random.randint(height)

    # Clipping keeps the box inside the image, so it can end up smaller than
    # requested when the centre lies close to a border.
    bbx1 = np.clip(cx - cut_width // 2, 0, width)
    bby1 = np.clip(cy - cut_height // 2, 0, height)
    bbx2 = np.clip(cx + cut_width // 2, 0, width)
    bby2 = np.clip(cy + cut_height // 2, 0, height)

    return bbx1, bby1, bbx2, bby2

In [14]:
class CutMix(object):
    """CutMix augmentation for a batch of images.

    Args:
        beta: Parameter of the symmetric Beta distribution the mixing ratio
            is drawn from.
        cutmix_prob: Stored on the instance only; ``forward`` does not read
            it, so the caller decides when to apply CutMix.
    """

    def __init__(self, beta, cutmix_prob) -> None:
        super().__init__()
        self.beta = beta
        self.cutmix_prob = cutmix_prob 

    def forward(self, images, labels):
        """Paste a random box from a shuffled copy of the batch into ``images``.

        ``images`` is modified in place.

        Returns:
            Dict with ``'image'`` (the mixed batch), ``'label_1'`` (original
            labels), ``'label_2'`` (labels of the pasted samples) and
            ``'lam'`` (fraction of each image that is left unchanged).
        """
        # generate mixed sample
        lam = np.random.beta(self.beta, self.beta)
        # Build the permutation on the batch's own device instead of calling
        # .cuda(), so the augmentation also works for CPU training.
        rand_index = torch.randperm(images.size()[0], device=images.device)
        label_1 = labels
        label_2 = labels[rand_index]
        bbx1, bby1, bbx2, bby2 = rand_bbox(images.size(), lam)
        images[:, :, bbx1:bbx2, bby1:bby2] = images[rand_index, :, bbx1:bbx2, bby1:bby2]

        # adjust lambda to exactly match pixel ratio
        lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (images.size()[-1] * images.size()[-2]))

        return {'lam' : lam, 'image' : images, 'label_1' : label_1, 'label_2' : label_2}

In [15]:
class ComputeMetric(object):
    """Compute the evaluation metric selected by name.

    Args:
        metric: Name of the metric; only ``'accuracy'`` is supported.
    """

    def __init__(self, metric) -> None:
        super().__init__() 
        self.metric = metric    

    def cutmix_accuracy(self, logits, labels, topk=(1, 5)):
        """Return the top-k accuracies (in percent) for every ``k`` in ``topk``.

        Args:
            logits: Tensor of shape ``(batch, num_classes)``.
            labels: Tensor of shape ``(batch,)`` holding class indices.
            topk: Values of ``k`` to evaluate.

        Returns:
            List with one single-element tensor per entry of ``topk``.
        """
        # Never ask for more predictions than there are classes; for such k
        # every sample trivially counts as a hit.
        max_k = min(max(topk), logits.size(1))
        batch_size = labels.size(0)

        _, pred = logits.topk(max_k, 1, True, True)
        pred = pred.t()
        matches = pred.eq(labels.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # Rows 0..k-1 hold the k highest-scoring predictions per sample.
            matches_k = matches[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(matches_k.mul_(100.0 / batch_size))

        return res

    def compute(self, logits, labels, topk=(1, 5)):
        """Dispatch to the metric chosen at construction time.

        Raises:
            ValueError: If the configured metric is not supported.
        """
        if self.metric == 'accuracy':
            return self.cutmix_accuracy(logits=logits, labels=labels, topk=topk)

        raise ValueError(f'Unsupported metric: {self.metric!r}')

### 지표 계산을 위한 average meter

In [16]:
class AverageMeter(object):
    """Track the latest value and the weighted running average of a quantity.

    Attributes are ``val`` (last value), ``sum`` (weighted sum), ``count``
    (total weight) and ``avg`` (``sum / count``).
    """

    def __init__(self) -> None:
        super().__init__()
        self.reset()

    def reset(self):
        """Zero every statistic."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` observed with weight ``n`` and refresh the average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

## Training
 - 5 Fold cross validation 사용
 - 모델 별로 lr, epoch 조절해가면서 다르게 학습했습니다.
 - EDA(eda.ipynb 파일 만들어서 진행) 해봤을 때 guitar와 person같이 class imbalance가 있어 stratified하게 train과 validation 분할 했습니다.

In [17]:
# import matplotlib as pyplot
# import seaborn as sns

# ax = sns.countplot(train_df['label'])

# for p, label in zip(ax.patches, label_list):
#     ax.annotate(label, (p.get_x(), p.get_height() + 0.15))

In [18]:
def train(cfg, k, train_loader, val_loader):
    """Train one model and save checkpoints under the configured output directory.

    Args:
        cfg: Config manager; settings are read from ``cfg.values``.
        k: Fold number. The model summary is printed when ``k < 2``, and
            validation checkpoints go into a ``{k}_fold`` sub-directory when
            ``k > 0``.
        train_loader: Loader yielding dicts with ``'image'`` and ``'label'``.
        val_loader: Validation loader of the same format; only iterated when
            ``train_only`` is false in the config.

    With validation enabled, a checkpoint is written whenever the validation
    top-1 accuracy is at least as good as the best seen so far. In train-only
    mode a checkpoint is written after every epoch.
    """
    # Set Config
    MODEL_ARC = cfg.values.model_arc
    OUTPUT_DIR = cfg.values.output_dir
    NUM_CLASSES = cfg.values.num_classes
    TRAIN_ONLY = cfg.values.train_only

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Set train arguments
    num_epochs = cfg.values.train_args.num_epochs
    train_batch_size = cfg.values.train_args.train_batch_size
    log_intervals = cfg.values.train_args.log_intervals
    max_lr = cfg.values.train_args.max_lr
    min_lr = cfg.values.train_args.min_lr
    cycle = cfg.values.train_args.cycle
    gamma = cfg.values.train_args.gamma

    # Set CutMix arguments
    USE_CUTMIX = cfg.values.cutmix_args.use_cutmix    
    beta = cfg.values.cutmix_args.beta
    cutmix_prob = cfg.values.cutmix_args.cutmix_prob    
    cutmix = CutMix(beta=beta, cutmix_prob=cutmix_prob)
    
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    model = PretrainedModel(model_arc=MODEL_ARC, num_classes=NUM_CLASSES)
    model.to(device)
    if k < 2:
        summary_(model, (3, IMAGE_SIZE, IMAGE_SIZE), batch_size=train_batch_size)

    optimizer = MADGRAD(model.parameters(), lr=max_lr, weight_decay=cfg.values.train_args.weight_decay)
    # The scheduler is stepped once per batch, so a cycle is measured in batches.
    first_cycle_steps = len(train_loader) * num_epochs // cycle
    scheduler = CosineAnnealingWarmupRestarts(
        optimizer, 
        first_cycle_steps=first_cycle_steps, 
        cycle_mult=1.0,
        max_lr=max_lr, 
        min_lr=min_lr, 
        warmup_steps=int(first_cycle_steps * 0.2), 
        gamma=gamma
    )
    criterion = nn.CrossEntropyLoss()

    eval_metric = ComputeMetric(cfg.values.train_args.eval_metric)
    best_acc = 0.
    os.makedirs(os.path.join(OUTPUT_DIR, MODEL_ARC), exist_ok=True)

    for epoch in range(num_epochs):
        model.train()

        loss_values = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()

        for i, train_batch in enumerate(tqdm(train_loader, desc=f'Training')):
            sample = train_batch
            images = sample['image'].float().to(device)
            labels = sample['label'].long().to(device)

            ratio = np.random.rand(1)

            if USE_CUTMIX:
                if beta > 0 and ratio < cutmix_prob:
                    # generate mixed sample
                    sample = cutmix.forward(images, labels)

                    logits = model(sample['image'])                    
                    # Weight the two label losses by the area each source image contributes.
                    loss = criterion(logits, sample['label_1']) * sample['lam'] + criterion(logits, sample['label_2']) * (1. - sample['lam'])
                else:
                    logits = model(images)
                    loss = criterion(logits, labels)
            else:
                logits = model(images)
                loss = criterion(logits, labels)

            # measure evaluation metric and record loss
            # NOTE: accuracy is measured against the original labels even for
            # CutMix batches, so it is only indicative for mixed samples.
            top1_err, top5_err = eval_metric.compute(logits.data, labels, topk=(1, 5))

            loss_values.update(loss.item(), images.size(0))
            top1.update(top1_err.item(), images.size(0))
            top5.update(top5_err.item(), images.size(0))

            # compute gradient and do optimizer step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            if i % log_intervals == 0:
                current_lr = scheduler.get_lr()[0]
                tqdm.write(f'Epoch : [{epoch + 1}/{num_epochs}][{i}/{len(train_loader)}] || '
                           f'LR : {current_lr:.5f} || '
                           f'Train Loss : {loss_values.val:.4f} ({loss_values.avg:.4f}) || '                        
                           f'Train Top 1-acc : {top1.val:.3f}% ({top1.avg:.3f})% || '
                           f'Train Top 5-acc : {top5.val:.3f}% ({top5.avg:.3f})%')

        # The training meters are deliberately kept alive here: the train-only
        # branch below puts the training top-1 accuracy into the checkpoint
        # name, which would always read 0.00% if they were reset first.
        if not TRAIN_ONLY:
            with torch.no_grad():
                model.eval()

                # Fresh meters so the validation statistics do not mix with the training ones.
                loss_values = AverageMeter()
                top1 = AverageMeter()
                top5 = AverageMeter()

                for i, val_batch in enumerate(tqdm(val_loader, desc=f'Validation')):
                    sample = val_batch
                    images = sample['image'].float().to(device)
                    labels = sample['label'].long().to(device)

                    logits = model(images)
                    loss = criterion(logits, labels)

                    top1_err, top5_err = eval_metric.compute(logits.data, labels, topk=(1, 5))
                    loss_values.update(loss.item(), images.size(0))
                    top1.update(top1_err.item(), images.size(0))
                    top5.update(top5_err.item(), images.size(0))

            tqdm.write(f'Epoch : [{epoch + 1}/{num_epochs}] || '
                       f'Val Loss : {loss_values.avg:.4f} || '                        
                       f'Val Top 1-acc : {top1.avg:.3f}% || '
                       f'Val Top 5-acc : {top5.avg:.3f}%')

            # ">=" means a tie with the best score also writes a new checkpoint.
            is_best = top1.avg >= best_acc
            best_acc = max(top1.avg, best_acc)

            if is_best:
                if k > 0:
                    os.makedirs(os.path.join(OUTPUT_DIR, MODEL_ARC, f'{k}_fold'), exist_ok=True)
                    torch.save(model.state_dict(), os.path.join(OUTPUT_DIR, MODEL_ARC, f'{k}_fold', f'{epoch + 1}_epoch_{best_acc:.2f}%_with_val.pth'))
                else:                    
                    torch.save(model.state_dict(), os.path.join(OUTPUT_DIR, MODEL_ARC, f'{epoch + 1}_epoch_{best_acc:.2f}%_with_val.pth'))
        
        else:
            torch.save(model.state_dict(), os.path.join(OUTPUT_DIR, MODEL_ARC, f'_{epoch + 1}_epoch_{top1.avg:.2f}%_only_train.pth'))

In [19]:
# Stratified K-fold cross validation: a separate model is trained for every
# fold; the number of folds comes from the config.
kfold = StratifiedKFold(n_splits=cfg.values.train_args.n_splits)
# 1-based fold counter passed to train().
k = 1
for train_idx, val_idx in kfold.split(whole_df, whole_label):
    print('\n')
    cpprint('=' * 15 + f'{k}-Fold Cross Validation' + '=' * 15)
    # Positional row selection for the current fold.
    train_df = whole_df.iloc[train_idx]
    val_df = whole_df.iloc[val_idx]

    # Only the training loader shuffles and uses the augmenting transform.
    train_loader = get_dataloader(df=train_df, transform=train_transform, batch_size=TRAIN_BATCH_SIZE, shuffle=True)
    val_loader = get_dataloader(df=val_df, transform=val_transform, batch_size=VAL_BATCH_SIZE, shuffle=False)

    train(cfg, k, train_loader, val_loader)

    k += 1



----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [32, 32, 113, 113]             864
       BatchNorm2d-2         [32, 32, 113, 113]              64
              ReLU-3         [32, 32, 113, 113]               0
            Conv2d-4         [32, 64, 111, 111]          18,432
       BatchNorm2d-5         [32, 64, 111, 111]             128
              ReLU-6         [32, 64, 111, 111]               0
            Conv2d-7         [32, 64, 111, 111]             576
            Conv2d-8        [32, 128, 111, 111]           8,192
   SeparableConv2d-9        [32, 128, 111, 111]               0
      BatchNorm2d-10        [32, 128, 111, 111]             256
             ReLU-11        [32, 128, 111, 111]               0
           Conv2d-12        [32, 128, 111, 111]           1,152
           Conv2d-13        [32, 128, 111, 111]          16,384
  SeparableConv2d-14        [32, 128,

Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [1/5][0/43] || LR : 0.00002 || Train Loss : 1.9203 (1.9203) || Train Top 1-acc : 21.875% (21.875)% || Train Top 5-acc : 81.250% (81.250)%
Epoch : [1/5][10/43] || LR : 0.00008 || Train Loss : 1.8480 (1.9122) || Train Top 1-acc : 37.500% (23.864)% || Train Top 5-acc : 90.625% (80.682)%
Epoch : [1/5][20/43] || LR : 0.00010 || Train Loss : 1.5186 (1.8257) || Train Top 1-acc : 50.000% (29.762)% || Train Top 5-acc : 96.875% (85.714)%
Epoch : [1/5][30/43] || LR : 0.00008 || Train Loss : 1.1210 (1.6956) || Train Top 1-acc : 75.000% (37.399)% || Train Top 5-acc : 96.875% (89.012)%
Epoch : [1/5][40/43] || LR : 0.00006 || Train Loss : 0.7559 (1.5560) || Train Top 1-acc : 84.375% (44.512)% || Train Top 5-acc : 96.875% (91.159)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [1/5] || Val Loss : 0.4834 || Val Top 1-acc : 92.647% || Val Top 5-acc : 100.000%


Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [2/5][0/43] || LR : 0.00005 || Train Loss : 0.7675 (0.7675) || Train Top 1-acc : 78.125% (78.125)% || Train Top 5-acc : 100.000% (100.000)%
Epoch : [2/5][10/43] || LR : 0.00003 || Train Loss : 0.5338 (1.0495) || Train Top 1-acc : 87.500% (69.318)% || Train Top 5-acc : 100.000% (95.739)%
Epoch : [2/5][20/43] || LR : 0.00001 || Train Loss : 0.7972 (1.0243) || Train Top 1-acc : 87.500% (69.345)% || Train Top 5-acc : 100.000% (95.982)%
Epoch : [2/5][30/43] || LR : 0.00002 || Train Loss : 1.0051 (0.9466) || Train Top 1-acc : 90.625% (71.169)% || Train Top 5-acc : 100.000% (96.169)%
Epoch : [2/5][40/43] || LR : 0.00005 || Train Loss : 0.9586 (0.8539) || Train Top 1-acc : 87.500% (76.143)% || Train Top 5-acc : 100.000% (96.951)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [2/5] || Val Loss : 0.3082 || Val Top 1-acc : 96.176% || Val Top 5-acc : 100.000%


Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [3/5][0/43] || LR : 0.00005 || Train Loss : 0.3802 (0.3802) || Train Top 1-acc : 87.500% (87.500)% || Train Top 5-acc : 96.875% (96.875)%
Epoch : [3/5][10/43] || LR : 0.00005 || Train Loss : 0.2869 (0.6715) || Train Top 1-acc : 96.875% (75.852)% || Train Top 5-acc : 100.000% (97.443)%
Epoch : [3/5][20/43] || LR : 0.00004 || Train Loss : 0.5296 (0.7534) || Train Top 1-acc : 93.750% (76.786)% || Train Top 5-acc : 100.000% (97.917)%
Epoch : [3/5][30/43] || LR : 0.00003 || Train Loss : 0.2400 (0.7004) || Train Top 1-acc : 100.000% (80.847)% || Train Top 5-acc : 100.000% (98.488)%
Epoch : [3/5][40/43] || LR : 0.00002 || Train Loss : 1.1686 (0.7120) || Train Top 1-acc : 43.750% (79.116)% || Train Top 5-acc : 90.625% (98.095)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [3/5] || Val Loss : 0.2482 || Val Top 1-acc : 96.765% || Val Top 5-acc : 100.000%


Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [4/5][0/43] || LR : 0.00001 || Train Loss : 0.9188 (0.9188) || Train Top 1-acc : 31.250% (31.250)% || Train Top 5-acc : 87.500% (87.500)%
Epoch : [4/5][10/43] || LR : 0.00001 || Train Loss : 0.1865 (0.6020) || Train Top 1-acc : 96.875% (82.955)% || Train Top 5-acc : 100.000% (98.580)%
Epoch : [4/5][20/43] || LR : 0.00002 || Train Loss : 0.2276 (0.6334) || Train Top 1-acc : 100.000% (82.887)% || Train Top 5-acc : 100.000% (97.321)%
Epoch : [4/5][30/43] || LR : 0.00002 || Train Loss : 1.5387 (0.7353) || Train Top 1-acc : 25.000% (77.621)% || Train Top 5-acc : 93.750% (97.379)%
Epoch : [4/5][40/43] || LR : 0.00002 || Train Loss : 0.9877 (0.7028) || Train Top 1-acc : 87.500% (78.659)% || Train Top 5-acc : 100.000% (97.409)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [4/5] || Val Loss : 0.2496 || Val Top 1-acc : 97.059% || Val Top 5-acc : 100.000%


Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [5/5][0/43] || LR : 0.00002 || Train Loss : 1.1415 (1.1415) || Train Top 1-acc : 71.875% (71.875)% || Train Top 5-acc : 93.750% (93.750)%
Epoch : [5/5][10/43] || LR : 0.00002 || Train Loss : 0.3550 (0.6978) || Train Top 1-acc : 100.000% (77.841)% || Train Top 5-acc : 100.000% (94.602)%
Epoch : [5/5][20/43] || LR : 0.00001 || Train Loss : 0.2111 (0.6255) || Train Top 1-acc : 100.000% (83.631)% || Train Top 5-acc : 100.000% (96.875)%
Epoch : [5/5][30/43] || LR : 0.00001 || Train Loss : 0.1992 (0.6200) || Train Top 1-acc : 96.875% (83.972)% || Train Top 5-acc : 100.000% (97.379)%
Epoch : [5/5][40/43] || LR : 0.00001 || Train Loss : 0.0931 (0.5607) || Train Top 1-acc : 100.000% (86.662)% || Train Top 5-acc : 100.000% (98.018)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [5/5] || Val Loss : 0.1927 || Val Top 1-acc : 97.353% || Val Top 5-acc : 100.000%




Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [1/5][0/43] || LR : 0.00002 || Train Loss : 1.9223 (1.9223) || Train Top 1-acc : 21.875% (21.875)% || Train Top 5-acc : 81.250% (81.250)%
Epoch : [1/5][10/43] || LR : 0.00008 || Train Loss : 1.8392 (1.9115) || Train Top 1-acc : 34.375% (21.307)% || Train Top 5-acc : 87.500% (81.818)%
Epoch : [1/5][20/43] || LR : 0.00010 || Train Loss : 1.6471 (1.8243) || Train Top 1-acc : 37.500% (28.125)% || Train Top 5-acc : 96.875% (87.054)%
Epoch : [1/5][30/43] || LR : 0.00008 || Train Loss : 1.1576 (1.7274) || Train Top 1-acc : 71.875% (33.065)% || Train Top 5-acc : 100.000% (88.609)%
Epoch : [1/5][40/43] || LR : 0.00006 || Train Loss : 0.8240 (1.6136) || Train Top 1-acc : 81.250% (40.473)% || Train Top 5-acc : 100.000% (90.244)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [1/5] || Val Loss : 0.4950 || Val Top 1-acc : 92.647% || Val Top 5-acc : 100.000%


Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [2/5][0/43] || LR : 0.00005 || Train Loss : 0.7855 (0.7855) || Train Top 1-acc : 84.375% (84.375)% || Train Top 5-acc : 100.000% (100.000)%
Epoch : [2/5][10/43] || LR : 0.00003 || Train Loss : 0.5936 (0.7464) || Train Top 1-acc : 93.750% (78.693)% || Train Top 5-acc : 100.000% (97.159)%
Epoch : [2/5][20/43] || LR : 0.00001 || Train Loss : 0.5717 (0.8689) || Train Top 1-acc : 87.500% (78.274)% || Train Top 5-acc : 100.000% (97.173)%
Epoch : [2/5][30/43] || LR : 0.00002 || Train Loss : 0.6279 (0.8218) || Train Top 1-acc : 78.125% (78.629)% || Train Top 5-acc : 100.000% (97.379)%
Epoch : [2/5][40/43] || LR : 0.00005 || Train Loss : 1.3799 (0.8128) || Train Top 1-acc : 56.250% (79.116)% || Train Top 5-acc : 96.875% (97.713)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [2/5] || Val Loss : 0.2858 || Val Top 1-acc : 96.176% || Val Top 5-acc : 100.000%


Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [3/5][0/43] || LR : 0.00005 || Train Loss : 0.3456 (0.3456) || Train Top 1-acc : 93.750% (93.750)% || Train Top 5-acc : 100.000% (100.000)%
Epoch : [3/5][10/43] || LR : 0.00005 || Train Loss : 1.4133 (0.9264) || Train Top 1-acc : 40.625% (69.602)% || Train Top 5-acc : 87.500% (96.023)%
Epoch : [3/5][20/43] || LR : 0.00004 || Train Loss : 0.8683 (0.7701) || Train Top 1-acc : 93.750% (79.018)% || Train Top 5-acc : 96.875% (97.321)%
Epoch : [3/5][30/43] || LR : 0.00003 || Train Loss : 0.3017 (0.7191) || Train Top 1-acc : 100.000% (79.536)% || Train Top 5-acc : 100.000% (97.581)%
Epoch : [3/5][40/43] || LR : 0.00002 || Train Loss : 0.3100 (0.6492) || Train Top 1-acc : 93.750% (82.470)% || Train Top 5-acc : 100.000% (98.018)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [3/5] || Val Loss : 0.1768 || Val Top 1-acc : 97.647% || Val Top 5-acc : 99.706%


Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [4/5][0/43] || LR : 0.00001 || Train Loss : 1.3125 (1.3125) || Train Top 1-acc : 40.625% (40.625)% || Train Top 5-acc : 100.000% (100.000)%
Epoch : [4/5][10/43] || LR : 0.00001 || Train Loss : 1.5986 (0.7030) || Train Top 1-acc : 31.250% (72.443)% || Train Top 5-acc : 90.625% (97.443)%
Epoch : [4/5][20/43] || LR : 0.00002 || Train Loss : 0.7299 (0.6687) || Train Top 1-acc : 90.625% (81.399)% || Train Top 5-acc : 100.000% (98.512)%
Epoch : [4/5][30/43] || LR : 0.00002 || Train Loss : 0.1231 (0.7180) || Train Top 1-acc : 100.000% (79.335)% || Train Top 5-acc : 100.000% (97.984)%
Epoch : [4/5][40/43] || LR : 0.00002 || Train Loss : 0.1234 (0.7164) || Train Top 1-acc : 100.000% (77.058)% || Train Top 5-acc : 100.000% (97.942)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [4/5] || Val Loss : 0.2127 || Val Top 1-acc : 97.647% || Val Top 5-acc : 100.000%


Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [5/5][0/43] || LR : 0.00002 || Train Loss : 1.1118 (1.1118) || Train Top 1-acc : 71.875% (71.875)% || Train Top 5-acc : 100.000% (100.000)%
Epoch : [5/5][10/43] || LR : 0.00002 || Train Loss : 0.7215 (0.7230) || Train Top 1-acc : 93.750% (79.830)% || Train Top 5-acc : 100.000% (96.875)%
Epoch : [5/5][20/43] || LR : 0.00001 || Train Loss : 0.1403 (0.5305) || Train Top 1-acc : 100.000% (87.054)% || Train Top 5-acc : 100.000% (98.363)%
Epoch : [5/5][30/43] || LR : 0.00001 || Train Loss : 0.1886 (0.5688) || Train Top 1-acc : 96.875% (86.391)% || Train Top 5-acc : 100.000% (97.984)%
Epoch : [5/5][40/43] || LR : 0.00001 || Train Loss : 1.3608 (0.6139) || Train Top 1-acc : 37.500% (84.604)% || Train Top 5-acc : 96.875% (97.942)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [5/5] || Val Loss : 0.1855 || Val Top 1-acc : 97.941% || Val Top 5-acc : 100.000%




Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [1/5][0/43] || LR : 0.00002 || Train Loss : 1.9670 (1.9670) || Train Top 1-acc : 15.625% (15.625)% || Train Top 5-acc : 68.750% (68.750)%
Epoch : [1/5][10/43] || LR : 0.00008 || Train Loss : 1.8559 (1.9282) || Train Top 1-acc : 34.375% (16.761)% || Train Top 5-acc : 87.500% (75.852)%
Epoch : [1/5][20/43] || LR : 0.00010 || Train Loss : 1.8224 (1.8658) || Train Top 1-acc : 31.250% (25.446)% || Train Top 5-acc : 87.500% (82.589)%
Epoch : [1/5][30/43] || LR : 0.00008 || Train Loss : 1.1161 (1.7131) || Train Top 1-acc : 75.000% (38.105)% || Train Top 5-acc : 90.625% (85.786)%
Epoch : [1/5][40/43] || LR : 0.00006 || Train Loss : 1.5691 (1.5651) || Train Top 1-acc : 43.750% (45.351)% || Train Top 5-acc : 90.625% (88.796)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [1/5] || Val Loss : 0.5015 || Val Top 1-acc : 92.353% || Val Top 5-acc : 99.706%


Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [2/5][0/43] || LR : 0.00005 || Train Loss : 0.6781 (0.6781) || Train Top 1-acc : 87.500% (87.500)% || Train Top 5-acc : 100.000% (100.000)%
Epoch : [2/5][10/43] || LR : 0.00003 || Train Loss : 0.4105 (0.7941) || Train Top 1-acc : 90.625% (80.114)% || Train Top 5-acc : 100.000% (98.011)%
Epoch : [2/5][20/43] || LR : 0.00001 || Train Loss : 0.5167 (0.8333) || Train Top 1-acc : 87.500% (79.167)% || Train Top 5-acc : 100.000% (97.321)%
Epoch : [2/5][30/43] || LR : 0.00002 || Train Loss : 1.2400 (0.8937) || Train Top 1-acc : 65.625% (75.605)% || Train Top 5-acc : 100.000% (96.371)%
Epoch : [2/5][40/43] || LR : 0.00005 || Train Loss : 0.9813 (0.8106) || Train Top 1-acc : 87.500% (79.192)% || Train Top 5-acc : 100.000% (97.104)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [2/5] || Val Loss : 0.3283 || Val Top 1-acc : 95.294% || Val Top 5-acc : 100.000%


Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [3/5][0/43] || LR : 0.00005 || Train Loss : 0.2580 (0.2580) || Train Top 1-acc : 96.875% (96.875)% || Train Top 5-acc : 100.000% (100.000)%
Epoch : [3/5][10/43] || LR : 0.00005 || Train Loss : 1.2090 (0.7323) || Train Top 1-acc : 68.750% (80.398)% || Train Top 5-acc : 96.875% (95.739)%
Epoch : [3/5][20/43] || LR : 0.00004 || Train Loss : 0.3418 (0.7781) || Train Top 1-acc : 90.625% (75.893)% || Train Top 5-acc : 100.000% (95.387)%
Epoch : [3/5][30/43] || LR : 0.00003 || Train Loss : 0.3173 (0.7079) || Train Top 1-acc : 96.875% (78.427)% || Train Top 5-acc : 100.000% (95.968)%
Epoch : [3/5][40/43] || LR : 0.00002 || Train Loss : 0.4002 (0.6562) || Train Top 1-acc : 84.375% (80.107)% || Train Top 5-acc : 100.000% (96.875)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [3/5] || Val Loss : 0.2573 || Val Top 1-acc : 95.588% || Val Top 5-acc : 100.000%


Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [4/5][0/43] || LR : 0.00001 || Train Loss : 0.2818 (0.2818) || Train Top 1-acc : 93.750% (93.750)% || Train Top 5-acc : 100.000% (100.000)%
Epoch : [4/5][10/43] || LR : 0.00001 || Train Loss : 0.1384 (0.6770) || Train Top 1-acc : 100.000% (80.114)% || Train Top 5-acc : 100.000% (99.432)%
Epoch : [4/5][20/43] || LR : 0.00002 || Train Loss : 1.3945 (0.7529) || Train Top 1-acc : 15.625% (73.958)% || Train Top 5-acc : 93.750% (98.214)%
Epoch : [4/5][30/43] || LR : 0.00002 || Train Loss : 1.1996 (0.7127) || Train Top 1-acc : 71.875% (75.605)% || Train Top 5-acc : 100.000% (97.782)%
Epoch : [4/5][40/43] || LR : 0.00002 || Train Loss : 1.1492 (0.7329) || Train Top 1-acc : 65.625% (76.372)% || Train Top 5-acc : 100.000% (97.409)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [4/5] || Val Loss : 0.2539 || Val Top 1-acc : 96.176% || Val Top 5-acc : 100.000%


Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [5/5][0/43] || LR : 0.00002 || Train Loss : 0.1440 (0.1440) || Train Top 1-acc : 100.000% (100.000)% || Train Top 5-acc : 100.000% (100.000)%
Epoch : [5/5][10/43] || LR : 0.00002 || Train Loss : 0.1117 (0.4564) || Train Top 1-acc : 100.000% (90.625)% || Train Top 5-acc : 100.000% (98.864)%
Epoch : [5/5][20/43] || LR : 0.00001 || Train Loss : 1.3803 (0.5122) || Train Top 1-acc : 31.250% (85.417)% || Train Top 5-acc : 81.250% (97.917)%
Epoch : [5/5][30/43] || LR : 0.00001 || Train Loss : 0.1066 (0.4303) || Train Top 1-acc : 100.000% (87.298)% || Train Top 5-acc : 100.000% (98.387)%
Epoch : [5/5][40/43] || LR : 0.00001 || Train Loss : 0.0954 (0.4079) || Train Top 1-acc : 100.000% (88.643)% || Train Top 5-acc : 100.000% (98.323)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [5/5] || Val Loss : 0.1906 || Val Top 1-acc : 96.765% || Val Top 5-acc : 100.000%




Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [1/5][0/43] || LR : 0.00002 || Train Loss : 1.9807 (1.9807) || Train Top 1-acc : 6.250% (6.250)% || Train Top 5-acc : 62.500% (62.500)%
Epoch : [1/5][10/43] || LR : 0.00008 || Train Loss : 1.8926 (1.9440) || Train Top 1-acc : 21.875% (14.489)% || Train Top 5-acc : 96.875% (75.000)%
Epoch : [1/5][20/43] || LR : 0.00010 || Train Loss : 1.6877 (1.8705) || Train Top 1-acc : 53.125% (26.637)% || Train Top 5-acc : 90.625% (83.333)%
Epoch : [1/5][30/43] || LR : 0.00008 || Train Loss : 1.4514 (1.7214) || Train Top 1-acc : 34.375% (37.802)% || Train Top 5-acc : 87.500% (87.198)%
Epoch : [1/5][40/43] || LR : 0.00006 || Train Loss : 0.9629 (1.5835) || Train Top 1-acc : 78.125% (44.665)% || Train Top 5-acc : 100.000% (88.872)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [1/5] || Val Loss : 0.5897 || Val Top 1-acc : 89.086% || Val Top 5-acc : 99.115%


Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [2/5][0/43] || LR : 0.00005 || Train Loss : 1.3016 (1.3016) || Train Top 1-acc : 15.625% (15.625)% || Train Top 5-acc : 87.500% (87.500)%
Epoch : [2/5][10/43] || LR : 0.00003 || Train Loss : 0.4033 (0.8551) || Train Top 1-acc : 90.625% (77.273)% || Train Top 5-acc : 100.000% (98.011)%
Epoch : [2/5][20/43] || LR : 0.00001 || Train Loss : 0.7328 (0.8203) || Train Top 1-acc : 100.000% (75.744)% || Train Top 5-acc : 100.000% (97.321)%
Epoch : [2/5][30/43] || LR : 0.00002 || Train Loss : 1.2078 (0.7729) || Train Top 1-acc : 40.625% (78.125)% || Train Top 5-acc : 100.000% (98.085)%
Epoch : [2/5][40/43] || LR : 0.00005 || Train Loss : 0.3194 (0.7712) || Train Top 1-acc : 96.875% (78.277)% || Train Top 5-acc : 100.000% (97.866)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [2/5] || Val Loss : 0.3965 || Val Top 1-acc : 90.560% || Val Top 5-acc : 99.115%


Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [3/5][0/43] || LR : 0.00005 || Train Loss : 0.3515 (0.3515) || Train Top 1-acc : 90.625% (90.625)% || Train Top 5-acc : 100.000% (100.000)%
Epoch : [3/5][10/43] || LR : 0.00005 || Train Loss : 1.0856 (0.5254) || Train Top 1-acc : 93.750% (90.909)% || Train Top 5-acc : 96.875% (99.148)%
Epoch : [3/5][20/43] || LR : 0.00004 || Train Loss : 0.3155 (0.5493) || Train Top 1-acc : 93.750% (88.839)% || Train Top 5-acc : 100.000% (98.214)%
Epoch : [3/5][30/43] || LR : 0.00003 || Train Loss : 0.1985 (0.4975) || Train Top 1-acc : 93.750% (90.020)% || Train Top 5-acc : 100.000% (98.790)%
Epoch : [3/5][40/43] || LR : 0.00002 || Train Loss : 0.1638 (0.5185) || Train Top 1-acc : 96.875% (89.024)% || Train Top 5-acc : 100.000% (98.476)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [3/5] || Val Loss : 0.3052 || Val Top 1-acc : 92.920% || Val Top 5-acc : 99.705%


Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [4/5][0/43] || LR : 0.00001 || Train Loss : 0.3282 (0.3282) || Train Top 1-acc : 96.875% (96.875)% || Train Top 5-acc : 100.000% (100.000)%
Epoch : [4/5][10/43] || LR : 0.00001 || Train Loss : 1.2023 (0.8562) || Train Top 1-acc : 71.875% (70.455)% || Train Top 5-acc : 100.000% (97.159)%
Epoch : [4/5][20/43] || LR : 0.00002 || Train Loss : 0.2844 (0.6832) || Train Top 1-acc : 93.750% (79.762)% || Train Top 5-acc : 100.000% (98.363)%
Epoch : [4/5][30/43] || LR : 0.00002 || Train Loss : 0.1341 (0.6180) || Train Top 1-acc : 96.875% (82.056)% || Train Top 5-acc : 100.000% (98.387)%
Epoch : [4/5][40/43] || LR : 0.00002 || Train Loss : 0.7878 (0.6376) || Train Top 1-acc : 87.500% (80.869)% || Train Top 5-acc : 100.000% (98.171)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [4/5] || Val Loss : 0.2939 || Val Top 1-acc : 94.100% || Val Top 5-acc : 99.705%


Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [5/5][0/43] || LR : 0.00002 || Train Loss : 0.7430 (0.7430) || Train Top 1-acc : 96.875% (96.875)% || Train Top 5-acc : 100.000% (100.000)%
Epoch : [5/5][10/43] || LR : 0.00002 || Train Loss : 1.2862 (0.7751) || Train Top 1-acc : 34.375% (79.830)% || Train Top 5-acc : 90.625% (98.864)%
Epoch : [5/5][20/43] || LR : 0.00001 || Train Loss : 1.4696 (0.6656) || Train Top 1-acc : 28.125% (80.655)% || Train Top 5-acc : 90.625% (98.363)%
Epoch : [5/5][30/43] || LR : 0.00001 || Train Loss : 0.1067 (0.5903) || Train Top 1-acc : 100.000% (80.242)% || Train Top 5-acc : 100.000% (97.581)%
Epoch : [5/5][40/43] || LR : 0.00001 || Train Loss : 0.2489 (0.5192) || Train Top 1-acc : 93.750% (83.994)% || Train Top 5-acc : 100.000% (98.171)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [5/5] || Val Loss : 0.2686 || Val Top 1-acc : 94.985% || Val Top 5-acc : 99.705%




Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [1/5][0/43] || LR : 0.00002 || Train Loss : 1.9370 (1.9370) || Train Top 1-acc : 21.875% (21.875)% || Train Top 5-acc : 68.750% (68.750)%
Epoch : [1/5][10/43] || LR : 0.00008 || Train Loss : 1.9181 (1.9284) || Train Top 1-acc : 28.125% (18.750)% || Train Top 5-acc : 81.250% (76.705)%
Epoch : [1/5][20/43] || LR : 0.00010 || Train Loss : 1.7258 (1.8563) || Train Top 1-acc : 46.875% (28.720)% || Train Top 5-acc : 96.875% (83.482)%
Epoch : [1/5][30/43] || LR : 0.00008 || Train Loss : 0.8337 (1.7009) || Train Top 1-acc : 90.625% (40.524)% || Train Top 5-acc : 100.000% (87.601)%
Epoch : [1/5][40/43] || LR : 0.00006 || Train Loss : 0.7957 (1.5443) || Train Top 1-acc : 81.250% (48.018)% || Train Top 5-acc : 100.000% (89.939)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [1/5] || Val Loss : 0.6502 || Val Top 1-acc : 82.596% || Val Top 5-acc : 98.525%


Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [2/5][0/43] || LR : 0.00005 || Train Loss : 0.5541 (0.5541) || Train Top 1-acc : 90.625% (90.625)% || Train Top 5-acc : 100.000% (100.000)%
Epoch : [2/5][10/43] || LR : 0.00003 || Train Loss : 0.3884 (0.7828) || Train Top 1-acc : 93.750% (78.977)% || Train Top 5-acc : 100.000% (96.591)%
Epoch : [2/5][20/43] || LR : 0.00001 || Train Loss : 1.2102 (0.7854) || Train Top 1-acc : 81.250% (79.315)% || Train Top 5-acc : 96.875% (96.875)%
Epoch : [2/5][30/43] || LR : 0.00002 || Train Loss : 1.0425 (0.8321) || Train Top 1-acc : 81.250% (75.101)% || Train Top 5-acc : 100.000% (95.968)%
Epoch : [2/5][40/43] || LR : 0.00005 || Train Loss : 1.5090 (0.8408) || Train Top 1-acc : 28.125% (76.143)% || Train Top 5-acc : 84.375% (96.341)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [2/5] || Val Loss : 0.4726 || Val Top 1-acc : 91.445% || Val Top 5-acc : 99.705%


Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [3/5][0/43] || LR : 0.00005 || Train Loss : 0.2571 (0.2571) || Train Top 1-acc : 100.000% (100.000)% || Train Top 5-acc : 100.000% (100.000)%
Epoch : [3/5][10/43] || LR : 0.00005 || Train Loss : 0.1653 (0.4482) || Train Top 1-acc : 100.000% (85.795)% || Train Top 5-acc : 100.000% (98.580)%
Epoch : [3/5][20/43] || LR : 0.00004 || Train Loss : 0.4610 (0.4116) || Train Top 1-acc : 96.875% (90.476)% || Train Top 5-acc : 100.000% (99.107)%
Epoch : [3/5][30/43] || LR : 0.00003 || Train Loss : 1.0448 (0.5106) || Train Top 1-acc : 84.375% (86.290)% || Train Top 5-acc : 100.000% (98.387)%
Epoch : [3/5][40/43] || LR : 0.00002 || Train Loss : 0.5503 (0.5365) || Train Top 1-acc : 90.625% (85.366)% || Train Top 5-acc : 100.000% (98.399)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [3/5] || Val Loss : 0.4061 || Val Top 1-acc : 92.330% || Val Top 5-acc : 99.115%


Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [4/5][0/43] || LR : 0.00001 || Train Loss : 0.0869 (0.0869) || Train Top 1-acc : 100.000% (100.000)% || Train Top 5-acc : 100.000% (100.000)%
Epoch : [4/5][10/43] || LR : 0.00001 || Train Loss : 1.1783 (0.7143) || Train Top 1-acc : 71.875% (78.693)% || Train Top 5-acc : 100.000% (96.875)%
Epoch : [4/5][20/43] || LR : 0.00002 || Train Loss : 0.1454 (0.6725) || Train Top 1-acc : 100.000% (77.083)% || Train Top 5-acc : 100.000% (96.875)%
Epoch : [4/5][30/43] || LR : 0.00002 || Train Loss : 0.3005 (0.7633) || Train Top 1-acc : 93.750% (72.480)% || Train Top 5-acc : 100.000% (96.371)%
Epoch : [4/5][40/43] || LR : 0.00002 || Train Loss : 0.8749 (0.7099) || Train Top 1-acc : 90.625% (75.000)% || Train Top 5-acc : 96.875% (96.570)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [4/5] || Val Loss : 0.3896 || Val Top 1-acc : 92.035% || Val Top 5-acc : 99.410%


Training:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch : [5/5][0/43] || LR : 0.00002 || Train Loss : 0.9653 (0.9653) || Train Top 1-acc : 84.375% (84.375)% || Train Top 5-acc : 100.000% (100.000)%
Epoch : [5/5][10/43] || LR : 0.00002 || Train Loss : 0.9112 (0.6103) || Train Top 1-acc : 87.500% (77.841)% || Train Top 5-acc : 100.000% (99.148)%
Epoch : [5/5][20/43] || LR : 0.00001 || Train Loss : 0.0831 (0.5784) || Train Top 1-acc : 100.000% (81.399)% || Train Top 5-acc : 100.000% (99.107)%
Epoch : [5/5][30/43] || LR : 0.00001 || Train Loss : 1.3651 (0.6546) || Train Top 1-acc : 53.125% (78.427)% || Train Top 5-acc : 96.875% (97.883)%
Epoch : [5/5][40/43] || LR : 0.00001 || Train Loss : 0.7441 (0.6678) || Train Top 1-acc : 93.750% (78.125)% || Train Top 5-acc : 100.000% (98.171)%


Validation:   0%|          | 0/11 [00:00<?, ?it/s]

Epoch : [5/5] || Val Loss : 0.3823 || Val Top 1-acc : 92.625% || Val Top 5-acc : 99.410%


## Inference
- Test Time Augmentation 사용 (only hflip)

In [20]:
# class TestDataset(Dataset):
#     def __init__(self, data_path='./test/0/', transform=None):
#         self.data_path = data_path
#         self.data = os.listdir(data_path)
#         self.transform = transform
        
#     def __len__(self):
#         return len(self.data)

#     def __getitem__(self, idx):        
#         image_path = os.path.join(self.data_path, self.data[idx])
#         image = cv2.imread(image_path)
#         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
#         if self.transform:
#             image = self.transform(image=np.array(image))['image']
            
#         return image

In [21]:
# SEED = 77  
# BATCH_SIZE = 32    
# IMAGE_SIZE = 227
# MODEL_ARC = 'nfnet_l0'
# NUM_CLASSES = 7
# MODEL_DIR = './results'
# NUM_FOLD = 5

In [22]:
# device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [23]:
# # Fix random seed
# def seed_everything(seed: int = 42):
#     random.seed(seed)
#     np.random.seed(seed)
#     os.environ["PYTHONHASHSEED"] = str(seed)
#     torch.manual_seed(seed)
#     torch.cuda.manual_seed(seed)  # type: ignore
#     torch.backends.cudnn.deterministic = True  # type: ignore
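#     # NOTE(review): benchmark=True lets cuDNN auto-tune and possibly pick different kernels between runs,
#     # which works against the deterministic flag above; use False for reproducible results.
#     torch.backends.cudnn.benchmark = True  # type: ignore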

In [24]:
# seed_everything(SEED)

In [25]:
# test_transform = albumentations.Compose([               
#         albumentations.Resize(IMAGE_SIZE, IMAGE_SIZE),
#         albumentations.Normalize(mean=(0.4569, 0.5074, 0.5557), std=(0.2888, 0.2743, 0.2829)),
#         albumentations.pytorch.transforms.ToTensorV2()])

In [26]:
# test_dataset = TestDataset(transform=test_transform)
# test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [27]:
# class PretrainedModel(nn.Module):
#     def __init__(self, model_arc='swin_tiny_patch4_window7_224', num_classes=7):
#         super().__init__()
#         self.net = timm.create_model(model_arc, pretrained=False, num_classes=num_classes)
    
#     def forward(self, x):
#         x = self.net(x)

#         return x

In [28]:
# model = PretrainedModel(model_arc=MODEL_ARC, num_classes=NUM_CLASSES)
# model.to(device)

In [29]:
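# # NOTE(review): glob() returns matches in arbitrary order, so [-1] is not guaranteed to be the
# # newest/best checkpoint when a fold directory holds several .pth files; sort the result first.
# states = [torch.load(glob(MODEL_DIR + f'/{MODEL_ARC}/{k}_fold/*.pth')[-1]) for k in range(1, NUM_FOLD + 1)]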

In [30]:
# transforms = tta.Compose(
#     [
#         tta.HorizontalFlip(),
#         # tta.VerticalFlip(),
#         # tta.Multiply(factors=[0.9, 1, 1.1])
#     ]
# )

### model ensemble을 위해 fold 평균 예측값(TTA 적용 logits)을 npy 파일 형태로 저장

In [31]:
# probs = []
# save_ = []
# for i, images in enumerate(tqdm(test_loader)):
#     images = images.to(device)
#     avg_preds = []
#     for state in states:
#         model.load_state_dict(state)
#         model.eval()
#         tta_model = tta.ClassificationTTAWrapper(model, transforms)
#         tta_model.to(device)
#         tta_model.eval()
#         with torch.no_grad():
#             logits = tta_model(images)
#         avg_preds.append(logits.to('cpu').numpy())
#     avg_preds = np.mean(avg_preds, axis=0)
#     save_.append(avg_preds)
#     probs.append(avg_preds.argmax(-1))
# save_ = np.concatenate(save_)
# probs = np.concatenate(probs)

In [32]:
# df = pd.read_csv('./test_answer_sample_.csv')

In [33]:
# len(probs)

In [34]:
# save_.shape

In [35]:
# np.save(f'./{MODEL_ARC}.npy', save_)

In [36]:
# df['answer value'] = probs

In [37]:
# df.to_csv(f'submission_{MODEL_ARC}.csv', index=False)

## Ensemble
- nfnet_l0와 swin transformer 사용
- 각 모델 예측값의 평균을 이용한 soft voting(averaging) ensemble

In [38]:
# import numpy as np
# import pandas as pd

In [39]:
# npy_list = [
#     'nfnet_l0.npy',
#     'swin_base_patch4_window7_224.npy'
# ]

# predictions = []
# for f in npy_list:
#     predictions.append(np.load(f))

In [40]:
# np.array(predictions).shape

In [41]:
# final = np.array(predictions).mean(axis=0).argmax(-1)
# final.shape

In [42]:
# df = pd.read_csv('./test_answer_sample_.csv')

In [43]:
# df['answer value'] = final

In [44]:
# df.to_csv(f'submission_ensemble.csv', index=False)