In [1]:
import pandas as pd
import numpy as np
import cv2
import torch
import matplotlib.pyplot as plt
import os
import time
import os
import glob
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import tqdm
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam,lr_scheduler
from functools import partial
from sklearn.metrics import accuracy_score, f1_score
import torchvision.models as models

In [2]:
train = pd.read_csv('input/data/train/train.csv')
image_path = train['path']

In [3]:
category_data = []

for i in train['age']:
    if i < 30:
        category_data.append(0)
    elif i < 60:
        category_data.append(1)
    else:
        category_data.append(2)

In [4]:
train['cat_age']=category_data

In [5]:
def data_class(mask,gender,age):
    if mask == 0:
        if gender == 'male':
            if age == 0:
                return 0
            elif age == 1:
                return 1
            else:
                return 2
        else:
            if age == 0:
                return 3
            elif age == 1:
                return 4
            else:
                return 5
    elif mask == 1:
        if gender == 'male':
            if age == 0:
                return 6
            elif age == 1:
                return 7
            else:
                return 8
        else:
            if age == 0:
                return 9
            elif age == 1:
                return 10
            else:
                return 11
    else:
        if gender == 'male':
            if age == 0:
                return 12
            elif age == 1:
                return 13
            else:
                return 14
        else:
            if age == 0:
                return 15
            elif age == 1:
                return 16
            else:
                return 17

In [6]:
def data_PIL_image(path):
    
    path_list = glob.glob('input/data/train/images/{}/*.*'.format(path))
    image_list = []
    str_ref = 'input/data/train/images/{}/'.format(path)
    
    
    for i in path_list:
        img = Image.open(i)
        
        if i[len(str_ref):-4] == 'normal':
            img_mask = 2
        elif i[len(str_ref):-4] == 'incorrect_mask':
            img_mask = 1
        else:
            img_mask = 0
        
        image_list.append((img,img_mask))
    
    return image_list

In [7]:
transform = transforms.Compose([
    transforms.ToTensor()
])

In [8]:
class MyDataset(Dataset):
    
    def __init__(self,image_path,gender,age,transform):
        self.image_path = image_path
        self.gender = gender
        self.age = age
        self.image_list = []
        self.transform = transform
        
        for i,path in tqdm.tqdm(enumerate(self.image_path)):
            img_list = data_PIL_image(path)
            for img,mask in img_list:
                c = torch.tensor([data_class(mask,self.gender[i],self.age[i])])
                self.image_list.append((transform(img),c))
    
    def __getitem__(self,index):
        return self.image_list[index]
    
    def __len__(self):
        return len(self.image_list)

In [9]:
image_path = train['path']
gender_list = train['gender']
age_list =train['cat_age']

In [10]:
dataset = MyDataset(image_path = image_path, gender = gender_list, age=age_list,transform=transform)

2700it [03:35, 12.52it/s]


In [11]:
class ResBlock(nn.Module):
    def __init__(self,num_channels=16):
        super(ResBlock,self).__init__()
        
        self.conv1 = nn.Conv2d(num_channels,num_channels,kernel_size=3,stride=1,padding=1)
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.leakyrelu = nn.LeakyReLU(negative_slope=0.2,inplace=True)
        
        self.conv2 = nn.Conv2d(num_channels,num_channels,kernel_size=3,stride=1,padding=1)
        self.bn2 = nn.BatchNorm2d(num_channels)
    
    def forward(self,x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.leakyrelu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += residual
        return out

def make_block(r,n):
    residual = []
    
    for i in range(r):
        block = ResBlock(num_channels=n)
        residual.append(block)
    
    return nn.Sequential(*residual)

class ResizingNetwork(nn.Module):
    def __init__(self,r=1, n=16):
        super(ResizingNetwork, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=3,out_channels=n,kernel_size=7,stride=1,padding=3)
        self.leakyrelu1 = nn.LeakyReLU(negative_slope=0.2,inplace=True)
        
        self.conv2 = nn.Conv2d(n,n,kernel_size=1,stride=1)
        self.leakyrelu2 = nn.LeakyReLU(negative_slope=0.2,inplace=True)
        self.bn1 = nn.BatchNorm2d(n)
        
        
        self.resblock = make_block(r,n)
        
        self.conv3 = nn.Conv2d(n,n,kernel_size=3,stride=1,padding=1)
        self.bn2 = nn.BatchNorm2d(n)
        
        self.conv4 = nn.Conv2d(n,out_channels=3,kernel_size=7,stride=1,padding=3)
        
    
    def forward(self,x):
        
        #residual = F.interpolate(x,scale_factor=1.5,mode='bilinear',align_corners=True,recompute_scale_factor=True)
        residual = x
        
        out = self.conv1(x)
        out = self.leakyrelu1(out)        
        out = self.conv2(out)
        out = self.leakyrelu2(out)
        out = self.bn1(out)
        
        #out_residual = F.interpolate(out,scale_factor=1.5,mode='bilinear',align_corners=True,recompute_scale_factor=True)
        out_residual = out

        out = self.resblock(out_residual)
        out = self.conv3(out)
        out = self.bn2(out)
        out += out_residual
        out = self.conv4(out)
        out += residual
        return out

In [12]:
import timm
from efficientnet_pytorch import EfficientNet

In [30]:
resizer = ResizingNetwork()
rec_model = timm.create_model('efficientnet_b3', pretrained=True, num_classes=18)

In [41]:
resizer = ResizingNetwork()
rec_model = timm.create_model('efficientnet_b0',pretrained=True,num_classes=18)

In [116]:
resizer = ResizingNetwork()
rec_model = timm.create_model('ecaresnet101d',pretrained=True,num_classes=18)

In [13]:
resizer = ResizingNetwork()
rec_model = EfficientNet.from_pretrained('efficientnet-b7',num_classes=18)

Loaded pretrained weights for efficientnet-b7


In [68]:
resizer = ResizingNetwork()
rec_model = EfficientNet.from_pretrained('efficientnet-b6',num_classes=18)

Loaded pretrained weights for efficientnet-b6


In [28]:
resizer = ResizingNetwork()
rec_model = EfficientNet.from_pretrained('efficientnet-b4',num_classes=18)

Loaded pretrained weights for efficientnet-b4


In [23]:
resizer = ResizingNetwork()
rec_model = timm.create_model('ecaresnet269d',pretrained=True,num_classes=18)

Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/ecaresnet269d_320_ra2-7baa55cb.pth" to /opt/ml/.cache/torch/hub/checkpoints/ecaresnet269d_320_ra2-7baa55cb.pth


In [49]:
resizer = ResizingNetwork()
rec_model = timm.create_model('resnet50',pretrained=True,num_classes=18)

Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/resnet50_ram-a26f946b.pth" to /opt/ml/.cache/torch/hub/checkpoints/resnet50_ram-a26f946b.pth


In [48]:
import timm
from pprint import pprint
model_names = timm.list_models(pretrained=True)
pprint(model_names)

['adv_inception_v3',
 'cspdarknet53',
 'cspresnet50',
 'cspresnext50',
 'densenet121',
 'densenet161',
 'densenet169',
 'densenet201',
 'densenetblur121d',
 'dla34',
 'dla46_c',
 'dla46x_c',
 'dla60',
 'dla60_res2net',
 'dla60_res2next',
 'dla60x',
 'dla60x_c',
 'dla102',
 'dla102x',
 'dla102x2',
 'dla169',
 'dm_nfnet_f0',
 'dm_nfnet_f1',
 'dm_nfnet_f2',
 'dm_nfnet_f3',
 'dm_nfnet_f4',
 'dm_nfnet_f5',
 'dm_nfnet_f6',
 'dpn68',
 'dpn68b',
 'dpn92',
 'dpn98',
 'dpn107',
 'dpn131',
 'ecaresnet26t',
 'ecaresnet50d',
 'ecaresnet50d_pruned',
 'ecaresnet50t',
 'ecaresnet101d',
 'ecaresnet101d_pruned',
 'ecaresnet269d',
 'ecaresnetlight',
 'efficientnet_b0',
 'efficientnet_b1',
 'efficientnet_b1_pruned',
 'efficientnet_b2',
 'efficientnet_b2_pruned',
 'efficientnet_b2a',
 'efficientnet_b3',
 'efficientnet_b3_pruned',
 'efficientnet_b3a',
 'efficientnet_em',
 'efficientnet_es',
 'efficientnet_lite0',
 'ens_adv_inception_resnet_v2',
 'ese_vovnet19b_dw',
 'ese_vovnet39b',
 'fbnetc_100',
 'gernet_

In [11]:
n_val = int(len(dataset) * 0.2)
n_train = len(dataset) - n_val
train_set, val_set = torch.utils.data.random_split(dataset, [n_train, n_val])


train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size=8,
    num_workers=2,
    shuffle=True
)

val_loader = torch.utils.data.DataLoader(
    val_set,
    batch_size=8,
    num_workers=2,
    shuffle=True
)

In [65]:
def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = np.int(W * cut_rat)
    cut_h = np.int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

In [16]:
def CutMix(input,target,cutmix_prob,beta=1.0):
    r = np.random.rand(1)
    if beta > 0 and r < cutmix_prob:
        lam = np.random.beta(beta, beta)
        rand_index = torch.randperm(input.size()[0]).cuda()
        target_a = target
        target_b = target[rand_index]
        bbx1, bby1, bbx2, bby2 = rand_bbox(input.size(), lam)
        input[:, :, bbx1:bbx2, bby1:bby2] = input[rand_index, :, bbx1:bbx2, bby1:bby2]
        lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (input.size()[-1] * input.size()[-2]))
        target_a = torch.nn.functional.one_hot(target_a, num_classes=18)
        target_a = target_a.float()
        target_b = torch.nn.functional.one_hot(target_b, num_classes=18)
        target_b = target_b.float()
        label = lam * target_a + (1.0-lam) * target_b
        return input,label
    else:
        return input,target

In [57]:
def CutMix_two(input,target,cutmix_prob,beta=1.0):
    r = np.random.rand(1)
    if beta > 0 and r < cutmix_prob:
        lam = np.random.beta(beta, beta)
        rand_index = torch.randperm(input.size()[0]).cuda()
        target_a = target
        target_b = target[rand_index]
        bbx1, bby1, bbx2, bby2 = rand_bbox(input.size(), lam)
        input[:, :, bbx1:bbx2, bby1:bby2] = input[rand_index, :, bbx1:bbx2, bby1:bby2]
        lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (input.size()[-1] * input.size()[-2]))
        return input,lam,target_a,target_b,True
    else:
        lam = np.random.beta(beta, beta)
        return input,lam,target,target,False

In [15]:
def mixup_data(x, y, alpha=1.0, use_cuda=False):
    '''Returns mixed inputs, pairs of targets, and lambda'''
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size()[0]
    if use_cuda:
        index = torch.randperm(batch_size).cuda()
    else:
        index = torch.randperm(batch_size)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

In [58]:
class LabelSmoothingLoss(nn.Module):
    def __init__(self,classes=18,smoothing=0.1,dim=-1):
        super(LabelSmoothingLoss,self).__init__()
        self.smoothing = smoothing
        self.cls = classes
        self.dim = dim
    
    def forward(self,pred,target):
        pred = pred.log_softmax(dim=self.dim)
        with torch.no_grad():
            true_dist = torch.zeros_like(pred)
            true_dist.fill_(self.smoothing/self.cls)
            true_dist.scatter_(1,target.data,(1.0-self.smoothing))
        return torch.mean(torch.sum(-true_dist*pred,dim=self.dim))

In [36]:
class CutMixcrossentropyloss(nn.Module):
    def __init__(self,classes=18,smoothing=0.1,dim=-1):
        super(CutMixcrossentropyloss,self).__init__()
        self.smoothing = smoothing
        self.cls = classes
        self.dim = dim
        
    def forward(self,pred,target):
        if len(target.shape) == 3: 
            pred = pred.log_softmax(dim=self.dim)
            return torch.mean(torch.sum(-target*pred,dim=self.dim))
        else:
            pred = pred.log_softmax(dim=self.dim)
            with torch.no_grad():
                true_dist = torch.zeros_like(pred)
                true_dist.fill_(self.smoothing/self.cls)
                true_dist.scatter_(1,target.data,(1.0-self.smoothing))
            return torch.mean(torch.sum(-true_dist*pred,dim=self.dim))

In [71]:
class Mymodel(nn.Module):
    def __init__(self,resizer,recognition):
        super(Mymodel,self).__init__()
        self.resizer = resizer
        self.recognition = recognition
    
    def forward(self,x):
        resize_img = self.resizer(x)
        output = self.recognition(resize_img)
        return output

In [19]:
from adamp import AdamP

In [38]:
from madgrad import MADGRAD

In [60]:
criterion = LabelSmoothingLoss()
optimizer = Adam(model.parameters(),lr=0.001)
scheduler = lr_scheduler.CosineAnnealingLR(optimizer,T_max=2,eta_min=0)

In [20]:
criterion = LabelSmoothingLoss()
optimizer = AdamP(model.parameters(),lr=0.001,weight_decay=0.01)
scheduler = lr_scheduler.MultiStepLR(optimizer,milestones=[4,7],gamma=0.5)

In [51]:
criterion = LabelSmoothingLoss()
optimizer = MADGRAD(model.parameters(),lr=0.001)

In [73]:
criterion = LabelSmoothingLoss()
optimizer = Adam(model.parameters(),lr=0.001)
scheduler = lr_scheduler.MultiStepLR(optimizer,milestones=[4,7],gamma=0.5)

In [78]:
criterion = CutMixcrossentropyloss()
optimizer = torch.optim.SGD(model.parameters(),lr=0.01,momentum=0.7,weight_decay=0.01)
scheduler = lr_scheduler.CosineAnnealingLR(optimizer,T_max=2,eta_min=0)

In [20]:
device = torch.device('cuda')

In [76]:
counter = 0
patience = 10
best_val_acc = 0
best_val_loss = np.inf
train_log_interval = 100
num_epochs = 5

In [54]:
#mixup training

model.to(device)

for epoch in range(num_epochs):
    # train loop
    model.train()
    loss_value = 0
    train_acc = 0 
    train_f1 = 0
    for idx, (inputs,targets) in enumerate(train_loader):
        
        optimizer.zero_grad()
        
        inputs,target_a,target_b,lam = mixup_data(inputs,targets,alpha=1.0)
        
        inputs = inputs.to(device)

        outs = model(inputs)
        preds = torch.argmax(outs, dim=-1)
        
        target_a = target_a.to(device)
        target_b = target_b.to(device)
        
        loss = criterion(outs, target_a) * lam + criterion(outs, target_b)* (1-lam)

        loss.backward()
        
        optimizer.step()

        loss_value += loss.item()
        train_acc += accuracy_score(targets.cpu(),preds.cpu())
        train_f1 += f1_score(targets.cpu(),preds.cpu(),average='macro')

        if (idx + 1) % train_log_interval == 0:
            train_loss = loss_value / train_log_interval
            avg_train_acc = train_acc / train_log_interval
            avg_train_f1 = train_f1 / train_log_interval
            
            current_lr = scheduler.get_last_lr()

            print(
                f"Epoch[{epoch}/{num_epochs}]({idx + 1}/{len(train_loader)}) || "
                f"training loss {train_loss:4.4} || training accuracy {avg_train_acc:4.2%} || lr {current_lr}||"
                "fl_score_train : {}".format(avg_train_f1)
            )

            loss_value = 0
            train_acc = 0
            train_f1 = 0

    #scheduler.step()

    # val loop
    with torch.no_grad():
        print("Calculating validation results...")
        model.eval()
        val_loss_items = []
        val_f1 = 0
        val_acc = 0
        for idx,(inputs,labels) in enumerate(val_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outs = model(inputs)
            preds = torch.argmax(outs, dim=-1)
            
            val_acc += accuracy_score(labels.cpu(),preds.cpu())
            val_f1 += f1_score(labels.cpu(),preds.cpu(),average='macro')
            
            loss_item = criterion(outs, labels).item()
            val_loss_items.append(loss_item)

        val_loss = np.sum(val_loss_items) / len(val_loader)
        avg_val_acc = val_acc / (idx+1)
        avg_val_f1 = val_f1 / (idx+1)
        
        
        # Callback1: validation accuracy가 향상될수록 모델을 저장합니다.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
        if avg_val_acc > best_val_acc:
            print("New best model for val accuracy! saving the model..")
            torch.save(model.state_dict(), f"result_{epoch:03}_accuracy_{avg_val_acc:4.2%}.ckpt")
            best_val_acc = avg_val_acc
            counter = 0
        else:
            counter += 1
        # Callback2: patience 횟수 동안 성능 향상이 없을 경우 학습을 종료시킵니다.
        if counter > patience:
            print("Early Stopping...")
            break
        
        
        print(
            f"[Val] acc : {avg_val_acc:4.2%}, loss: {val_loss:4.2} || f1_score: {avg_val_f1} ||"
            f"best acc : {best_val_acc:4.2%}, best loss: {best_val_loss:4.2}"
        )

Epoch[0/5](100/1890) || training loss 1.054 || training accuracy 60.38% || lr [0.0010000000000000026]||fl_score_train : 0.5519310966810967
Epoch[0/5](200/1890) || training loss 1.115 || training accuracy 58.63% || lr [0.0010000000000000026]||fl_score_train : 0.5337315003779288
Epoch[0/5](300/1890) || training loss 1.066 || training accuracy 64.00% || lr [0.0010000000000000026]||fl_score_train : 0.5924376984126984
Epoch[0/5](400/1890) || training loss 1.127 || training accuracy 58.38% || lr [0.0010000000000000026]||fl_score_train : 0.5236261063011062
Epoch[0/5](500/1890) || training loss 1.071 || training accuracy 60.62% || lr [0.0010000000000000026]||fl_score_train : 0.5614214285714286
Epoch[0/5](600/1890) || training loss 1.114 || training accuracy 62.00% || lr [0.0010000000000000026]||fl_score_train : 0.5725864117364117
Epoch[0/5](700/1890) || training loss 1.099 || training accuracy 60.12% || lr [0.0010000000000000026]||fl_score_train : 0.5553904761904764
Epoch[0/5](800/1890) || tra

KeyboardInterrupt: 

In [77]:
#cutmix training

model.to(device)

for epoch in range(num_epochs):
    # train loop
    model.train()
    loss_value = 0
    train_acc = 0 
    train_f1 = 0
    for idx, (inputs,targets) in enumerate(train_loader):
        
        optimizer.zero_grad()
        
        inputs,lam,target_a,target_b,cut = CutMix_two(inputs,targets,cutmix_prob=0.5)
        
        inputs = inputs.to(device)

        outs = model(inputs)
        preds = torch.argmax(outs, dim=-1)
        if cut:
            target_a = target_a.to(device)
            target_b = target_b.to(device)
            loss = criterion(outs, target_a) * lam + criterion(outs, target_b)* (1-lam)
        else:
            target_a = target_a.to(device)
            loss = criterion(outs, target_a)

        loss.backward()
        
        optimizer.step()

        loss_value += loss.item()
        train_acc += accuracy_score(targets.cpu(),preds.cpu())
        train_f1 += f1_score(targets.cpu(),preds.cpu(),average='macro')

        if (idx + 1) % train_log_interval == 0:
            train_loss = loss_value / train_log_interval
            avg_train_acc = train_acc / train_log_interval
            avg_train_f1 = train_f1 / train_log_interval
            
            current_lr = scheduler.get_last_lr()

            print(
                f"Epoch[{epoch}/{num_epochs}]({idx + 1}/{len(train_loader)}) || "
                f"training loss {train_loss:4.4} || training accuracy {avg_train_acc:4.2%} || lr {current_lr}||"
                "fl_score_train : {}".format(avg_train_f1)
            )

            loss_value = 0
            train_acc = 0
            train_f1 = 0

    scheduler.step()

    # val loop
    with torch.no_grad():
        print("Calculating validation results...")
        model.eval()
        val_loss_items = []
        val_f1 = 0
        val_acc = 0
        for idx,(inputs,labels) in enumerate(val_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outs = model(inputs)
            preds = torch.argmax(outs, dim=-1)
            
            val_acc += accuracy_score(labels.cpu(),preds.cpu())
            val_f1 += f1_score(labels.cpu(),preds.cpu(),average='macro')
            
            loss_item = criterion(outs, labels).item()
            val_loss_items.append(loss_item)

        val_loss = np.sum(val_loss_items) / len(val_loader)
        avg_val_acc = val_acc / (idx+1)
        avg_val_f1 = val_f1 / (idx+1)
        
        
        # Callback1: validation accuracy가 향상될수록 모델을 저장합니다.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
        if avg_val_acc > best_val_acc:
            print("New best model for val accuracy! saving the model..")
            torch.save(model.state_dict(), f"result_{epoch:03}_accuracy_{avg_val_acc:4.2%}.ckpt")
            best_val_acc = avg_val_acc
            counter = 0
        else:
            counter += 1
        # Callback2: patience 횟수 동안 성능 향상이 없을 경우 학습을 종료시킵니다.
        if counter > patience:
            print("Early Stopping...")
            break
        
        
        print(
            f"[Val] acc : {avg_val_acc:4.2%}, loss: {val_loss:4.2} || f1_score: {avg_val_f1} ||"
            f"best acc : {best_val_acc:4.2%}, best loss: {best_val_loss:4.2}"
        )

Epoch[0/5](100/1890) || training loss 0.8697 || training accuracy 87.00% || lr [0.00025]||fl_score_train : 0.8478851130351133
Epoch[0/5](200/1890) || training loss 0.8327 || training accuracy 89.25% || lr [0.00025]||fl_score_train : 0.8673465075929362
Epoch[0/5](300/1890) || training loss 0.8255 || training accuracy 88.75% || lr [0.00025]||fl_score_train : 0.8683351319315606
Epoch[0/5](400/1890) || training loss 0.8133 || training accuracy 90.62% || lr [0.00025]||fl_score_train : 0.8870468253968252
Epoch[0/5](500/1890) || training loss 0.8398 || training accuracy 90.75% || lr [0.00025]||fl_score_train : 0.8868373015873015
Epoch[0/5](600/1890) || training loss 0.8379 || training accuracy 87.25% || lr [0.00025]||fl_score_train : 0.8487884920634918
Epoch[0/5](700/1890) || training loss 0.8047 || training accuracy 91.75% || lr [0.00025]||fl_score_train : 0.9059333333333334
Epoch[0/5](800/1890) || training loss 0.867 || training accuracy 89.12% || lr [0.00025]||fl_score_train : 0.8636369047

In [80]:
#cutmix training 2
model.to(device)

for epoch in range(num_epochs):
    # train loop
    model.train()
    loss_value = 0
    train_acc = 0 
    train_f1 = 0
    for idx, (inputs,targets) in enumerate(train_loader):
        
        optimizer.zero_grad()
        
        inputs,labels = CutMix(inputs,targets,cutmix_prob=0.3)
        
        inputs = inputs.to(device)
        labels = labels.to(device)

        outs = model(inputs)
        preds = torch.argmax(outs, dim=-1)
        loss = criterion(outs, labels)

        loss.backward()
        
        optimizer.step()

        loss_value += loss.item()
        train_acc += accuracy_score(targets.cpu(),preds.cpu())
        train_f1 += f1_score(targets.cpu(),preds.cpu(),average='macro')

        if (idx + 1) % train_log_interval == 0:
            train_loss = loss_value / train_log_interval
            avg_train_acc = train_acc / train_log_interval
            avg_train_f1 = train_f1 / train_log_interval
            
            current_lr = scheduler.get_last_lr()

            print(
                f"Epoch[{epoch}/{num_epochs}]({idx + 1}/{len(train_loader)}) || "
                f"training loss {train_loss:4.4} || training accuracy {avg_train_acc:4.2%} || lr {current_lr}||"
                "fl_score_train : {}".format(avg_train_f1)
            )

            loss_value = 0
            train_acc = 0
            train_f1 = 0

    scheduler.step()

    # val loop
    with torch.no_grad():
        print("Calculating validation results...")
        model.eval()
        val_loss_items = []
        val_f1 = 0
        val_acc = 0
        for idx,(inputs,labels) in enumerate(val_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outs = model(inputs)
            preds = torch.argmax(outs, dim=-1)
            
            val_acc += accuracy_score(labels.cpu(),preds.cpu())
            val_f1 += f1_score(labels.cpu(),preds.cpu(),average='macro')
            
            loss_item = criterion(outs, labels).item()
            val_loss_items.append(loss_item)

        val_loss = np.sum(val_loss_items) / len(val_loader)
        avg_val_acc = val_acc / (idx+1)
        avg_val_f1 = val_f1 / (idx+1)
        
        
        # Callback1: validation accuracy가 향상될수록 모델을 저장합니다.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
        if avg_val_acc > best_val_acc:
            print("New best model for val accuracy! saving the model..")
            torch.save(model.state_dict(), f"result_{epoch:03}_accuracy_{avg_val_acc:4.2%}.ckpt")
            best_val_acc = avg_val_acc
            counter = 0
        else:
            counter += 1
        # Callback2: patience 횟수 동안 성능 향상이 없을 경우 학습을 종료시킵니다.
        if counter > patience:
            print("Early Stopping...")
            break
        
        
        print(
            f"[Val] acc : {avg_val_acc:4.2%}, loss: {val_loss:4.2} || f1_score: {avg_val_f1} ||"
            f"best acc : {best_val_acc:4.2%}, best loss: {best_val_loss:4.2}"
        )

Epoch[0/10](100/473) || training loss 2.461 || training accuracy 20.97% || lr [0.01]||fl_score_train : 0.032290895432331636
Epoch[0/10](200/473) || training loss 2.41 || training accuracy 22.00% || lr [0.01]||fl_score_train : 0.03564169476281975
Epoch[0/10](300/473) || training loss 2.428 || training accuracy 21.88% || lr [0.01]||fl_score_train : 0.03407106069073554
Epoch[0/10](400/473) || training loss 2.427 || training accuracy 22.00% || lr [0.01]||fl_score_train : 0.0344657374821741
Calculating validation results...
New best model for val accuracy! saving the model..
[Val] acc : 21.11%, loss:  2.4 || f1_score: 0.03298499483179209 ||best acc : 21.11%, best loss:  2.4


KeyboardInterrupt: 

In [31]:
#basic training

model.to(device)

for epoch in range(num_epochs):
    # train loop
    model.train()
    loss_value = 0
    train_acc = 0 
    train_f1 = 0
    for idx, (inputs,labels) in enumerate(train_loader):
        
        optimizer.zero_grad()
        
        inputs = inputs.to(device)
        labels = labels.to(device)

        outs = model(inputs)
        preds = torch.argmax(outs, dim=-1)
        loss = criterion(outs, labels)

        loss.backward()
        
        optimizer.step()

        loss_value += loss.item()
        train_acc += accuracy_score(labels.cpu(),preds.cpu())
        train_f1 += f1_score(labels.cpu(),preds.cpu(),average='macro')

        if (idx + 1) % train_log_interval == 0:
            train_loss = loss_value / train_log_interval
            avg_train_acc = train_acc / train_log_interval
            avg_train_f1 = train_f1 / train_log_interval
            
            current_lr = scheduler.get_last_lr()
    
            print(
                f"Epoch[{epoch}/{num_epochs}]({idx + 1}/{len(train_loader)}) || "
                f"training loss {train_loss:4.4} || training accuracy {avg_train_acc:4.2%} || lr {current_lr}||"
                "fl_score_train : {}".format(avg_train_f1)
            )

            loss_value = 0
            train_acc = 0
            train_f1 = 0

    scheduler.step()

    # val loop
    with torch.no_grad():
        print("Calculating validation results...")
        model.eval()
        val_loss_items = []
        val_f1 = 0
        val_acc = 0
        for idx,(inputs,labels) in enumerate(val_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outs = model(inputs)
            preds = torch.argmax(outs, dim=-1)
            
            val_acc += accuracy_score(labels.cpu(),preds.cpu())
            val_f1 += f1_score(labels.cpu(),preds.cpu(),average='macro')
            
            loss_item = criterion(outs, labels).item()
            val_loss_items.append(loss_item)

        val_loss = np.sum(val_loss_items) / len(val_loader)
        avg_val_acc = val_acc / (idx+1)
        avg_val_f1 = val_f1 / (idx+1)
        
        
        # Callback1: validation accuracy가 향상될수록 모델을 저장합니다.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
        
        if avg_val_acc > best_val_acc:
            print("New best model for val accuracy! saving the model..")
            torch.save(model.state_dict(), f"result_{epoch:03}_accuracy_{avg_val_acc:4.2%}.ckpt")
            best_val_acc = avg_val_acc
            counter = 0
        else:
            counter += 1
        # Callback2: patience 횟수 동안 성능 향상이 없을 경우 학습을 종료시킵니다.
        if counter > patience:
            print("Early Stopping...")
            break
        
        
        print(
            f"[Val] acc : {avg_val_acc:4.2%}, loss: {val_loss:4.2} || f1_score: {avg_val_f1} ||"
            f"best acc : {best_val_acc:4.2%}, best loss: {best_val_loss:4.2}"
        )

Epoch[0/10](100/472) || training loss 1.387 || training accuracy 70.44% || lr [0.001]||fl_score_train : 0.5170381672729036
Epoch[0/10](200/472) || training loss 1.01 || training accuracy 82.91% || lr [0.001]||fl_score_train : 0.6796844283469174
Epoch[0/10](300/472) || training loss 0.9222 || training accuracy 87.34% || lr [0.001]||fl_score_train : 0.7652154375596248
Epoch[0/10](400/472) || training loss 0.9119 || training accuracy 87.31% || lr [0.001]||fl_score_train : 0.7536396747027149
Calculating validation results...
New best model for val accuracy! saving the model..
[Val] acc : 88.29%, loss: 0.87 || f1_score: 0.7711771300654587 ||best acc : 88.29%, best loss: 0.87
Epoch[1/10](100/472) || training loss 0.7735 || training accuracy 93.00% || lr [0.0005]||fl_score_train : 0.8579344988942027
Epoch[1/10](200/472) || training loss 0.7107 || training accuracy 95.31% || lr [0.0005]||fl_score_train : 0.8993191227049467
Epoch[1/10](300/472) || training loss 0.6773 || training accuracy 96.47