In [2]:
import sys
import numpy as np
import pandas as pd

import os
import gc
import matplotlib.pyplot as plt
import importlib
import pickle

from tqdm.notebook import tqdm

# Notebook display options: show up to 200 rows, truncate long cell text,
# and render floats with three decimals.
pd.set_option('display.max_rows', 200)
pd.set_option("max_colwidth", 45)
pd.set_option("display.precision", 1)
pd.options.display.float_format = "{:.3f}".format
# pd.set_option("display.max_rows", 5)
# pd.reset_option("display.max_rows")

from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold

# pandarallel supplies Series.parallel_apply; two worker processes with a progress bar.
from pandarallel import pandarallel
pandarallel.initialize(progress_bar=True, nb_workers=2)

# Global seed (also passed to the fold split and set_seed later in the notebook).
SEED = 34
np.random.seed(SEED)
# Number of CPUs reported by the OS; used as the DataLoader worker count below.
N_CPU = os.cpu_count()

INFO: Pandarallel will run on 2 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [None]:
%%capture
!pip3 install git+https://github.com/daniilkk/python-cpbd.git
!pip3 install git+https://github.com/umang-singhal/pydom.git

In [3]:
import cv2
import cpbd
import pywt
from dom import DOM

from torch.utils.data import DataLoader,Dataset
from typing import Union, Any, Optional, Tuple, Dict, List
import torchvision.transforms as T
import torchvision
from PIL import Image

import torch
from torch import nn
from torch.nn.modules.loss import BCEWithLogitsLoss
from torchmetrics.classification import BinaryAUROC
import torch.optim as optim

torch.__version__, torchvision.__version__

('1.11.0', '0.12.0')

In [4]:
# Root of the competition data and the two image sub-directories.
dir_data = '/kaggle/input/shift-cv-winter-2023/'

DIR_TRAIN = dir_data + 'train/train/'
DIR_TEST = dir_data + 'test/test/'

# train.csv holds the labelled training rows; sample_submission.csv lists the test files.
df_train = pd.read_csv(dir_data + 'train.csv')
df_test = pd.read_csv(dir_data + 'sample_submission.csv')

# Turn bare file names into full paths.
# (The original test line read `DIR_TEST + + df_test[...]`, i.e. it applied a
# unary plus to a string Series; only a plain concatenation is intended.)
df_train['filename'] = DIR_TRAIN + df_train['filename']
df_test['filename'] = DIR_TEST + df_test['filename']
df_train[:3]

Unnamed: 0,filename,blur
0,/kaggle/input/shift-cv-winter-2023/train/...,0.0
1,/kaggle/input/shift-cv-winter-2023/train/...,0.0
2,/kaggle/input/shift-cv-winter-2023/train/...,0.0


In [5]:
# Class balance of the binary `blur` target.
df_train['blur'].value_counts()

0.000    1367
1.000    1297
Name: blur, dtype: int64

In [6]:
def add_folds(df:pd.DataFrame, n_folds:int=4, seed:int=34) -> pd.DataFrame:
    """Add a stratified cross-validation fold id to ``df`` in a ``fold`` column.

    Rows are split with ``StratifiedKFold`` (shuffled, seeded) on the ``blur``
    column; each row receives the index of the fold in which it belongs to
    the validation part.

    Args:
        df: frame with a ``blur`` column. It is modified in place.
        n_folds: number of folds.
        seed: random state of the splitter.

    Returns:
        The same ``df`` object, with an integer ``fold`` column.
    """
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed)

    # skf.split yields *positional* indices. Collect them in a positional array
    # instead of writing through label-based `.loc`, which assigns the wrong
    # rows whenever df does not carry a default RangeIndex.
    fold_ids = np.full(len(df), -1, dtype=np.int64)
    for fold, (_, val_pos) in enumerate(skf.split(df, df['blur'])):
        fold_ids[val_pos] = fold

    df['fold'] = fold_ids
    return df

def calc_fft_measure(gray_img, size=40):
    """Blur score of a grayscale image based on the Fourier transform.

    - The image is moved to the frequency domain with an FFT, the origin is
      shifted to the centre, and a square of low frequencies around the centre
      is zeroed.
    - The filtered spectrum is turned back into an image with the inverse FFT.
    - The log-scaled magnitude of that reconstruction is averaged and the
      reciprocal of the average is returned.

    Per the original author's note: the larger the value, the blurrier the image.

    Args:
        gray_img: 2-D array; a leading axis of length one is added internally.
        size: half side length of the zeroed square, in frequency bins.

    Returns:
        Array of shape ``(1,)`` holding the score.
    """
    stacked = gray_img[None, :]

    _, height, width = stacked.shape
    center_x = width // 2
    center_y = height // 2

    spectrum = np.fft.fftshift(np.fft.fft2(stacked, axes=(1, 2)), axes=(1, 2))

    # High-pass filter: drop the square of low frequencies around the centre.
    rows = slice(center_y - size, center_y + size)
    cols = slice(center_x - size, center_x + size)
    spectrum[:, rows, cols] = 0

    recon = np.fft.ifft2(np.fft.ifftshift(spectrum, axes=(1, 2)), axes=(1, 2))

    log_magnitude = 20 * np.log(np.abs(recon))
    return 1 / np.average(log_magnitude, axis=(1, 2))

import torch
def calc_tv_measure(gray_img):
    """Blur score of a grayscale image based on total variation.

    Only the central part of the image (the middle half along each axis) is
    used. The score is the sum of absolute differences between neighbouring
    pixels (L1) divided by the sum of squared differences (L2).

    Per the original author's note: the larger the value, the blurrier the image.

    The pixels are cast to float64 before differencing. The previous version
    did the arithmetic directly on the tensor built from the input array; for
    uint8 images the differences and their squares wrapped around modulo 256,
    so both sums were computed from corrupted values.
    NOTE: features cached with the earlier version differ and must be
    recomputed before being mixed with values from this one.

    Args:
        gray_img: 2-D numpy array of pixel intensities.

    Returns:
        float64 tensor of shape ``(1,)``; NaN/inf if the crop is constant.
    """
    img = torch.from_numpy(gray_img[None, :]).to(torch.float64)

    _, height, width = img.shape
    img = img[:, height // 4 : 3 * height // 4, width // 4 : 3 * width // 4]

    # Horizontal and vertical neighbour differences.
    diff_w = img[:, :, 1:] - img[:, :, :-1]
    diff_h = img[:, 1:, :] - img[:, :-1, :]

    l2_score = diff_h.pow(2).sum(dim=[1, 2]) + diff_w.pow(2).sum(dim=[1, 2])
    l1_score = diff_h.abs().sum(dim=[1, 2]) + diff_w.abs().sum(dim=[1, 2])

    return l1_score / l2_score

In [None]:
def _window_maxima(edge_map, win, n_rows, n_cols):
    """Maxima of the non-overlapping ``win`` x ``win`` windows of ``edge_map``, row by row."""
    tiles = edge_map[: n_rows * win, : n_cols * win].reshape(n_rows, win, n_cols, win)
    return tiles.max(axis=(1, 3)).reshape(-1).astype(np.float64)


def blur_detect(Y, threshold=35):
    """Blur extent of a grayscale image from a three-level Haar wavelet analysis.

    Steps:
      1. Crop the image so both sides are multiples of 16 and apply the 2-D
         Haar transform three times (each time to the previous approximation).
      2. Build an edge map per level: sqrt(LH^2 + HL^2 + HH^2).
      3. Take the maximum of every 8x8 / 4x4 / 2x2 window of the level 1/2/3
         edge map, so the three levels yield aligned vectors Emax1..3.
      4. A window is an edge point if any of its three maxima exceeds
         ``threshold``.
      5. Edge points with Emax1 < Emax2 < Emax3 are labelled Roof/Gstep
         structures, those with Emax2 above both Emax1 and Emax3 Roof
         structures (labels taken from the original comments).
      6. Such a structure counts as blurred when Emax1 < ``threshold``.

    NOTE(review): this looks like the Haar-wavelet blur detection scheme of
    Tong et al. — confirm against the source it was adapted from.

    Args:
        Y: 2-D grayscale image.
        threshold: edge-strength threshold.

    Returns:
        Ratio of blurred structures to all Roof/Gstep + Roof structures, or
        the sentinel ``100`` when no such structure is found.
    """
    M, N = Y.shape

    # Crop so both sides are divisible by 16; three halvings then still leave
    # an integer number of 2x2 windows at level 3.
    Y = Y[: (M // 16) * 16, : (N // 16) * 16]

    # Steps 1-2: successive Haar decompositions and the edge map of each level.
    edge_maps = []
    approx = Y
    for _ in range(3):
        approx, (LH, HL, HH) = pywt.dwt2(approx, 'haar')
        edge_maps.append(np.sqrt(np.power(LH, 2) + np.power(HL, 2) + np.power(HH, 2)))

    # Step 3: window maxima. The window side halves with each level so that
    # entry i of every vector describes the same image region.
    M1, N1 = edge_maps[0].shape
    n_rows, n_cols = M1 // 8, N1 // 8
    Emax1 = _window_maxima(edge_maps[0], 8, n_rows, n_cols)
    Emax2 = _window_maxima(edge_maps[1], 4, n_rows, n_cols)
    Emax3 = _window_maxima(edge_maps[2], 2, n_rows, n_cols)

    # Rule 1: edge points.
    is_edge = (Emax1 > threshold) | (Emax2 > threshold) | (Emax3 > threshold)

    # Rule 3: Roof-Structure or Gstep-Structure.
    rg_structure = is_edge & (Emax1 < Emax2) & (Emax2 < Emax3)

    # Rule 4: Roof-Structure.
    rs_structure = is_edge & (Emax2 > Emax1) & (Emax2 > Emax3)

    # Rule 5: structures that are more likely to sit in a blurred image.
    blurred = (rg_structure | rs_structure) & (Emax1 < threshold)

    # The Dirac/Astep ratio ("Per") of the original was computed but never
    # returned, and divided by zero when there were no edge points; it is omitted.
    n_structures = rg_structure.sum() + rs_structure.sum()
    if n_structures == 0:
        return 100
    return blurred.sum() / n_structures

In [None]:
from skimage import measure

def skimage_blur_eff(gray_img):
    """Return ``skimage.measure.blur_effect`` of ``gray_img`` with ``h_size=7``."""
    blur_score = measure.blur_effect(gray_img, h_size=7)
    return blur_score

def feats_from_laplacian(gray_img):
    """Return ``(variance, 0.99 quantile)`` of the image's Laplacian response.

    The Laplacian is computed by OpenCV with a 64-bit float output depth.
    """
    response = cv2.Laplacian(gray_img, cv2.CV_64F)
    variance = response.var()
    q99 = np.quantile(response, 0.99)
    return variance, q99

def dom_sharpness(gray_img):
    """Return the sharpness reported by a freshly created ``DOM`` estimator.

    The estimator is called with fixed settings: ``width=50``,
    ``sharpness_threshold=2`` and ``edge_threshold=0.0001``.
    """
    estimator = DOM()
    sharpness = estimator.get_sharpness(
        gray_img,
        width=50,
        sharpness_threshold=2,
        edge_threshold=0.0001,
    )
    return sharpness

In [None]:
# (extractor, output name(s)) pairs consumed by get_feats: a plain string
# names a scalar result, a list names the items of a multi-value result.
FUNCS = [
    (feats_from_laplacian, ['laplacian_var', 'laplacian_q99']),
    (blur_detect, 'blur_extent'),
    (cpbd.compute, 'cpbd'),
    (skimage_blur_eff, 'skimage_blur_eff'),
    (calc_fft_measure, 'fft'),
    (calc_tv_measure, 'tv'),
    (dom_sharpness, 'dom_sharpness'),
]

def read_image_gray(path):
    """Read the image at ``path`` with OpenCV and return it as grayscale.

    Raises:
        FileNotFoundError: if OpenCV could not read the file. ``cv2.imread``
            signals this by returning ``None`` rather than raising, which
            would otherwise surface as an opaque error inside ``cvtColor``.
    """
    bgr_img = cv2.imread(path)
    if bgr_img is None:
        raise FileNotFoundError(f"cv2.imread could not read image: {path!r}")
    return cv2.cvtColor(bgr_img, cv2.COLOR_BGR2GRAY)

def get_feats(image_path, funcs):
    """Compute all blur features of one image.

    Args:
        image_path: path of the image; it is read as grayscale.
        funcs: sequence of ``(function, names)`` entries. ``names`` is either
            a string (the function result is stored under it) or a sequence
            of strings (item ``i`` of the result is stored under ``names[i]``).

    Returns:
        ``pd.Series`` mapping feature name to float value.
    """
    gray_img = read_image_gray(image_path)

    collected = {}
    for entry in funcs:
        extractor, names = entry[0], entry[1]
        values = extractor(gray_img)

        if isinstance(names, str):
            collected[names] = float(values)
            continue

        for position, name in enumerate(names):
            collected[name] = float(values[position])

    return pd.Series(collected)

In [None]:
# Compute the blur features of every training image with pandarallel's parallel_apply.
train_feats = df_train['filename'].parallel_apply(get_feats,args=(FUNCS,))
train_feats[:3]

In [None]:
# Column names of the computed feature table.
FEAT_COLS = train_feats.columns.to_list()
FEAT_COLS

In [None]:
# Same feature extraction for the test images.
test_feats = df_test['filename'].parallel_apply(get_feats,args=(FUNCS,))
test_feats[:3]

In [None]:
# Persist the raw feature tables (without the index) to the working directory.
train_feats.to_csv('train_feats.csv',index=False)
test_feats.to_csv('test_feats.csv',index=False)

In [None]:
# Append the feature columns to the training table (column-wise, aligned on the index).
df_train = pd.concat([df_train,train_feats],axis=1)
df_train[:3]

In [None]:
# Append the feature columns to the test table (column-wise, aligned on the index).
df_test = pd.concat([df_test,test_feats],axis=1)
df_test[:3]

In [None]:
# Save the tables with features attached.
df_train.to_csv('df_train.csv',index=False)
df_test.to_csv('df_test.csv',index=False)

In [7]:
# Load train/test tables with feature columns from a Kaggle input dataset.
# NOTE(review): presumably these are the files written by the cells above and
# uploaded as a dataset — confirm.
df_train = pd.read_csv('/kaggle/input/shift14-plus-pytorch/df_train.csv')
df_test = pd.read_csv('/kaggle/input/shift14-plus-pytorch/df_test.csv')
df_train[:1]

Unnamed: 0,filename,blur,laplacian_var,laplacian_q99,blur_extent,cpbd,skimage_blur_eff,fft,tv,dom_sharpness
0,/kaggle/input/shift-cv-winter-2023/train/...,0.0,403.814,67.0,0.459,0.542,0.435,0.028,4.34,1.14


In [8]:
# Model inputs are all columns except the image path, the target and the
# cross-validation fold id. Selecting by name (rather than "everything after
# the first two columns") keeps this cell correct when it is re-run after
# add_folds has added a `fold` column to df_train.
NON_FEATURE_COLS = ('filename', 'blur', 'fold')
FEAT_COLS = [col for col in df_train.columns if col not in NON_FEATURE_COLS]
FEAT_COLS

['laplacian_var',
 'laplacian_q99',
 'blur_extent',
 'cpbd',
 'skimage_blur_eff',
 'fft',
 'tv',
 'dom_sharpness']

In [9]:
import torch
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [10]:
import random

def set_seed(seed: int = 34) -> None:
    """Seed Python's ``random``, NumPy and PyTorch (CPU and CUDA) generators.

    Also switches cuDNN to deterministic mode with benchmarking disabled,
    stores the seed in the ``PYTHONHASHSEED`` environment variable and prints
    a confirmation line.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)

    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    print(f"Random seed set as {seed}")

# Seed every generator with the notebook-wide SEED.
set_seed(SEED)

Random seed set as 34


In [11]:
class Dset(Dataset):
    """Dataset yielding an image, its precomputed blur features and its label.

    Args:
        df: table with a ``filename`` column (image path), a ``blur`` column
            (target) and the columns listed in ``feat_cols``.
        augmentation: optional transform applied to the loaded PIL image.
        feat_cols: names of the feature columns. The default is the value
            ``FEAT_COLS`` had when this class was defined; pass the list
            explicitly if ``FEAT_COLS`` was changed afterwards.
    """

    def __init__(self, df:pd.DataFrame, augmentation:Optional[T.Compose]=None, feat_cols:List[str]=FEAT_COLS):

        # Positional lookups below need a 0..n-1 index.
        self.df = df.reset_index(drop=True)
        self.blur_feats = self.df[feat_cols].to_numpy()

        # Shape (n, 1), float32.
        self.labels = torch.tensor(self.df['blur'].to_numpy(), dtype=torch.float32).unsqueeze(1)

        self.aug = augmentation

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return a dict with keys ``image``, ``blur_feats`` and ``label``."""
        img_path = self.df['filename'].iloc[idx]

        # Force three channels: the transforms normalise with 3-channel
        # statistics, which fails on grayscale or RGBA files. convert() also
        # loads the pixel data, so the file can be closed right away.
        with Image.open(img_path) as img_file:
            image = img_file.convert('RGB')

        blur_feats = torch.tensor(self.blur_feats[idx], dtype=torch.float32)

        if self.aug:
            image = self.aug(image)

        return {
            "image": image,
            "blur_feats": blur_feats,
            "label": self.labels[idx],
            }

In [12]:
# Spatial size of every crop fed to the network.
OUTPUT_SHAPE = (224, 224)
# OUTPUT_SHAPE = (600, 600)

# Per-channel normalisation statistics shared by all pipelines
# (presumably the ImageNet statistics of the pretrained backbone — confirm).
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Training: one random crop, tensor conversion + normalisation, random flips.
train_transform = T.Compose([
    # T.RandomApply(transforms=[T.RandomRotation(degrees=(90, 90))], p=0.5),
    # T.RandomApply(transforms=[T.RandomRotation(degrees=(180, 180))], p=0.3),
    T.RandomCrop(OUTPUT_SHAPE),
    T.ToTensor(),
    T.Normalize(mean=NORM_MEAN, std=NORM_STD),
    T.RandomVerticalFlip(p=0.5),
    T.RandomHorizontalFlip(p=0.5),
])

# Per-crop preprocessing used at validation / test time.
val_0 = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=NORM_MEAN, std=NORM_STD),
])


def _stack_normalized_crops(crops):
    """Preprocess every crop with ``val_0`` and stack them along a new first axis."""
    return torch.stack([val_0(crop) for crop in crops])


# Validation / test: FiveCrop, each crop preprocessed, stacked into one tensor.
val_transform = T.Compose([
    T.FiveCrop(OUTPUT_SHAPE),
    T.Lambda(_stack_normalized_crops),
])

In [22]:
class Model(nn.Module):
    """EfficientNet-B0 image branch combined with the precomputed blur features.

    The backbone's classifier is replaced by Dropout -> Linear(1280,
    ``n_feats_from_backbone``) -> ReLU. The blur features pass through a
    Linear + ReLU of unchanged width. Both outputs are concatenated and mapped
    by Dropout -> Linear to ``output_shape`` values (no final activation).
    """

    def __init__(self, n_feats_from_backbone:int=10, n_calc_feats:int = 8, output_shape:int = 1) -> None:
        super().__init__()

        # Image branch.
        self.backbone = torchvision.models.efficientnet_b0(pretrained=True)
        self.backbone.classifier = nn.Sequential(
            nn.Dropout(p=0.2, inplace=True),
            nn.Linear(in_features=1280, out_features=n_feats_from_backbone),  # in_features=2560
            nn.ReLU(),
        )

        # Tabular branch for the hand-crafted blur features.
        self.lin1 = nn.Sequential(
            nn.Linear(in_features=n_calc_feats, out_features=n_calc_feats, bias=True),
            nn.ReLU(),
        )

        # Head over the concatenated branches.
        n_joint_feats = n_feats_from_backbone + n_calc_feats
        self.lin2 = nn.Sequential(
            nn.Dropout(p=0.1, inplace=True),
            nn.Linear(in_features=n_joint_feats, out_features=output_shape),
        )

    def forward(self, batch):
        """Map a batch dict with keys ``image`` and ``blur_feats`` to the head output."""
        image_feats = self.backbone(batch['image'])
        tabular_feats = self.lin1(batch['blur_feats'])

        joint = torch.cat([image_feats, tabular_feats], dim=1)
        return self.lin2(joint)

class Trainer():
    """Training / validation / inference loop with early stopping and checkpoints.

    Each epoch runs one pass over the training loader and one over the
    validation loader, steps the scheduler on the validation ROC AUC, saves a
    checkpoint whenever that metric improves and stops once it has failed to
    improve for ``max_tol`` consecutive epochs.

    Validation and test batches are expected to carry several crops per image
    (image tensor of shape ``(batch, n_crops, C, H, W)``); the per-crop model
    outputs are summed per image.

    Args:
        model: network called with the batch dict.
        train_dataloader: loader of training batches.
        val_dataloader: loader of multi-crop validation batches.
        loss_fn: loss called as ``loss_fn(prediction, label)``.
        optimizer: optimizer over the model parameters.
        scheduler: scheduler stepped with the validation metric.
        device: device the model and batches are moved to.
        max_tol: number of non-improving epochs tolerated before stopping.
        load_best: reload the best checkpoint when training ends.
        fold: fold id, used in the checkpoint file name.
    """

    def __init__(self, model: torch.nn.Module, train_dataloader: DataLoader, val_dataloader: DataLoader,loss_fn: torch.nn.Module, optimizer: torch.optim.Optimizer,scheduler, device: Union[torch.device,str], max_tol:int=12, load_best:bool=True, fold:int=111) -> None:

        self.model = model.to(device)
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.device = device

        self.train_dl = train_dataloader
        self.val_dl = val_dataloader

        self.loss_fn = loss_fn
        self.metric = BinaryAUROC()

        # Per-batch losses of the current epoch; cleared after every epoch.
        self.train_losses = []
        self.val_losses = []

        # Early-stopping state.
        self.best_metric = None
        self.max_tol = max_tol
        self.tol = 0
        self.fold = fold

        self.total_epochs = 0
        self.load_best = load_best
        # Path of the most recently saved best checkpoint (None until one exists).
        self.best_ckpt = None
        self.ckpt_dir = 'ckpts/'

        os.makedirs(self.ckpt_dir,exist_ok=True)

    def training_step(self, batch):
        """Run one optimisation step on ``batch`` and record its loss."""
        batch = self.dict_to_device(batch)
        y = batch['label']

        y_pred = self.model(batch)
        loss = self.loss_fn(y_pred, y)

        loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()

        self.train_losses.append(loss.item())

    def crop_prepare(self, batch, n_repeat:int=5):
        """Flatten the crop axis of a multi-crop batch.

        ``image`` goes from ``(batch, n_crops, C, H, W)`` to
        ``(batch * n_crops, C, H, W)``; ``blur_feats`` and ``label`` rows are
        repeated ``n_repeat`` times each so they stay aligned with the crops
        (callers pass ``n_repeat == n_crops``).

        Returns a new dict; the input batch is left untouched.
        """
        bs, ncrops, c, h, w = batch['image'].size()

        prepared = dict(batch)
        prepared['image'] = batch['image'].view(-1, c, h, w)
        prepared['blur_feats'] = batch['blur_feats'].repeat_interleave(n_repeat,dim=0)
        prepared['label'] = batch['label'].repeat_interleave(n_repeat,dim=0)

        return prepared

    def validation_step(self, batch):
        """Record the per-crop loss and update the per-image ROC AUC metric."""
        # Read the crop count from the data instead of assuming five crops.
        n_crops = batch['image'].size(1)

        batch = self.crop_prepare(batch, n_repeat=n_crops)
        batch = self.dict_to_device(batch)

        y = batch['label']

        y_pred = self.model(batch)

        loss = self.loss_fn(y_pred, y)
        # https://torchmetrics.readthedocs.io/en/stable/classification/auroc.html#binaryauroc
        #  If preds has values outside [0,1] range we consider the input to be logits and will auto apply sigmoid per element.
        # Sum the crop outputs of each image; labels were repeated per crop,
        # so every n_crops-th entry is one image's label.
        y_pred = y_pred.view(-1,n_crops).sum(axis=1,keepdim=True)
        y = y[::n_crops]

        self.metric(y_pred,y)

        self.val_losses.append(loss.item())

    def train(self, max_epochs:int=100):
        """Train for at most ``max_epochs`` epochs with early stopping."""

        for epoch in tqdm(range(max_epochs)):
            self.total_epochs += 1

            self.model.train()
            for batch in tqdm(self.train_dl):
                self.training_step(batch)

            self.model.eval()
            with torch.inference_mode():
                for batch in self.val_dl:
                    self.validation_step(batch)

            train_epoch_loss = np.mean(self.train_losses)
            val_epoch_loss = np.mean(self.val_losses)
            val_metric = self.metric.compute()
            self.reset_losses_metrics()

            self.scheduler.step(val_metric)

            print(f"Epoch: {epoch+1} | "
                    f"train_loss: {train_epoch_loss:.4f} | "
                    f"val_loss: {val_epoch_loss:.4f} | "
                    f"ROC_AUC: {val_metric:.5f}")

            # early_stopping() must run before is_best_metric(): it compares
            # against the best value of the *previous* epochs.
            if self.early_stopping(val_metric):
                break
            elif self.is_best_metric(val_metric):
                self.save_checkpoint(f'{self.ckpt_dir}best_fold{self.fold}.ckpt')


        print(f'[BEST_METRIC]: {self.best_metric}')

        # best_ckpt stays None when no checkpoint was written (e.g. max_epochs=0).
        if self.load_best and self.best_ckpt is not None:
            self.load_checkpoint(self.best_ckpt)

    def predict(self, test_dl:DataLoader):
        """Return sigmoid(sum of crop outputs) per image as an ``(n, 1)`` numpy array."""
        preds = []
        self.model.eval()
        with torch.inference_mode():
            for batch in test_dl:
                n_crops = batch['image'].size(1)
                batch = self.crop_prepare(batch, n_repeat=n_crops)
                batch = self.dict_to_device(batch)
                y_pred = self.model(batch)
                y_pred = y_pred.view(-1,n_crops).sum(axis=1,keepdim=True)
                y_probs = torch.sigmoid(y_pred).cpu()
                preds.append(y_probs)

        return torch.cat(preds).numpy()

    def dict_to_device(self, batch:Dict[str,torch.Tensor]) -> Dict[str,torch.Tensor]:
        """Return a copy of ``batch`` with every value that has ``.to`` moved to the device."""
        return {k: v.to(self.device) if hasattr(v, 'to') else v for k, v in batch.items()}

    def reset_losses_metrics(self) -> None:
        """Clear the per-epoch loss lists and the metric state."""
        self.train_losses = []
        self.val_losses = []
        self.metric.reset()

    def is_best_metric(self, val_metric:float) -> bool:
        """Update ``best_metric`` and return True if ``val_metric`` is a new best."""
        if (self.best_metric is None) or val_metric > self.best_metric:
            self.best_metric = val_metric
            return True
        return False

    def early_stopping(self, val_metric:float) -> bool:
        """Count non-improving epochs; return True once ``max_tol`` is reached."""
        if (self.best_metric is not None) and val_metric <= self.best_metric:
            self.tol += 1
        else:
            self.tol = 0
        if self.tol >= self.max_tol:
            print('early_stopping')
            return True
        return False

    def save_checkpoint(self, filename:str = 'best.ckpt') -> None:
        """Save model/optimizer state to ``filename`` and remember it as the best checkpoint."""
        checkpoint = {'epoch': self.total_epochs,
                      'model_state_dict': self.model.state_dict(),
                      'optimizer_state_dict': self.optimizer.state_dict(),
                      'best_metric':self.best_metric}
        torch.save(checkpoint, filename)
        self.best_ckpt = filename

    def load_checkpoint(self, filename:str = 'best.ckpt') -> None:
        """Restore model and optimizer state from ``filename``.

        The model is left in training mode afterwards.
        """
        print('load checkpoint: ', filename)
        # map_location lets a checkpoint saved on one device load on another.
        # NOTE: torch.load unpickles the file; only load checkpoints you trust.
        checkpoint = torch.load(filename, map_location=self.device)

        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.model.train()

In [23]:
# Number of cross-validation folds and DataLoader batch sizes.
N_FOLDS = 4
TRAIN_BATCH_SIZE = 64
VAL_BATCH_SIZE = 64

# Test data goes through the multi-crop validation transform; unshuffled,
# so predictions line up with the rows of df_test.
test_ds = Dset(df_test, augmentation=val_transform)
test_dl = DataLoader(test_ds, batch_size=VAL_BATCH_SIZE, num_workers=N_CPU)

In [None]:
# import shutil
# shutil.rmtree('./ckpts')

In [24]:
# Running SUM of the per-fold test predictions.
# NOTE: values therefore lie in [0, N_FOLDS]; divide by N_FOLDS if calibrated
# probabilities are needed (the ranking is the same either way).
preds = np.zeros(len(df_test))

# The fold assignment is deterministic for a fixed seed, so compute it once
# instead of on every iteration.
df_train = add_folds(df_train, n_folds=N_FOLDS, seed=SEED)

for fold in range(N_FOLDS):
    print(f'[START TRAINING FOLD]: {fold}')

    Xy_train = df_train[df_train['fold']!=fold]
    Xy_val = df_train[df_train['fold']==fold]

    train_ds = Dset(Xy_train, augmentation=train_transform)
    val_ds = Dset(Xy_val, augmentation=val_transform)

    train_dl = DataLoader(train_ds, batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=N_CPU)
    val_dl = DataLoader(val_ds, batch_size=VAL_BATCH_SIZE, num_workers=N_CPU)

    # Fresh model, optimizer and scheduler for every fold.
    model = Model().to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # mode 'max': the scheduler is stepped with the validation ROC AUC.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', factor=0.5, patience=5, threshold=0.0001, min_lr=0.00005,verbose=True)

    loss_fn = BCEWithLogitsLoss()

    trainer = Trainer(model=model,
                       train_dataloader=train_dl,
                       val_dataloader=val_dl,
                       optimizer=optimizer,
                       scheduler=scheduler,
                       loss_fn=loss_fn,
                       device=device,
                       fold=fold)

    trainer.train(max_epochs=1000)

    fold_preds = trainer.predict(test_dl)
    preds += fold_preds.flatten()

    # The optimizer (and the scheduler through it) references the model
    # parameters; drop them too, otherwise the old model stays in memory
    # until these names are rebound in the next fold.
    del model, trainer, optimizer, scheduler
    gc.collect()

    torch.cuda.empty_cache()

[START TRAINING FOLD]: 0


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.6001 | val_loss: 0.3118 | ROC_AUC: 0.98043


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 2 | train_loss: 0.2147 | val_loss: 0.1997 | ROC_AUC: 0.99455


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 3 | train_loss: 0.2002 | val_loss: 0.1604 | ROC_AUC: 0.99537


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 4 | train_loss: 0.1659 | val_loss: 0.1312 | ROC_AUC: 0.99580


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 5 | train_loss: 0.1409 | val_loss: 0.2091 | ROC_AUC: 0.99356


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 6 | train_loss: 0.1202 | val_loss: 0.1778 | ROC_AUC: 0.99772


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 7 | train_loss: 0.0966 | val_loss: 0.2306 | ROC_AUC: 0.98697


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 8 | train_loss: 0.1093 | val_loss: 0.1473 | ROC_AUC: 0.99647


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 9 | train_loss: 0.0943 | val_loss: 0.1166 | ROC_AUC: 0.99840


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 10 | train_loss: 0.0824 | val_loss: 0.1311 | ROC_AUC: 0.99765


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 11 | train_loss: 0.0847 | val_loss: 0.1307 | ROC_AUC: 0.99807


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 12 | train_loss: 0.0707 | val_loss: 0.1070 | ROC_AUC: 0.99935


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 13 | train_loss: 0.0893 | val_loss: 0.0976 | ROC_AUC: 0.99898


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 14 | train_loss: 0.0820 | val_loss: 0.1655 | ROC_AUC: 0.99436


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 15 | train_loss: 0.0847 | val_loss: 0.1378 | ROC_AUC: 0.99736


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 16 | train_loss: 0.0711 | val_loss: 0.1165 | ROC_AUC: 0.99651


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 17 | train_loss: 0.1149 | val_loss: 0.1496 | ROC_AUC: 0.99264


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 00018: reducing learning rate of group 0 to 5.0000e-04.
Epoch: 18 | train_loss: 0.0872 | val_loss: 0.1112 | ROC_AUC: 0.99695


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 19 | train_loss: 0.0628 | val_loss: 0.0891 | ROC_AUC: 0.99833


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 20 | train_loss: 0.0488 | val_loss: 0.0769 | ROC_AUC: 0.99884


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 21 | train_loss: 0.0504 | val_loss: 0.0675 | ROC_AUC: 0.99928


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 22 | train_loss: 0.0497 | val_loss: 0.0826 | ROC_AUC: 0.99896


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 23 | train_loss: 0.0428 | val_loss: 0.0714 | ROC_AUC: 0.99952


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 24 | train_loss: 0.0310 | val_loss: 0.0674 | ROC_AUC: 0.99956


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 25 | train_loss: 0.0332 | val_loss: 0.0698 | ROC_AUC: 0.99926


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 26 | train_loss: 0.0426 | val_loss: 0.0631 | ROC_AUC: 0.99938


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 27 | train_loss: 0.0331 | val_loss: 0.0839 | ROC_AUC: 0.99927


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 28 | train_loss: 0.0433 | val_loss: 0.0904 | ROC_AUC: 0.99875


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 00029: reducing learning rate of group 0 to 2.5000e-04.
Epoch: 29 | train_loss: 0.0355 | val_loss: 0.0923 | ROC_AUC: 0.99881


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 30 | train_loss: 0.0299 | val_loss: 0.0710 | ROC_AUC: 0.99886


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 31 | train_loss: 0.0323 | val_loss: 0.0704 | ROC_AUC: 0.99901


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 32 | train_loss: 0.0296 | val_loss: 0.0649 | ROC_AUC: 0.99912


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 33 | train_loss: 0.0266 | val_loss: 0.0647 | ROC_AUC: 0.99942


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 34 | train_loss: 0.0225 | val_loss: 0.0719 | ROC_AUC: 0.99940


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 00035: reducing learning rate of group 0 to 1.2500e-04.
Epoch: 35 | train_loss: 0.0250 | val_loss: 0.0666 | ROC_AUC: 0.99923


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 36 | train_loss: 0.0216 | val_loss: 0.0591 | ROC_AUC: 0.99940
early_stopping
[BEST_METRIC]: 0.9995578527450562
load checkpoint:  ckpts/best_fold0.ckpt
[START TRAINING FOLD]: 1


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 5.5515 | val_loss: 1.0456 | ROC_AUC: 0.85264


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 2 | train_loss: 1.0787 | val_loss: 0.3472 | ROC_AUC: 0.98630


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 3 | train_loss: 0.6627 | val_loss: 0.2564 | ROC_AUC: 0.99550


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 4 | train_loss: 0.5065 | val_loss: 0.2374 | ROC_AUC: 0.99536


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 5 | train_loss: 0.3782 | val_loss: 0.3453 | ROC_AUC: 0.98929


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 6 | train_loss: 0.5404 | val_loss: 0.2004 | ROC_AUC: 0.99699


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 7 | train_loss: 0.4099 | val_loss: 0.1800 | ROC_AUC: 0.99703


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 8 | train_loss: 0.4020 | val_loss: 0.1586 | ROC_AUC: 0.99900


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 9 | train_loss: 0.2843 | val_loss: 0.2009 | ROC_AUC: 0.99344


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 10 | train_loss: 0.2424 | val_loss: 0.1755 | ROC_AUC: 0.99833


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 11 | train_loss: 0.2330 | val_loss: 0.1668 | ROC_AUC: 0.99804


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 12 | train_loss: 0.2602 | val_loss: 0.1565 | ROC_AUC: 0.99837


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 13 | train_loss: 0.1957 | val_loss: 0.3364 | ROC_AUC: 0.97978


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 00014: reducing learning rate of group 0 to 5.0000e-04.
Epoch: 14 | train_loss: 0.2239 | val_loss: 0.1542 | ROC_AUC: 0.99876


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 15 | train_loss: 0.1687 | val_loss: 0.1324 | ROC_AUC: 0.99896


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 16 | train_loss: 0.1461 | val_loss: 0.1261 | ROC_AUC: 0.99912


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 17 | train_loss: 0.1246 | val_loss: 0.1229 | ROC_AUC: 0.99937


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 18 | train_loss: 0.1081 | val_loss: 0.1376 | ROC_AUC: 0.99775


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 19 | train_loss: 0.1096 | val_loss: 0.1248 | ROC_AUC: 0.99793


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 20 | train_loss: 0.1022 | val_loss: 0.1144 | ROC_AUC: 0.99809


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 21 | train_loss: 0.1336 | val_loss: 0.1681 | ROC_AUC: 0.99505


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 22 | train_loss: 0.1476 | val_loss: 0.1225 | ROC_AUC: 0.99815


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 23 | train_loss: 0.1154 | val_loss: 0.1212 | ROC_AUC: 0.99966


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 24 | train_loss: 0.0955 | val_loss: 0.1010 | ROC_AUC: 0.99982


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 25 | train_loss: 0.0851 | val_loss: 0.1089 | ROC_AUC: 0.99815


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 26 | train_loss: 0.1245 | val_loss: 0.1112 | ROC_AUC: 0.99814


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 27 | train_loss: 0.1047 | val_loss: 0.1423 | ROC_AUC: 0.99787


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 28 | train_loss: 0.1146 | val_loss: 0.0973 | ROC_AUC: 0.99949


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 29 | train_loss: 0.0550 | val_loss: 0.0913 | ROC_AUC: 0.99845


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 00030: reducing learning rate of group 0 to 2.5000e-04.
Epoch: 30 | train_loss: 0.0884 | val_loss: 0.1137 | ROC_AUC: 0.99983


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 31 | train_loss: 0.0778 | val_loss: 0.1268 | ROC_AUC: 0.99875


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 32 | train_loss: 0.0755 | val_loss: 0.1279 | ROC_AUC: 0.99739


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 33 | train_loss: 0.0682 | val_loss: 0.0871 | ROC_AUC: 0.99987


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 34 | train_loss: 0.0634 | val_loss: 0.0838 | ROC_AUC: 0.99990


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 35 | train_loss: 0.0602 | val_loss: 0.0814 | ROC_AUC: 0.99994


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 36 | train_loss: 0.0493 | val_loss: 0.0851 | ROC_AUC: 0.99990


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 37 | train_loss: 0.0544 | val_loss: 0.1122 | ROC_AUC: 0.99945


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 38 | train_loss: 0.0681 | val_loss: 0.0814 | ROC_AUC: 0.99997


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 39 | train_loss: 0.0900 | val_loss: 0.1599 | ROC_AUC: 0.99846


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 40 | train_loss: 0.0981 | val_loss: 0.0999 | ROC_AUC: 0.99954


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 00041: reducing learning rate of group 0 to 1.2500e-04.
Epoch: 41 | train_loss: 0.0860 | val_loss: 0.1172 | ROC_AUC: 0.99946


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 42 | train_loss: 0.0758 | val_loss: 0.0887 | ROC_AUC: 0.99979


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 43 | train_loss: 0.0544 | val_loss: 0.1000 | ROC_AUC: 0.99984


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 44 | train_loss: 0.0531 | val_loss: 0.0860 | ROC_AUC: 0.99994


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 45 | train_loss: 0.0526 | val_loss: 0.0909 | ROC_AUC: 0.99985


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 46 | train_loss: 0.0576 | val_loss: 0.0997 | ROC_AUC: 0.99976


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 00047: reducing learning rate of group 0 to 6.2500e-05.
Epoch: 47 | train_loss: 0.0506 | val_loss: 0.0979 | ROC_AUC: 0.99848


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 48 | train_loss: 0.0426 | val_loss: 0.0826 | ROC_AUC: 0.99847


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 49 | train_loss: 0.0439 | val_loss: 0.0786 | ROC_AUC: 0.99991


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 50 | train_loss: 0.0394 | val_loss: 0.0818 | ROC_AUC: 0.99990
early_stopping
[BEST_METRIC]: 0.9999728798866272
load checkpoint:  ckpts/best_fold1.ckpt
[START TRAINING FOLD]: 2


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 9.7456 | val_loss: 2.9584 | ROC_AUC: 0.77864


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 2 | train_loss: 1.3898 | val_loss: 0.9568 | ROC_AUC: 0.96871


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 3 | train_loss: 0.7485 | val_loss: 0.4048 | ROC_AUC: 0.98797


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 4 | train_loss: 0.6484 | val_loss: 0.4118 | ROC_AUC: 0.98480


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 5 | train_loss: 0.4640 | val_loss: 0.3840 | ROC_AUC: 0.99079


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 6 | train_loss: 0.3879 | val_loss: 0.3059 | ROC_AUC: 0.98775


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 7 | train_loss: 0.3498 | val_loss: 0.3544 | ROC_AUC: 0.99398


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 8 | train_loss: 0.2799 | val_loss: 0.3632 | ROC_AUC: 0.99403


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 9 | train_loss: 0.3851 | val_loss: 0.2574 | ROC_AUC: 0.99169


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 10 | train_loss: 0.2134 | val_loss: 0.3381 | ROC_AUC: 0.98994


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 11 | train_loss: 0.2373 | val_loss: 0.3274 | ROC_AUC: 0.99343


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 12 | train_loss: 0.1963 | val_loss: 0.2131 | ROC_AUC: 0.99839


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 13 | train_loss: 0.2536 | val_loss: 0.5689 | ROC_AUC: 0.98472


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 14 | train_loss: 0.2503 | val_loss: 0.2701 | ROC_AUC: 0.99043


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 15 | train_loss: 0.2795 | val_loss: 0.2422 | ROC_AUC: 0.99190


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 16 | train_loss: 0.2151 | val_loss: 0.3183 | ROC_AUC: 0.99094


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 17 | train_loss: 0.2099 | val_loss: 0.2282 | ROC_AUC: 0.99302


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 00018: reducing learning rate of group 0 to 5.0000e-04.
Epoch: 18 | train_loss: 0.1808 | val_loss: 0.1992 | ROC_AUC: 0.99724


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 19 | train_loss: 0.2142 | val_loss: 0.2193 | ROC_AUC: 0.99422


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 20 | train_loss: 0.1274 | val_loss: 0.2109 | ROC_AUC: 0.99420


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 21 | train_loss: 0.1992 | val_loss: 0.2041 | ROC_AUC: 0.99452


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 22 | train_loss: 0.1756 | val_loss: 0.2436 | ROC_AUC: 0.99441


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 23 | train_loss: 0.1457 | val_loss: 0.1743 | ROC_AUC: 0.99574


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 00024: reducing learning rate of group 0 to 2.5000e-04.
Epoch: 24 | train_loss: 0.1286 | val_loss: 0.2054 | ROC_AUC: 0.99070
early_stopping
[BEST_METRIC]: 0.9983936548233032
load checkpoint:  ckpts/best_fold2.ckpt
[START TRAINING FOLD]: 3


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 2.0348 | val_loss: 0.3146 | ROC_AUC: 0.98184


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 2 | train_loss: 0.8438 | val_loss: 0.2641 | ROC_AUC: 0.98888


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 3 | train_loss: 0.7850 | val_loss: 0.3086 | ROC_AUC: 0.99202


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 4 | train_loss: 0.5026 | val_loss: 0.5057 | ROC_AUC: 0.97750


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 5 | train_loss: 0.4944 | val_loss: 0.4782 | ROC_AUC: 0.97955


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 6 | train_loss: 0.4180 | val_loss: 0.2403 | ROC_AUC: 0.99459


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 7 | train_loss: 0.4728 | val_loss: 0.3418 | ROC_AUC: 0.98667


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 8 | train_loss: 0.3316 | val_loss: 0.3332 | ROC_AUC: 0.98710


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 9 | train_loss: 0.2657 | val_loss: 0.1921 | ROC_AUC: 0.99522


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 10 | train_loss: 0.3135 | val_loss: 0.2058 | ROC_AUC: 0.99661


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 11 | train_loss: 0.2776 | val_loss: 0.1899 | ROC_AUC: 0.99691


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 12 | train_loss: 0.2926 | val_loss: 0.2423 | ROC_AUC: 0.99468


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 13 | train_loss: 0.2644 | val_loss: 0.2605 | ROC_AUC: 0.99487


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 14 | train_loss: 0.2139 | val_loss: 0.2478 | ROC_AUC: 0.99731


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 15 | train_loss: 0.2261 | val_loss: 0.2749 | ROC_AUC: 0.99730


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 16 | train_loss: 0.1627 | val_loss: 0.1446 | ROC_AUC: 0.99782


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 17 | train_loss: 0.1143 | val_loss: 0.1665 | ROC_AUC: 0.99710


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 18 | train_loss: 0.1302 | val_loss: 0.1939 | ROC_AUC: 0.99624


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 19 | train_loss: 0.1596 | val_loss: 0.2545 | ROC_AUC: 0.99546


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 20 | train_loss: 0.1521 | val_loss: 0.2005 | ROC_AUC: 0.99489


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 21 | train_loss: 0.1225 | val_loss: 0.1744 | ROC_AUC: 0.99607


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 00022: reducing learning rate of group 0 to 5.0000e-04.
Epoch: 22 | train_loss: 0.1757 | val_loss: 0.2328 | ROC_AUC: 0.98920


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 23 | train_loss: 0.1098 | val_loss: 0.1909 | ROC_AUC: 0.99519


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 24 | train_loss: 0.0819 | val_loss: 0.1572 | ROC_AUC: 0.99764


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 25 | train_loss: 0.0613 | val_loss: 0.1496 | ROC_AUC: 0.99845


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 26 | train_loss: 0.0699 | val_loss: 0.1490 | ROC_AUC: 0.99883


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 27 | train_loss: 0.0703 | val_loss: 0.1478 | ROC_AUC: 0.99864


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 28 | train_loss: 0.0653 | val_loss: 0.1288 | ROC_AUC: 0.99871


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 29 | train_loss: 0.0723 | val_loss: 0.1935 | ROC_AUC: 0.99859


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 30 | train_loss: 0.0818 | val_loss: 0.1396 | ROC_AUC: 0.99833


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 31 | train_loss: 0.0724 | val_loss: 0.1707 | ROC_AUC: 0.99792


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 00032: reducing learning rate of group 0 to 2.5000e-04.
Epoch: 32 | train_loss: 0.0381 | val_loss: 0.1322 | ROC_AUC: 0.99872


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 33 | train_loss: 0.0495 | val_loss: 0.1252 | ROC_AUC: 0.99890


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 34 | train_loss: 0.0627 | val_loss: 0.1334 | ROC_AUC: 0.99906


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 35 | train_loss: 0.0371 | val_loss: 0.1297 | ROC_AUC: 0.99932


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 36 | train_loss: 0.0641 | val_loss: 0.1419 | ROC_AUC: 0.99931


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 37 | train_loss: 0.0427 | val_loss: 0.1207 | ROC_AUC: 0.99905


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 38 | train_loss: 0.0615 | val_loss: 0.1330 | ROC_AUC: 0.99897


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 39 | train_loss: 0.0599 | val_loss: 0.1362 | ROC_AUC: 0.99869


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 40 | train_loss: 0.0438 | val_loss: 0.1330 | ROC_AUC: 0.99905


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 00041: reducing learning rate of group 0 to 1.2500e-04.
Epoch: 41 | train_loss: 0.0318 | val_loss: 0.1244 | ROC_AUC: 0.99887


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 42 | train_loss: 0.0526 | val_loss: 0.1383 | ROC_AUC: 0.99890


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 43 | train_loss: 0.0372 | val_loss: 0.1318 | ROC_AUC: 0.99897


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 44 | train_loss: 0.0373 | val_loss: 0.1284 | ROC_AUC: 0.99872


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 45 | train_loss: 0.0311 | val_loss: 0.1370 | ROC_AUC: 0.99875


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch: 46 | train_loss: 0.0506 | val_loss: 0.1270 | ROC_AUC: 0.99908


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 00047: reducing learning rate of group 0 to 6.2500e-05.
Epoch: 47 | train_loss: 0.0437 | val_loss: 0.1292 | ROC_AUC: 0.99900
early_stopping
[BEST_METRIC]: 0.9993232488632202
load checkpoint:  ckpts/best_fold3.ckpt


In [25]:
# Build the submission file: one row per test image with its bare file name
# and the predicted blur score rounded to 5 decimals.

# Fail loudly on a length mismatch. Assigning a Series to a DataFrame column
# aligns on index labels, so a mismatch would otherwise be silently padded
# with NaN (or extra predictions dropped) instead of raising.
n_test, n_preds = len(df_test), len(preds)
assert n_preds == n_test, f"expected {n_test} predictions, got {n_preds}"

# NOTE(review): the recorded output of this cell shows a blur value of 4.0,
# i.e. outside [0, 1] -- confirm whether `preds` are meant to be averaged
# probabilities or raw/summed scores before submitting.
sub = pd.DataFrame({
    # Strip the directory prefix that was prepended to df_test['filename'].
    'filename': df_test['filename'].map(os.path.basename).to_numpy(),
    # .to_numpy() makes the assignment positional, so the i-th prediction
    # always lands on the i-th test row regardless of df_test's index.
    'blur': pd.Series(preds).round(5).to_numpy(),
})
sub.to_csv('submission.csv', index=False)
sub[:3]

Unnamed: 0,filename,blur
0,bnxzvzqlzlnnbxfkcuin.jpg,0.0
1,powqsnpoynygwqsciedp.jpg,4.0
2,zpjlbfhurhygjnqccpii.jpg,0.0
