In [1]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from matplotlib import pyplot as plt
import seaborn as sns

# ====================================================
# Library
# ====================================================
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

import os
import math
import time
import random
import shutil
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter

import scipy as sp
import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

from tqdm.auto import tqdm
from functools import partial

import cv2
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau

import albumentations as A
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform

import timm


from torch.cuda.amp import autocast, GradScaler

import warnings 
warnings.filterwarnings('ignore')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
import os

OUTPUT_DIR = './'
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

def init_logger(log_file=OUTPUT_DIR+'train.log'):
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = init_logger()

In [3]:
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import os

train = pd.DataFrame(columns=['file_path','target'])
test = pd.DataFrame(columns=['file_path','target'])

def get_train_data(path,train):
    
    for dirname, _, filenames in os.walk(path):
        
        if dirname.split('/')[-1] == 'meme':
            #Memes
            for filename in filenames:
#                 print(filename)
                train = train.append({'file_path':os.path.join(dirname, filename), 'target':1},ignore_index=True)
        elif dirname.split('/')[-1] == 'not_meme':
            #Not Memes
            for filename in filenames:
                train = train.append({'file_path':os.path.join(dirname, filename), 'target':0},ignore_index=True)
        else:
            continue
            
    return train

def get_test_data(path,test):
    
    for dirname, _, filenames in os.walk(path):
        if dirname.split('/')[-1] == 'meme':
            #Memes
            for filename in filenames:
                test = test.append({'file_path':os.path.join(dirname, filename), 'target':1},ignore_index=True)
        elif dirname.split('/')[-1] == 'not_meme':
            #Not Memes
            for filename in filenames:
                test = test.append({'file_path':os.path.join(dirname, filename), 'target':0},ignore_index=True)
        else:
            continue
            
    return test
            
train = get_train_data('../input/memes-classification-dataset/memes_dataset/train',train)
test = get_test_data('../input/memes-classification-dataset/memes_dataset/test',test)

In [4]:
# ====================================================
# CFG
# ====================================================
class CFG:
    apex=False
    debug=False
    print_freq=100
    num_workers=4
    model_name='nfnet_l0'
    size=224
    scheduler='CosineAnnealingLR' # ['ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts']
    epochs=6
    #factor=0.2 # ReduceLROnPlateau
    #patience=4 # ReduceLROnPlateau
    #eps=1e-6 # ReduceLROnPlateau
    T_max=6 # CosineAnnealingLR
    #T_0=6 # CosineAnnealingWarmRestarts
    lr=1e-4
    min_lr=1e-6
    batch_size=64
    weight_decay=1e-6
    gradient_accumulation_steps=1
    max_grad_norm=1000
    seed=42
    target_size=1
    target_col='target'
    n_fold=4
    trn_fold=[0, 1, 2, 3]
    train=True
    
if CFG.debug:
    CFG.epochs = 1
    train = train.sample(n=1000, random_state=CFG.seed).reset_index(drop=True)

In [5]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
train['target'] = label_encoder.fit_transform(train['target'])
test['target'] = label_encoder.fit_transform(test['target'])

In [6]:
Fold = StratifiedKFold(n_splits=5, shuffle=True, random_state=CFG.seed)
for n, (train_index, val_index) in enumerate(Fold.split(train, train[CFG.target_col])):
    train.loc[val_index, 'fold'] = int(n)
train['fold'] = train['fold'].astype(int)
display(train.groupby(['fold', 'target']).size())

fold  target
0     0         125
      1         134
1     0         125
      1         134
2     0         126
      1         133
3     0         125
      1         133
4     0         125
      1         133
dtype: int64

In [7]:
def get_score(y_true, y_pred):
    score = roc_auc_score(y_true, y_pred)
    return score

In [8]:
# ====================================================
# Dataset
# ====================================================
class TrainDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['file_path'].values
        self.labels = df['target'].values
        self.transform = transform
        
    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        file_path = self.file_names[index]
        image = Image.open(file_path).convert('RGB')
#         image = image.astype(np.float32)
        image = image.resize((CFG.size,CFG.size))
        image = np.array(image)
        image = image[:,:,:3]
        image = np.moveaxis(image, -1, 0)
        image = torch.from_numpy(image).float()
        
#         if self.transform:
#             image = self.transform(image=image)['image']
#         else:
#             image = image[np.newaxis,:,:]
            
            
        label = torch.tensor(self.labels[index]).float()
        return image, label

In [9]:
# ====================================================
# Transforms
# ====================================================
def get_transforms(*, data):
    
    if data == 'train':
        return A.Compose([
            A.Resize(CFG.size, CFG.size),
            ToTensorV2(),
        ])

    elif data == 'valid':
        return A.Compose([
            A.Resize(CFG.size, CFG.size),
            ToTensorV2(),
        ])

In [10]:
# ====================================================
# MODEL
# ====================================================
class CustomModel(nn.Module):
    def __init__(self, cfg, pretrained=False):
        super().__init__()
        self.cfg = cfg
        self.model = timm.create_model(self.cfg.model_name, pretrained=pretrained, in_chans=3)
        self.n_features = self.model.head.fc.in_features
        self.model.head.fc = nn.Linear(self.n_features, self.cfg.target_size)

    def forward(self, x):
        output = self.model(x)
        return output

In [11]:
# ====================================================
# Helper functions
# ====================================================
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (remain %s)' % (asMinutes(s), asMinutes(rs))


def train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device):
    if CFG.apex:
        scaler = GradScaler()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    scores = AverageMeter()
    # switch to train mode
    model.train()
    start = end = time.time()
    global_step = 0
    for step, (images, labels) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        if CFG.apex:
            with autocast():
                y_preds = model(images)
                loss = criterion(y_preds.view(-1), labels)
        else:
            y_preds = model(images)
            loss = criterion(y_preds.view(-1), labels)
        # record loss
        losses.update(loss.item(), batch_size)
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        if CFG.apex:
            scaler.scale(loss).backward()
        else:
            loss.backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            if CFG.apex:
                scaler.step(optimizer)
                scaler.update()
            else:
                optimizer.step()
            optimizer.zero_grad()
            global_step += 1
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  #'LR: {lr:.6f}  '
                  .format(
                   epoch+1, step, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses,
                   remain=timeSince(start, float(step+1)/len(train_loader)),
                   grad_norm=grad_norm,
                   #lr=scheduler.get_lr()[0],
                   ))
    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    scores = AverageMeter()
    # switch to evaluation mode
    model.eval()
    preds = []
    start = end = time.time()
    for step, (images, labels) in enumerate(valid_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        # compute loss
        with torch.no_grad():
            y_preds = model(images)
        loss = criterion(y_preds.view(-1), labels)
        losses.update(loss.item(), batch_size)
        # record accuracy
        preds.append(y_preds.sigmoid().to('cpu').numpy())
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(
                   step, len(valid_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses,
                   remain=timeSince(start, float(step+1)/len(valid_loader)),
                   ))
    predictions = np.concatenate(preds)
    return losses.avg, predictions

In [12]:
# ====================================================
# Train loop
# ====================================================
def train_loop(folds, fold):
    
    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    trn_idx = folds[folds['fold'] != fold].index
    val_idx = folds[folds['fold'] == fold].index

    train_folds = folds.loc[trn_idx].reset_index(drop=True)
    valid_folds = folds.loc[val_idx].reset_index(drop=True)
    valid_labels = valid_folds[CFG.target_col].values

    train_dataset = TrainDataset(train_folds, 
                                 transform=get_transforms(data='train'))
    valid_dataset = TrainDataset(valid_folds, 
                                 transform=get_transforms(data='valid'))

    train_loader = DataLoader(train_dataset, 
                              batch_size=CFG.batch_size, 
                              shuffle=True, 
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset, 
                              batch_size=CFG.batch_size * 2, 
                              shuffle=False, 
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)
    
    # ====================================================
    # scheduler 
    # ====================================================
    def get_scheduler(optimizer):
        if CFG.scheduler=='ReduceLROnPlateau':
            scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=CFG.factor, patience=CFG.patience, verbose=True, eps=CFG.eps)
        elif CFG.scheduler=='CosineAnnealingLR':
            scheduler = CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr, last_epoch=-1)
        elif CFG.scheduler=='CosineAnnealingWarmRestarts':
            scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=1, eta_min=CFG.min_lr, last_epoch=-1)
        return scheduler

    # ====================================================
    # model & optimizer
    # ====================================================
    model = CustomModel(CFG, pretrained=True)
    model.to(device)

    optimizer = Adam(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay, amsgrad=False)
    scheduler = get_scheduler(optimizer)

    # ====================================================
    # loop
    # ====================================================
    criterion = nn.BCEWithLogitsLoss()

    best_score = 0.
    best_loss = np.inf
    
    for epoch in range(CFG.epochs):
        
        start_time = time.time()
        
        # train
        avg_loss = train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device)

        # eval
        avg_val_loss, preds = valid_fn(valid_loader, model, criterion, device)
        
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(avg_val_loss)
        elif isinstance(scheduler, CosineAnnealingLR):
            scheduler.step()
        elif isinstance(scheduler, CosineAnnealingWarmRestarts):
            scheduler.step()

        # scoring
        score = get_score(valid_labels, preds)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}')

        if score > best_score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(), 
                        'preds': preds},
                        OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best_score.pth')
        
        if avg_val_loss < best_loss:
            best_loss = avg_val_loss
            LOGGER.info(f'Epoch {epoch+1} - Save Best Loss: {best_loss:.4f} Model')
            torch.save({'model': model.state_dict(), 
                        'preds': preds},
                        OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best_loss.pth')
    
    valid_folds['preds'] = torch.load(OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best_loss.pth', 
                                      map_location=torch.device('cpu'))['preds']

    return valid_folds

In [13]:
# ====================================================
# main
# ====================================================
def main():

    """
    Prepare: 1.train 
    """

    def get_result(result_df):
        preds = result_df['preds'].values
        labels = result_df[CFG.target_col].values
        score = get_score(labels, preds)
        LOGGER.info(f'Score: {score:<.4f}')
    
    if CFG.train:
        # train 
        oof_df = pd.DataFrame()
        for fold in range(CFG.n_fold):
            if fold in CFG.trn_fold:
                _oof_df = train_loop(train, fold)
                oof_df = pd.concat([oof_df, _oof_df])
                LOGGER.info(f"========== fold: {fold} result ==========")
                get_result(_oof_df)
        # CV result
        LOGGER.info(f"========== CV ==========")
        get_result(oof_df)
        # save result
        oof_df.to_csv(OUTPUT_DIR+'oof_df.csv', index=False)

In [14]:
if __name__ == '__main__':
    main()

Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/nfnet_l0_ra2-45c6688d.pth" to /root/.cache/torch/hub/checkpoints/nfnet_l0_ra2-45c6688d.pth


Epoch: [1][0/16] Data 5.792 (5.792) Elapsed 0m 13s (remain 3m 19s) Loss: 0.6158(0.6158) Grad: 20.1955  
Epoch: [1][15/16] Data 0.000 (0.572) Elapsed 0m 26s (remain 0m 0s) Loss: 0.3905(0.2565) Grad: 34.8505  
EVAL: [0/3] Data 5.755 (5.755) Elapsed 0m 6s (remain 0m 12s) Loss: 0.0965(0.0965) 


Epoch 1 - avg_train_loss: 0.2565  avg_val_loss: 0.1133  time: 34s
Epoch 1 - Score: 0.9891
Epoch 1 - Save Best Score: 0.9891 Model


EVAL: [2/3] Data 0.000 (2.035) Elapsed 0m 6s (remain 0m 0s) Loss: 0.0210(0.1133) 


Epoch 1 - Save Best Loss: 0.1133 Model


Epoch: [2][0/16] Data 5.288 (5.288) Elapsed 0m 5s (remain 1m 29s) Loss: 0.0444(0.0444) Grad: 2.9241  
Epoch: [2][15/16] Data 0.000 (0.864) Elapsed 0m 24s (remain 0m 0s) Loss: 0.0639(0.0597) Grad: 19.3333  
EVAL: [0/3] Data 5.211 (5.211) Elapsed 0m 5s (remain 0m 11s) Loss: 0.0941(0.0941) 


Epoch 2 - avg_train_loss: 0.0597  avg_val_loss: 0.0897  time: 32s
Epoch 2 - Score: 0.9954
Epoch 2 - Save Best Score: 0.9954 Model


EVAL: [2/3] Data 0.000 (1.928) Elapsed 0m 6s (remain 0m 0s) Loss: 0.0029(0.0897) 


Epoch 2 - Save Best Loss: 0.0897 Model


Epoch: [3][0/16] Data 4.468 (4.468) Elapsed 0m 5s (remain 1m 17s) Loss: 0.0094(0.0094) Grad: 0.7367  
Epoch: [3][15/16] Data 0.000 (0.837) Elapsed 0m 24s (remain 0m 0s) Loss: 0.0067(0.0131) Grad: 0.8904  
EVAL: [0/3] Data 5.251 (5.251) Elapsed 0m 5s (remain 0m 11s) Loss: 0.0980(0.0980) 


Epoch 3 - avg_train_loss: 0.0131  avg_val_loss: 0.0934  time: 31s
Epoch 3 - Score: 0.9958
Epoch 3 - Save Best Score: 0.9958 Model


EVAL: [2/3] Data 0.000 (1.872) Elapsed 0m 6s (remain 0m 0s) Loss: 0.0018(0.0934) 
Epoch: [4][0/16] Data 5.402 (5.402) Elapsed 0m 6s (remain 1m 31s) Loss: 0.0040(0.0040) Grad: 0.6560  
Epoch: [4][15/16] Data 0.000 (0.890) Elapsed 0m 25s (remain 0m 0s) Loss: 0.0007(0.0025) Grad: 0.0795  
EVAL: [0/3] Data 5.736 (5.736) Elapsed 0m 6s (remain 0m 12s) Loss: 0.1225(0.1225) 


Epoch 4 - avg_train_loss: 0.0025  avg_val_loss: 0.1056  time: 32s
Epoch 4 - Score: 0.9956


EVAL: [2/3] Data 0.000 (2.012) Elapsed 0m 6s (remain 0m 0s) Loss: 0.0009(0.1056) 
Epoch: [5][0/16] Data 5.457 (5.457) Elapsed 0m 6s (remain 1m 32s) Loss: 0.0004(0.0004) Grad: 0.0479  
Epoch: [5][15/16] Data 0.000 (0.896) Elapsed 0m 25s (remain 0m 0s) Loss: 0.0014(0.0010) Grad: 0.1107  
EVAL: [0/3] Data 5.596 (5.596) Elapsed 0m 5s (remain 0m 11s) Loss: 0.1083(0.1083) 


Epoch 5 - avg_train_loss: 0.0010  avg_val_loss: 0.1027  time: 32s
Epoch 5 - Score: 0.9957


EVAL: [2/3] Data 0.000 (1.984) Elapsed 0m 6s (remain 0m 0s) Loss: 0.0008(0.1027) 
Epoch: [6][0/16] Data 6.952 (6.952) Elapsed 0m 7s (remain 1m 54s) Loss: 0.0004(0.0004) Grad: 0.0480  
Epoch: [6][15/16] Data 0.000 (0.934) Elapsed 0m 25s (remain 0m 0s) Loss: 0.0005(0.0008) Grad: 0.0507  
EVAL: [0/3] Data 5.431 (5.431) Elapsed 0m 5s (remain 0m 11s) Loss: 0.1131(0.1131) 


Epoch 6 - avg_train_loss: 0.0008  avg_val_loss: 0.1045  time: 33s
Epoch 6 - Score: 0.9956


EVAL: [2/3] Data 0.000 (1.937) Elapsed 0m 6s (remain 0m 0s) Loss: 0.0007(0.1045) 


Score: 0.9954


Epoch: [1][0/16] Data 4.735 (4.735) Elapsed 0m 5s (remain 1m 21s) Loss: 1.0735(1.0735) Grad: 47.6859  
Epoch: [1][15/16] Data 0.000 (0.899) Elapsed 0m 25s (remain 0m 0s) Loss: 0.1272(0.2905) Grad: 8.1203  
EVAL: [0/3] Data 4.503 (4.503) Elapsed 0m 4s (remain 0m 9s) Loss: 0.2033(0.2033) 


Epoch 1 - avg_train_loss: 0.2905  avg_val_loss: 0.1960  time: 31s
Epoch 1 - Score: 0.9793
Epoch 1 - Save Best Score: 0.9793 Model


EVAL: [2/3] Data 0.000 (1.723) Elapsed 0m 5s (remain 0m 0s) Loss: 0.0242(0.1960) 


Epoch 1 - Save Best Loss: 0.1960 Model


Epoch: [2][0/16] Data 5.290 (5.290) Elapsed 0m 5s (remain 1m 29s) Loss: 0.0859(0.0859) Grad: 3.3118  
Epoch: [2][15/16] Data 0.000 (0.922) Elapsed 0m 25s (remain 0m 0s) Loss: 0.0217(0.0567) Grad: 1.7351  
EVAL: [0/3] Data 4.657 (4.657) Elapsed 0m 5s (remain 0m 10s) Loss: 0.1797(0.1797) 


Epoch 2 - avg_train_loss: 0.0567  avg_val_loss: 0.1763  time: 32s
Epoch 2 - Score: 0.9837
Epoch 2 - Save Best Score: 0.9837 Model


EVAL: [2/3] Data 0.000 (1.734) Elapsed 0m 5s (remain 0m 0s) Loss: 0.0623(0.1763) 


Epoch 2 - Save Best Loss: 0.1763 Model


Epoch: [3][0/16] Data 5.485 (5.485) Elapsed 0m 6s (remain 1m 32s) Loss: 0.0770(0.0770) Grad: 6.4075  
Epoch: [3][15/16] Data 0.000 (0.899) Elapsed 0m 25s (remain 0m 0s) Loss: 0.0046(0.0172) Grad: 0.6101  
EVAL: [0/3] Data 4.649 (4.649) Elapsed 0m 5s (remain 0m 10s) Loss: 0.1316(0.1316) 


Epoch 3 - avg_train_loss: 0.0172  avg_val_loss: 0.1678  time: 31s
Epoch 3 - Score: 0.9881
Epoch 3 - Save Best Score: 0.9881 Model


EVAL: [2/3] Data 0.000 (1.747) Elapsed 0m 5s (remain 0m 0s) Loss: 0.0180(0.1678) 


Epoch 3 - Save Best Loss: 0.1678 Model


Epoch: [4][0/16] Data 5.577 (5.577) Elapsed 0m 6s (remain 1m 34s) Loss: 0.0056(0.0056) Grad: 0.6978  
Epoch: [4][15/16] Data 0.000 (0.925) Elapsed 0m 25s (remain 0m 0s) Loss: 0.0049(0.0032) Grad: 0.6214  
EVAL: [0/3] Data 4.682 (4.682) Elapsed 0m 5s (remain 0m 10s) Loss: 0.1625(0.1625) 


Epoch 4 - avg_train_loss: 0.0032  avg_val_loss: 0.1826  time: 32s
Epoch 4 - Score: 0.9878


EVAL: [2/3] Data 0.000 (1.730) Elapsed 0m 5s (remain 0m 0s) Loss: 0.0061(0.1826) 
Epoch: [5][0/16] Data 5.227 (5.227) Elapsed 0m 5s (remain 1m 29s) Loss: 0.0014(0.0014) Grad: 0.1642  
Epoch: [5][15/16] Data 0.000 (0.905) Elapsed 0m 25s (remain 0m 0s) Loss: 0.0007(0.0014) Grad: 0.1054  
EVAL: [0/3] Data 4.708 (4.708) Elapsed 0m 5s (remain 0m 10s) Loss: 0.1761(0.1761) 


Epoch 5 - avg_train_loss: 0.0014  avg_val_loss: 0.1858  time: 31s
Epoch 5 - Score: 0.9881


EVAL: [2/3] Data 0.000 (1.761) Elapsed 0m 6s (remain 0m 0s) Loss: 0.0048(0.1858) 
Epoch: [6][0/16] Data 5.272 (5.272) Elapsed 0m 5s (remain 1m 29s) Loss: 0.0017(0.0017) Grad: 0.1407  
Epoch: [6][15/16] Data 0.000 (0.920) Elapsed 0m 25s (remain 0m 0s) Loss: 0.0004(0.0010) Grad: 0.0666  
EVAL: [0/3] Data 4.718 (4.718) Elapsed 0m 5s (remain 0m 10s) Loss: 0.1765(0.1765) 


Epoch 6 - avg_train_loss: 0.0010  avg_val_loss: 0.1871  time: 32s
Epoch 6 - Score: 0.9881
Score: 0.9881


EVAL: [2/3] Data 0.000 (1.847) Elapsed 0m 6s (remain 0m 0s) Loss: 0.0048(0.1871) 
Epoch: [1][0/16] Data 4.158 (4.158) Elapsed 0m 4s (remain 1m 12s) Loss: 0.8590(0.8590) Grad: 28.6530  
Epoch: [1][15/16] Data 0.000 (0.864) Elapsed 0m 24s (remain 0m 0s) Loss: 0.1065(0.3019) Grad: 11.2391  
EVAL: [0/3] Data 5.211 (5.211) Elapsed 0m 5s (remain 0m 11s) Loss: 0.0774(0.0774) 


Epoch 1 - avg_train_loss: 0.3019  avg_val_loss: 0.1203  time: 32s
Epoch 1 - Score: 0.9933
Epoch 1 - Save Best Score: 0.9933 Model


EVAL: [2/3] Data 0.000 (1.944) Elapsed 0m 6s (remain 0m 0s) Loss: 0.9720(0.1203) 


Epoch 1 - Save Best Loss: 0.1203 Model


Epoch: [2][0/16] Data 5.045 (5.045) Elapsed 0m 5s (remain 1m 26s) Loss: 0.0467(0.0467) Grad: 3.0488  
Epoch: [2][15/16] Data 0.000 (0.845) Elapsed 0m 24s (remain 0m 0s) Loss: 0.2469(0.0765) Grad: 12.8911  
EVAL: [0/3] Data 5.020 (5.020) Elapsed 0m 5s (remain 0m 10s) Loss: 0.1335(0.1335) 


Epoch 2 - avg_train_loss: 0.0765  avg_val_loss: 0.0756  time: 31s
Epoch 2 - Score: 0.9989
Epoch 2 - Save Best Score: 0.9989 Model


EVAL: [2/3] Data 0.000 (1.922) Elapsed 0m 6s (remain 0m 0s) Loss: 0.0271(0.0756) 


Epoch 2 - Save Best Loss: 0.0756 Model


Epoch: [3][0/16] Data 5.327 (5.327) Elapsed 0m 6s (remain 1m 30s) Loss: 0.0874(0.0874) Grad: 4.1489  
Epoch: [3][15/16] Data 0.000 (0.899) Elapsed 0m 25s (remain 0m 0s) Loss: 0.0132(0.0291) Grad: 1.7083  
EVAL: [0/3] Data 4.779 (4.779) Elapsed 0m 5s (remain 0m 10s) Loss: 0.0558(0.0558) 


Epoch 3 - avg_train_loss: 0.0291  avg_val_loss: 0.0362  time: 31s
Epoch 3 - Score: 0.9999
Epoch 3 - Save Best Score: 0.9999 Model


EVAL: [2/3] Data 0.000 (1.802) Elapsed 0m 6s (remain 0m 0s) Loss: 0.0373(0.0362) 


Epoch 3 - Save Best Loss: 0.0362 Model


Epoch: [4][0/16] Data 7.528 (7.528) Elapsed 0m 8s (remain 2m 3s) Loss: 0.0038(0.0038) Grad: 0.4722  
Epoch: [4][15/16] Data 0.000 (0.925) Elapsed 0m 25s (remain 0m 0s) Loss: 0.0049(0.0099) Grad: 1.1896  
EVAL: [0/3] Data 4.674 (4.674) Elapsed 0m 5s (remain 0m 10s) Loss: 0.0164(0.0164) 


Epoch 4 - avg_train_loss: 0.0099  avg_val_loss: 0.0220  time: 32s
Epoch 4 - Score: 1.0000
Epoch 4 - Save Best Score: 1.0000 Model


EVAL: [2/3] Data 0.000 (1.771) Elapsed 0m 6s (remain 0m 0s) Loss: 0.1357(0.0220) 


Epoch 4 - Save Best Loss: 0.0220 Model


Epoch: [5][0/16] Data 5.481 (5.481) Elapsed 0m 6s (remain 1m 32s) Loss: 0.0068(0.0068) Grad: 1.5247  
Epoch: [5][15/16] Data 0.165 (0.820) Elapsed 0m 24s (remain 0m 0s) Loss: 0.0020(0.0034) Grad: 0.1776  
EVAL: [0/3] Data 4.736 (4.736) Elapsed 0m 5s (remain 0m 10s) Loss: 0.0458(0.0458) 


Epoch 5 - avg_train_loss: 0.0034  avg_val_loss: 0.0278  time: 30s
Epoch 5 - Score: 0.9999


EVAL: [2/3] Data 0.000 (1.796) Elapsed 0m 6s (remain 0m 0s) Loss: 0.0411(0.0278) 
Epoch: [6][0/16] Data 5.510 (5.510) Elapsed 0m 6s (remain 1m 32s) Loss: 0.0015(0.0015) Grad: 0.1421  
Epoch: [6][15/16] Data 0.000 (0.859) Elapsed 0m 24s (remain 0m 0s) Loss: 0.0007(0.0016) Grad: 0.1006  
EVAL: [0/3] Data 5.407 (5.407) Elapsed 0m 5s (remain 0m 11s) Loss: 0.0526(0.0526) 


Epoch 6 - avg_train_loss: 0.0016  avg_val_loss: 0.0303  time: 32s
Epoch 6 - Score: 0.9999
Score: 1.0000


EVAL: [2/3] Data 0.000 (2.036) Elapsed 0m 6s (remain 0m 0s) Loss: 0.0336(0.0303) 
Epoch: [1][0/16] Data 5.060 (5.060) Elapsed 0m 5s (remain 1m 26s) Loss: 0.9477(0.9477) Grad: 29.8462  
Epoch: [1][15/16] Data 0.000 (0.876) Elapsed 0m 24s (remain 0m 0s) Loss: 0.2141(0.3020) Grad: 8.0208  
EVAL: [0/3] Data 4.875 (4.875) Elapsed 0m 5s (remain 0m 10s) Loss: 0.0983(0.0983) 


Epoch 1 - avg_train_loss: 0.3020  avg_val_loss: 0.1120  time: 32s
Epoch 1 - Score: 0.9919
Epoch 1 - Save Best Score: 0.9919 Model


EVAL: [2/3] Data 0.000 (1.963) Elapsed 0m 6s (remain 0m 0s) Loss: 0.0122(0.1120) 


Epoch 1 - Save Best Loss: 0.1120 Model


Epoch: [2][0/16] Data 6.014 (6.014) Elapsed 0m 6s (remain 1m 40s) Loss: 0.1942(0.1942) Grad: 8.4612  
Epoch: [2][15/16] Data 0.000 (0.877) Elapsed 0m 24s (remain 0m 0s) Loss: 0.0494(0.0921) Grad: 3.6446  
EVAL: [0/3] Data 5.070 (5.070) Elapsed 0m 5s (remain 0m 10s) Loss: 0.0225(0.0225) 


Epoch 2 - avg_train_loss: 0.0921  avg_val_loss: 0.1304  time: 32s
Epoch 2 - Score: 0.9971
Epoch 2 - Save Best Score: 0.9971 Model


EVAL: [2/3] Data 0.000 (2.006) Elapsed 0m 6s (remain 0m 0s) Loss: 0.0289(0.1304) 
Epoch: [3][0/16] Data 5.163 (5.163) Elapsed 0m 5s (remain 1m 27s) Loss: 0.0420(0.0420) Grad: 2.7265  
Epoch: [3][15/16] Data 0.000 (0.903) Elapsed 0m 25s (remain 0m 0s) Loss: 0.0365(0.0369) Grad: 2.8815  
EVAL: [0/3] Data 4.927 (4.927) Elapsed 0m 5s (remain 0m 10s) Loss: 0.0504(0.0504) 


Epoch 3 - avg_train_loss: 0.0369  avg_val_loss: 0.0777  time: 32s
Epoch 3 - Score: 0.9970
Epoch 3 - Save Best Loss: 0.0777 Model


EVAL: [2/3] Data 0.000 (1.938) Elapsed 0m 6s (remain 0m 0s) Loss: 0.0153(0.0777) 
Epoch: [4][0/16] Data 5.251 (5.251) Elapsed 0m 6s (remain 1m 30s) Loss: 0.0091(0.0091) Grad: 0.4857  
Epoch: [4][15/16] Data 0.000 (0.887) Elapsed 0m 24s (remain 0m 0s) Loss: 0.0090(0.0132) Grad: 1.1767  
EVAL: [0/3] Data 4.714 (4.714) Elapsed 0m 5s (remain 0m 10s) Loss: 0.0557(0.0557) 


Epoch 4 - avg_train_loss: 0.0132  avg_val_loss: 0.0809  time: 32s
Epoch 4 - Score: 0.9969


EVAL: [2/3] Data 0.000 (1.972) Elapsed 0m 6s (remain 0m 0s) Loss: 0.0125(0.0809) 
Epoch: [5][0/16] Data 4.779 (4.779) Elapsed 0m 5s (remain 1m 21s) Loss: 0.0042(0.0042) Grad: 0.2614  
Epoch: [5][15/16] Data 0.000 (0.857) Elapsed 0m 24s (remain 0m 0s) Loss: 0.0038(0.0067) Grad: 0.2810  
EVAL: [0/3] Data 4.119 (4.119) Elapsed 0m 4s (remain 0m 8s) Loss: 0.0838(0.0838) 


Epoch 5 - avg_train_loss: 0.0067  avg_val_loss: 0.0770  time: 31s
Epoch 5 - Score: 0.9960
Epoch 5 - Save Best Loss: 0.0770 Model


EVAL: [2/3] Data 0.000 (1.859) Elapsed 0m 6s (remain 0m 0s) Loss: 0.0065(0.0770) 
Epoch: [6][0/16] Data 4.668 (4.668) Elapsed 0m 5s (remain 1m 19s) Loss: 0.0017(0.0017) Grad: 0.7992  
Epoch: [6][15/16] Data 0.000 (0.863) Elapsed 0m 24s (remain 0m 0s) Loss: 0.0021(0.0036) Grad: 0.2868  
EVAL: [0/3] Data 3.979 (3.979) Elapsed 0m 4s (remain 0m 8s) Loss: 0.0841(0.0841) 


Epoch 6 - avg_train_loss: 0.0036  avg_val_loss: 0.0775  time: 31s
Epoch 6 - Score: 0.9961


EVAL: [2/3] Data 0.000 (1.657) Elapsed 0m 5s (remain 0m 0s) Loss: 0.0058(0.0775) 


Score: 0.9960
Score: 0.9953
