In [1]:
# ====================================================
# directory settings
# ====================================================

import os

# Directory that receives the training log, the per-fold checkpoints and the
# out-of-fold CSV written further down in this notebook.
OUTPUT_DIR = './'
# exist_ok=True keeps the cell safe to re-run and avoids the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Image folders of the competition dataset (train images are read by TrainDataset).
TRAIN_PATH = '../input/pgpdataset2022mar07/dataset/train'
TEST_PATH = '../input/pgpdataset2022mar07/dataset/test'

In [2]:
# ====================================================
# CFG
# ====================================================

class CFG:
    """Central place for every tunable used by the training code in this notebook."""
    # --- logging / data loading ---
    print_freq = 100                 # print a progress line every N batches
    num_workers = 4                  # DataLoader worker processes
    # --- model / input ---
    model_name = 'resnext50_32x4d'   # architecture name handed to timm.create_model
    size = 512                       # side length images are cropped/resized to
    # --- optimisation ---
    epochs = 15
    factor = 0.2                     # ReduceLROnPlateau: LR multiplier on plateau
    patience = 5                     # ReduceLROnPlateau: epochs to wait before reducing
    eps = 1e-6                       # ReduceLROnPlateau: eps argument
    lr = 1e-4                        # Adam learning rate
    min_lr = 1e-6                    # not referenced by the code visible in this notebook
    batch_size = 16
    weight_decay = 1e-6              # Adam weight decay
    gradient_accumulation_steps = 1  # batches accumulated per optimizer step
    max_grad_norm = 1000             # threshold passed to clip_grad_norm_
    seed = 42
    # --- task ---
    target_size = 7                  # number of output classes
    target_col = 'label'             # DataFrame column holding the class id
    # --- cross-validation ---
    n_fold = 5
    # Fold ids are zero-based: main() iterates range(n_fold). The previous
    # hard-coded [1,2,3,4,5] skipped fold 0 and named a fold that main() never trains.
    trn_fold = list(range(n_fold))

In [3]:
# ====================================================
# libraries
# ====================================================

import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')
import os
import math
import time
import random
import shutil
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter
import scipy as sp
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.metrics import accuracy_score, f1_score
from tqdm.auto import tqdm
from functools import partial
import cv2
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
from albumentations import (Compose, Normalize, Resize, RandomResizedCrop, HorizontalFlip, VerticalFlip, ShiftScaleRotate, Transpose)
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform
import timm
import warnings 
warnings.filterwarnings('ignore')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
from matplotlib import pyplot as plt
import joblib

In [4]:
# ====================================================
# utils
# ====================================================

def get_score(y_true, y_pred):
    """Return the accuracy of ``y_pred`` against ``y_true`` (sklearn ``accuracy_score``)."""
    accuracy = accuracy_score(y_true, y_pred)
    return accuracy

def get_f1_score(y_true, y_pred):
    """Return the weighted-average F1 of ``y_pred`` against ``y_true`` (sklearn ``f1_score``)."""
    weighted_f1 = f1_score(y_true, y_pred, average='weighted')
    return weighted_f1

@contextmanager
def timer(name):
    """Context manager that logs a start line and, on normal exit, the elapsed seconds.

    Args:
        name: Label placed in square brackets in both log lines.
    """
    started_at = time.time()
    LOGGER.info(f'[{name}] start')
    yield
    elapsed = time.time() - started_at
    LOGGER.info(f'[{name}] done in {elapsed:.0f} s.')

def init_logger(log_file=OUTPUT_DIR+'train.log'):
    """Build the notebook logger that echoes messages to the stream handler and to a file.

    Args:
        log_file: Path of the log file; defaults to ``train.log`` inside OUTPUT_DIR.

    Returns:
        The ``logging.Logger`` registered under ``__name__``, at INFO level, with
        exactly one stream handler and one file handler (message-only format).
    """
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    # getLogger() hands back the same object on every call, so re-running this
    # cell used to stack another pair of handlers and duplicate every message.
    # Drop (and close) whatever is attached before installing fresh handlers.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

# Shared logger used by timer(), train_loop() and main(); with the default
# argument it writes to OUTPUT_DIR + 'train.log' in addition to the stream handler.
LOGGER = init_logger()

def seed_torch(seed=42):
    """Seed the Python, NumPy and PyTorch (CPU and current CUDA device) RNGs.

    Also exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode.

    Args:
        seed: Integer seed applied to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True

# Seed all RNGs with the configured seed as soon as the helper is defined.
seed_torch(seed=CFG.seed)

In [5]:
# ====================================================
# dataset
# ====================================================

class TrainDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs described by a DataFrame.

    Images are read from ``TRAIN_PATH/<image_ID>``; labels come from the
    ``label`` column.

    Args:
        df: DataFrame with ``image_ID`` and ``label`` columns.
        transform: Optional callable invoked as ``transform(image=array)`` that
            returns a mapping with the transformed image under ``'image'``.
    """
    def __init__(self, df, transform=None):
        self.df = df
        # Cache both columns as arrays so file name and label are always taken
        # from the same *position*. Previously the name was fetched with
        # df.loc[idx] (index label) while the label used positional indexing,
        # which pairs the wrong rows whenever df's index is not 0..n-1.
        self.file_names = df['image_ID'].values
        self.labels = df['label'].values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        file_name = self.file_names[idx]
        file_path = f'{TRAIN_PATH}/{file_name}'
        # convert('RGB') drops an alpha channel exactly like the old slice did,
        # and additionally turns grayscale/palette/2-channel files into 3-channel
        # arrays (the old shape[-1] > 3 test raised IndexError on 2-D arrays).
        # The context manager releases the file handle right after decoding.
        with Image.open(file_path) as pil_image:
            image = np.array(pil_image.convert('RGB'))
        if image.shape[0] == 1:
            # Images that are one pixel high are replaced by a constant
            # placeholder (presumably a guard against degenerate files — confirm).
            image = np.ones((512, 512, 3))
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
        label = torch.tensor(self.labels[idx]).long()
        return image, label

In [6]:
# ====================================================
# transformations
# ====================================================

def get_transforms(*, data):
    """Build the albumentations pipeline for the requested split.

    Args:
        data: ``'train'`` for the augmenting pipeline, ``'valid'`` for the
            resize-only pipeline. Any other value yields ``None``.

    Returns:
        A ``Compose`` ending in Normalize + ToTensorV2, or ``None``.
    """

    def _normalize_and_tensorize():
        # Shared tail of both pipelines (the constants look like the usual
        # ImageNet channel statistics).
        return [
            Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ]

    if data == 'train':
        head = [
            RandomResizedCrop(CFG.size, CFG.size),
            Transpose(p=0.5),
            HorizontalFlip(p=0.5),
            VerticalFlip(p=0.5),
            ShiftScaleRotate(p=0.5),
        ]
    elif data == 'valid':
        head = [Resize(CFG.size, CFG.size)]
    else:
        return None

    return Compose(head + _normalize_and_tensorize())

In [7]:
# ====================================================
# model initialization
# ====================================================

class CustomResNext(nn.Module):
    """timm backbone whose ``fc`` layer is replaced by a ``CFG.target_size``-way linear head."""

    def __init__(self, model_name='resnext50_32x4d', pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)
        # Swap the classifier, keeping the backbone's feature width.
        backbone.fc = nn.Linear(backbone.fc.in_features, CFG.target_size)
        self.model = backbone

    def forward(self, x):
        """Return the wrapped model's output for batch ``x``."""
        return self.model(x)

In [8]:
# ====================================================
# helper functions
# ====================================================

class AverageMeter(object):
    """Tracks the latest value plus a running, count-weighted mean.

    Attributes set by ``reset``/``update``: ``val`` (last value), ``sum``
    (weighted total), ``count`` (total weight) and ``avg`` (``sum / count``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero every statistic."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` observed ``n`` times and refresh the running mean."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    """Format a duration of ``s`` seconds as ``'<minutes>m <seconds>s'``."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Describe elapsed and estimated remaining time for a partly finished task.

    Args:
        since: ``time.time()`` timestamp taken when the task started.
        percent: Fraction of the task completed so far (must be non-zero).

    Returns:
        ``'<elapsed> (remain <remaining>)'`` with both parts formatted by asMinutes.
    """
    elapsed = time.time() - since
    # Linear extrapolation of the total duration from the completed fraction.
    projected_total = elapsed / (percent)
    remaining = projected_total - elapsed
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(remaining))

In [9]:
def train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device):
    """Train ``model`` for one epoch and return the batch-size-weighted mean loss.

    Args:
        train_loader: Iterable yielding ``(images, labels)`` batches; must support ``len()``.
        model: Network to optimise; put into train mode here.
        criterion: Loss callable invoked as ``criterion(y_preds, labels)``.
        optimizer: Stepped once every ``CFG.gradient_accumulation_steps`` batches
            (and on the last batch of the epoch).
        epoch: Zero-based epoch index, used only in the progress line.
        scheduler: Not used inside this function; kept so existing callers keep working.
        device: Device the batches are moved to.

    Returns:
        Average of the unscaled per-batch losses, weighted by batch size.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    model.train()
    start = end = time.time()
    accum_steps = CFG.gradient_accumulation_steps
    n_batches = len(train_loader)
    # Last gradient norm reported by clip_grad_norm_; refreshed on every optimizer step.
    grad_norm = 0.0
    for step, (images, labels) in enumerate(train_loader):
        data_time.update(time.time() - end)
        if images.shape[0] <= 1:
            # Single-sample batches are skipped entirely, as before
            # (presumably because train-mode batch norm needs more than one sample).
            continue
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        y_preds = model(images)
        loss = criterion(y_preds, labels)
        # Log the unscaled loss so the reported average does not depend on accumulation.
        losses.update(loss.item(), batch_size)
        if accum_steps > 1:
            loss = loss / accum_steps
        # Bug fix: backward() used to sit in the `else` branch of the scaling
        # check, so no gradients were ever computed when accumulating.
        loss.backward()
        if (step + 1) % accum_steps == 0 or (step + 1) == n_batches:
            # Clip the fully accumulated gradient right before it is applied.
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
            optimizer.step()
            optimizer.zero_grad()
        batch_time.update(time.time() - end)
        end = time.time()
        if step % CFG.print_freq == 0 or step == (n_batches - 1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  .format(
                   epoch+1, step, n_batches, batch_time=batch_time,
                   data_time=data_time, loss=losses,
                   remain=timeSince(start, float(step+1)/n_batches),
                   grad_norm=grad_norm,
                   ))
    return losses.avg

In [10]:
def valid_fn(valid_loader, model, criterion, device):
    """Evaluate ``model`` on every batch of ``valid_loader``.

    Args:
        valid_loader: Iterable yielding ``(images, labels)`` batches; must support ``len()``.
        model: Network to evaluate; put into eval mode here.
        criterion: Loss callable invoked as ``criterion(y_preds, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(avg_loss, predictions)`` where ``avg_loss`` is the batch-size-weighted
        mean loss and ``predictions`` is the row-wise concatenation of the
        per-batch softmax outputs, in loader order.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    model.eval()
    preds = []
    start = end = time.time()
    n_batches = len(valid_loader)
    for step, (images, labels) in enumerate(valid_loader):
        data_time.update(time.time() - end)
        # Every batch is scored, including a trailing batch of one sample:
        # dropping it (as the old `shape[0] > 1` guard did) leaves fewer
        # prediction rows than validation labels in the caller.
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.no_grad():
            y_preds = model(images)
            loss = criterion(y_preds, labels)
        losses.update(loss.item(), batch_size)
        preds.append(y_preds.softmax(1).to('cpu').numpy())
        batch_time.update(time.time() - end)
        end = time.time()
        if step % CFG.print_freq == 0 or step == (n_batches - 1):
            print('EVAL: [{0}/{1}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(
                   step, n_batches, batch_time=batch_time,
                   data_time=data_time, loss=losses,
                   remain=timeSince(start, float(step+1)/n_batches),
                   ))
    predictions = np.concatenate(preds)
    return losses.avg, predictions

In [11]:
# ====================================================
# train loop
# ====================================================

def train_loop(folds, fold):
    """Train and validate one cross-validation fold.

    Rows whose ``fold`` column equals ``fold`` form the validation split; all
    other rows are used for training. The model state with the best validation
    accuracy is saved to ``OUTPUT_DIR/<model_name>_fold<fold>_best.pth``.

    Args:
        folds: DataFrame with a ``fold`` column plus the columns TrainDataset
            reads (``image_ID`` and ``label``).
        fold: Fold id to hold out for validation.

    Returns:
        The validation rows (index reset) extended with one probability column
        per class (named ``'0'`` .. ``str(CFG.target_size - 1)``) and a ``preds``
        column holding the arg-max class, both taken from the best epoch.

    Raises:
        RuntimeError: If no epoch achieved an accuracy above zero, so there are
            no best predictions to return.
    """

    LOGGER.info(f"========== fold: {fold} training ==========")

    trn_idx = folds[folds['fold'] != fold].index
    val_idx = folds[folds['fold'] == fold].index

    # Reset the index so dataset positions run 0..n-1 within each split.
    train_folds = folds.loc[trn_idx].reset_index(drop=True)
    valid_folds = folds.loc[val_idx].reset_index(drop=True)

    train_dataset = TrainDataset(train_folds, transform=get_transforms(data='train'))
    valid_dataset = TrainDataset(valid_folds, transform=get_transforms(data='valid'))

    train_loader = DataLoader(train_dataset, batch_size=CFG.batch_size, 
                              shuffle=True, num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset, batch_size=CFG.batch_size, 
                              shuffle=False, num_workers=CFG.num_workers, pin_memory=True, drop_last=False)
    
    model = CustomResNext(CFG.model_name, pretrained=True)
    model.to(device)

    optimizer = Adam(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay, amsgrad=False)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=CFG.factor, patience=CFG.patience, verbose=True, eps=CFG.eps)
  
    criterion = nn.CrossEntropyLoss()

    best_score = 0.
    # Predictions of the best epoch, kept in memory so the checkpoint does not
    # have to be read back (and cannot be confused with a stale file on disk).
    best_preds = None
    valid_labels = valid_folds[CFG.target_col].values
    
    for epoch in range(CFG.epochs):
        start_time = time.time()
        avg_loss = train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device)
        avg_val_loss, preds = valid_fn(valid_loader, model, criterion, device)
        # The plateau scheduler watches validation loss, while the checkpoint
        # criterion below is validation accuracy.
        scheduler.step(avg_val_loss)
        score = get_score(valid_labels, preds.argmax(1))
        score_f1 = get_f1_score(valid_labels, preds.argmax(1))
        elapsed = time.time() - start_time
        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Accuracy: {score}  F1-Score: {score_f1}')
        if score > best_score:
            best_score = score
            best_preds = preds
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(), 'preds': preds}, OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best.pth')
    
    if best_preds is None:
        raise RuntimeError(f'fold {fold}: no epoch produced a validation accuracy above 0; nothing was saved')

    # One probability column per class; driven by the config instead of a literal 7.
    valid_folds[[str(c) for c in range(CFG.target_size)]] = best_preds
    valid_folds['preds'] = best_preds.argmax(1)

    return valid_folds

In [12]:
# ====================================================
# main function
# ====================================================

def main():
    """Run cross-validation over all folds and persist the out-of-fold predictions.

    For each fold id in ``range(CFG.n_fold)`` this trains via ``train_loop`` on
    the module-level ``folds`` DataFrame, logs the fold accuracy, then logs the
    accuracy over all folds combined and writes ``oof_df.csv`` to OUTPUT_DIR.
    """

    def get_result(result_df):
        """Log the accuracy of the ``preds`` column against the target column."""
        preds = result_df['preds'].values
        labels = result_df[CFG.target_col].values
        score = get_score(labels, preds)
        LOGGER.info(f'Score: {score:<.5f}')
    
    # Collect the per-fold frames and concatenate once, instead of re-copying a
    # growing DataFrame (seeded from an empty one) on every iteration.
    # Every fold is trained; CFG.trn_fold is not consulted here.
    fold_frames = []
    for fold in range(CFG.n_fold):
        _oof_df = train_loop(folds, fold)
        fold_frames.append(_oof_df)
        LOGGER.info(f"========== fold: {fold} result ==========")
        get_result(_oof_df)
    oof_df = pd.concat(fold_frames)
    LOGGER.info(f"========== CV ==========")
    get_result(oof_df)
    oof_df.to_csv(OUTPUT_DIR+'oof_df.csv', index=False)

In [13]:
# Mapping applied to the 'label' column: sport name -> integer class id.
labels = {
    'label': {
        'Cricket': 0,
        'Wrestling': 1,
        'Tennis': 2,
        'Badminton': 3,
        'Soccer': 4,
        'Swimming': 5,
        'Karate': 6,
    }
}
# Read the training table and encode the labels in one pass.
train = pd.read_csv('../input/pgpfolddataset/train.csv').replace(labels)
# `folds` is the name main() reads; it refers to the same DataFrame object.
folds = train

In [14]:
# Class balance check: number of training rows per encoded label.
train['label'].value_counts()

0    1556
1    1471
2    1445
3    1394
4    1188
5     595
6     578
Name: label, dtype: int64

In [15]:
# Run the full cross-validation training. Long-running: the captured log shows
# roughly 346 s per epoch, for CFG.epochs epochs in each of CFG.n_fold folds.
main()

Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/resnext50_32x4d_ra-d733960d.pth" to /root/.cache/torch/hub/checkpoints/resnext50_32x4d_ra-d733960d.pth


Epoch: [1][0/411] Data 1.451 (1.451) Elapsed 0m 9s (remain 64m 10s) Loss: 2.0074(2.0074) Grad: 9.8342  
Epoch: [1][100/411] Data 0.000 (0.015) Elapsed 1m 26s (remain 4m 25s) Loss: 0.7176(1.2674) Grad: 7.7979  
Epoch: [1][200/411] Data 0.000 (0.008) Elapsed 2m 43s (remain 2m 51s) Loss: 1.2781(1.0593) Grad: 18.9191  
Epoch: [1][300/411] Data 0.000 (0.005) Elapsed 4m 0s (remain 1m 28s) Loss: 1.0748(0.9489) Grad: 13.4000  
Epoch: [1][400/411] Data 0.000 (0.004) Elapsed 5m 17s (remain 0m 7s) Loss: 0.6999(0.8775) Grad: 9.1394  
Epoch: [1][410/411] Data 0.000 (0.004) Elapsed 5m 25s (remain 0m 0s) Loss: 0.9273(0.8738) Grad: 11.9485  
EVAL: [0/103] Data 1.238 (1.238) Elapsed 0m 1s (remain 2m 27s) Loss: 0.0751(0.0751) 
EVAL: [100/103] Data 0.000 (0.075) Elapsed 0m 29s (remain 0m 0s) Loss: 0.5155(0.2771) 


Epoch 1 - avg_train_loss: 0.8738  avg_val_loss: 0.2747  time: 356s
Epoch 1 - Accuracy: 0.9113001215066828  F1-Score: 0.9113984041593446
Epoch 1 - Save Best Score: 0.9113 Model


EVAL: [102/103] Data 0.000 (0.074) Elapsed 0m 29s (remain 0m 0s) Loss: 0.1176(0.2747) 
Epoch: [2][0/411] Data 1.211 (1.211) Elapsed 0m 2s (remain 14m 6s) Loss: 0.3580(0.3580) Grad: 5.0363  
Epoch: [2][100/411] Data 0.000 (0.012) Elapsed 1m 19s (remain 4m 3s) Loss: 1.0349(0.5689) Grad: 11.7815  
Epoch: [2][200/411] Data 0.000 (0.006) Elapsed 2m 36s (remain 2m 43s) Loss: 0.4249(0.5618) Grad: 5.8189  
Epoch: [2][300/411] Data 0.000 (0.004) Elapsed 3m 53s (remain 1m 25s) Loss: 0.4739(0.5328) Grad: 6.8897  
Epoch: [2][400/411] Data 0.000 (0.003) Elapsed 5m 10s (remain 0m 7s) Loss: 0.8790(0.5385) Grad: 13.0071  
Epoch: [2][410/411] Data 0.000 (0.003) Elapsed 5m 18s (remain 0m 0s) Loss: 0.3455(0.5354) Grad: 5.4465  
EVAL: [0/103] Data 1.179 (1.179) Elapsed 0m 1s (remain 2m 21s) Loss: 0.0751(0.0751) 
EVAL: [100/103] Data 0.000 (0.057) Elapsed 0m 27s (remain 0m 0s) Loss: 0.6636(0.2189) 


Epoch 2 - avg_train_loss: 0.5354  avg_val_loss: 0.2168  time: 347s
Epoch 2 - Accuracy: 0.9319562575941677  F1-Score: 0.9324189713362732
Epoch 2 - Save Best Score: 0.9320 Model


EVAL: [102/103] Data 0.000 (0.056) Elapsed 0m 28s (remain 0m 0s) Loss: 0.1238(0.2168) 
Epoch: [3][0/411] Data 2.093 (2.093) Elapsed 0m 2s (remain 19m 46s) Loss: 0.1910(0.1910) Grad: 4.3102  
Epoch: [3][100/411] Data 0.000 (0.021) Elapsed 1m 20s (remain 4m 5s) Loss: 0.3403(0.4725) Grad: 5.9085  
Epoch: [3][200/411] Data 0.000 (0.011) Elapsed 2m 37s (remain 2m 44s) Loss: 0.4032(0.4788) Grad: 9.7589  
Epoch: [3][300/411] Data 0.000 (0.007) Elapsed 3m 54s (remain 1m 25s) Loss: 0.2998(0.4743) Grad: 6.6826  
Epoch: [3][400/411] Data 0.000 (0.006) Elapsed 5m 11s (remain 0m 7s) Loss: 0.8738(0.4775) Grad: 12.4781  
Epoch: [3][410/411] Data 0.000 (0.005) Elapsed 5m 18s (remain 0m 0s) Loss: 0.3760(0.4781) Grad: 7.0764  
EVAL: [0/103] Data 1.195 (1.195) Elapsed 0m 1s (remain 2m 25s) Loss: 0.0289(0.0289) 
EVAL: [100/103] Data 0.000 (0.062) Elapsed 0m 28s (remain 0m 0s) Loss: 0.6747(0.2067) 


Epoch 3 - avg_train_loss: 0.4781  avg_val_loss: 0.2041  time: 348s
Epoch 3 - Accuracy: 0.9295261239368166  F1-Score: 0.9295406025225402


EVAL: [102/103] Data 0.000 (0.061) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0470(0.2041) 
Epoch: [4][0/411] Data 1.059 (1.059) Elapsed 0m 1s (remain 13m 14s) Loss: 0.2596(0.2596) Grad: 8.7645  
Epoch: [4][100/411] Data 0.000 (0.011) Elapsed 1m 19s (remain 4m 2s) Loss: 0.0808(0.4172) Grad: 2.5348  
Epoch: [4][200/411] Data 0.000 (0.006) Elapsed 2m 36s (remain 2m 43s) Loss: 0.5021(0.4184) Grad: 10.2075  
Epoch: [4][300/411] Data 0.000 (0.004) Elapsed 3m 53s (remain 1m 25s) Loss: 0.1290(0.4140) Grad: 4.0291  
Epoch: [4][400/411] Data 0.000 (0.003) Elapsed 5m 10s (remain 0m 7s) Loss: 0.2977(0.4173) Grad: 4.9521  
Epoch: [4][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.5061(0.4193) Grad: 9.5174  
EVAL: [0/103] Data 1.192 (1.192) Elapsed 0m 1s (remain 2m 22s) Loss: 0.0300(0.0300) 
EVAL: [100/103] Data 0.000 (0.050) Elapsed 0m 26s (remain 0m 0s) Loss: 0.2446(0.1271) 


Epoch 4 - avg_train_loss: 0.4193  avg_val_loss: 0.1251  time: 345s
Epoch 4 - Accuracy: 0.9556500607533415  F1-Score: 0.9555463328384083
Epoch 4 - Save Best Score: 0.9557 Model


EVAL: [102/103] Data 0.000 (0.049) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0236(0.1251) 
Epoch: [5][0/411] Data 2.494 (2.494) Elapsed 0m 3s (remain 23m 7s) Loss: 0.3492(0.3492) Grad: 5.3440  
Epoch: [5][100/411] Data 0.000 (0.025) Elapsed 1m 20s (remain 4m 7s) Loss: 0.5270(0.3748) Grad: 7.8552  
Epoch: [5][200/411] Data 0.000 (0.013) Elapsed 2m 37s (remain 2m 44s) Loss: 0.4746(0.3644) Grad: 6.9216  
Epoch: [5][300/411] Data 0.000 (0.009) Elapsed 3m 54s (remain 1m 25s) Loss: 0.5536(0.3686) Grad: 7.8690  
Epoch: [5][400/411] Data 0.000 (0.006) Elapsed 5m 11s (remain 0m 7s) Loss: 0.4705(0.3709) Grad: 9.3396  
Epoch: [5][410/411] Data 0.000 (0.006) Elapsed 5m 19s (remain 0m 0s) Loss: 0.7619(0.3713) Grad: 11.3616  
EVAL: [0/103] Data 1.649 (1.649) Elapsed 0m 1s (remain 3m 9s) Loss: 0.0051(0.0051) 
EVAL: [100/103] Data 0.000 (0.060) Elapsed 0m 27s (remain 0m 0s) Loss: 0.4791(0.1559) 


Epoch 5 - avg_train_loss: 0.3713  avg_val_loss: 0.1538  time: 347s
Epoch 5 - Accuracy: 0.9507897934386391  F1-Score: 0.9510649600683437


EVAL: [102/103] Data 0.000 (0.059) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0651(0.1538) 
Epoch: [6][0/411] Data 1.221 (1.221) Elapsed 0m 2s (remain 14m 1s) Loss: 0.2397(0.2397) Grad: 5.4379  
Epoch: [6][100/411] Data 0.000 (0.013) Elapsed 1m 19s (remain 4m 2s) Loss: 0.2925(0.3407) Grad: 5.8084  
Epoch: [6][200/411] Data 0.000 (0.006) Elapsed 2m 36s (remain 2m 43s) Loss: 0.0865(0.3496) Grad: 3.1547  
Epoch: [6][300/411] Data 0.001 (0.004) Elapsed 3m 53s (remain 1m 25s) Loss: 0.3788(0.3689) Grad: 7.6604  
Epoch: [6][400/411] Data 0.000 (0.003) Elapsed 5m 10s (remain 0m 7s) Loss: 0.4421(0.3672) Grad: 8.4833  
Epoch: [6][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.4209(0.3655) Grad: 7.3737  
EVAL: [0/103] Data 1.333 (1.333) Elapsed 0m 1s (remain 2m 37s) Loss: 0.0247(0.0247) 
EVAL: [100/103] Data 0.000 (0.058) Elapsed 0m 27s (remain 0m 0s) Loss: 0.3629(0.1551) 


Epoch 6 - avg_train_loss: 0.3655  avg_val_loss: 0.1527  time: 346s
Epoch 6 - Accuracy: 0.9507897934386391  F1-Score: 0.9509213212209114


EVAL: [102/103] Data 0.000 (0.057) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0322(0.1527) 
Epoch: [7][0/411] Data 1.122 (1.122) Elapsed 0m 2s (remain 13m 42s) Loss: 0.2019(0.2019) Grad: 6.7604  
Epoch: [7][100/411] Data 0.000 (0.011) Elapsed 1m 18s (remain 4m 2s) Loss: 0.6511(0.3313) Grad: 10.2405  
Epoch: [7][200/411] Data 0.000 (0.006) Elapsed 2m 36s (remain 2m 43s) Loss: 0.2602(0.3248) Grad: 5.9038  
Epoch: [7][300/411] Data 0.000 (0.004) Elapsed 3m 53s (remain 1m 25s) Loss: 0.8085(0.3207) Grad: 14.1581  
Epoch: [7][400/411] Data 0.000 (0.003) Elapsed 5m 10s (remain 0m 7s) Loss: 0.2096(0.3200) Grad: 7.4217  
Epoch: [7][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.7303(0.3193) Grad: 11.9520  
EVAL: [0/103] Data 1.335 (1.335) Elapsed 0m 1s (remain 2m 37s) Loss: 0.0026(0.0026) 
EVAL: [100/103] Data 0.000 (0.061) Elapsed 0m 27s (remain 0m 0s) Loss: 0.6509(0.1512) 


Epoch 7 - avg_train_loss: 0.3193  avg_val_loss: 0.1485  time: 346s
Epoch 7 - Accuracy: 0.9520048602673147  F1-Score: 0.9520757200557874


EVAL: [102/103] Data 0.000 (0.060) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0054(0.1485) 
Epoch: [8][0/411] Data 1.653 (1.653) Elapsed 0m 2s (remain 16m 36s) Loss: 0.1980(0.1980) Grad: 5.0431  
Epoch: [8][100/411] Data 0.000 (0.017) Elapsed 1m 19s (remain 4m 3s) Loss: 0.6560(0.3181) Grad: 9.1819  
Epoch: [8][200/411] Data 0.000 (0.008) Elapsed 2m 36s (remain 2m 43s) Loss: 0.1595(0.2998) Grad: 4.3202  
Epoch: [8][300/411] Data 0.000 (0.006) Elapsed 3m 53s (remain 1m 25s) Loss: 0.7569(0.2965) Grad: 12.0900  
Epoch: [8][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.1296(0.3055) Grad: 4.3721  
Epoch: [8][410/411] Data 0.000 (0.004) Elapsed 5m 18s (remain 0m 0s) Loss: 0.3796(0.3060) Grad: 5.9564  
EVAL: [0/103] Data 1.187 (1.187) Elapsed 0m 1s (remain 2m 22s) Loss: 0.0782(0.0782) 
EVAL: [100/103] Data 0.000 (0.057) Elapsed 0m 27s (remain 0m 0s) Loss: 0.4557(0.1217) 


Epoch 8 - avg_train_loss: 0.3060  avg_val_loss: 0.1201  time: 347s
Epoch 8 - Accuracy: 0.959902794653706  F1-Score: 0.9598961805620364
Epoch 8 - Save Best Score: 0.9599 Model


EVAL: [102/103] Data 0.000 (0.056) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0551(0.1201) 
Epoch: [9][0/411] Data 1.918 (1.918) Elapsed 0m 2s (remain 18m 27s) Loss: 0.2192(0.2192) Grad: 3.0730  
Epoch: [9][100/411] Data 0.000 (0.019) Elapsed 1m 19s (remain 4m 4s) Loss: 0.1652(0.2992) Grad: 4.9984  
Epoch: [9][200/411] Data 0.000 (0.010) Elapsed 2m 36s (remain 2m 43s) Loss: 0.1475(0.2898) Grad: 4.7749  
Epoch: [9][300/411] Data 0.000 (0.007) Elapsed 3m 53s (remain 1m 25s) Loss: 0.1744(0.2976) Grad: 5.6458  
Epoch: [9][400/411] Data 0.000 (0.005) Elapsed 5m 10s (remain 0m 7s) Loss: 0.5337(0.2919) Grad: 9.2600  
Epoch: [9][410/411] Data 0.000 (0.005) Elapsed 5m 18s (remain 0m 0s) Loss: 0.1951(0.2922) Grad: 3.7512  
EVAL: [0/103] Data 1.189 (1.189) Elapsed 0m 1s (remain 2m 25s) Loss: 0.0021(0.0021) 
EVAL: [100/103] Data 0.000 (0.055) Elapsed 0m 27s (remain 0m 0s) Loss: 0.4286(0.1487) 


Epoch 9 - avg_train_loss: 0.2922  avg_val_loss: 0.1462  time: 347s
Epoch 9 - Accuracy: 0.9489671931956257  F1-Score: 0.9491148660399723


EVAL: [102/103] Data 0.000 (0.054) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0173(0.1462) 
Epoch: [10][0/411] Data 1.723 (1.723) Elapsed 0m 2s (remain 17m 55s) Loss: 0.1772(0.1772) Grad: 4.6707  
Epoch: [10][100/411] Data 0.000 (0.017) Elapsed 1m 19s (remain 4m 4s) Loss: 0.2452(0.2669) Grad: 6.1767  
Epoch: [10][200/411] Data 0.000 (0.009) Elapsed 2m 36s (remain 2m 43s) Loss: 0.9507(0.2754) Grad: 10.9765  
Epoch: [10][300/411] Data 0.000 (0.006) Elapsed 3m 53s (remain 1m 25s) Loss: 0.0619(0.2720) Grad: 3.1958  
Epoch: [10][400/411] Data 0.000 (0.005) Elapsed 5m 10s (remain 0m 7s) Loss: 0.3546(0.2829) Grad: 6.2777  
Epoch: [10][410/411] Data 0.000 (0.004) Elapsed 5m 18s (remain 0m 0s) Loss: 0.0580(0.2832) Grad: 1.9447  
EVAL: [0/103] Data 1.116 (1.116) Elapsed 0m 1s (remain 2m 15s) Loss: 0.0033(0.0033) 
EVAL: [100/103] Data 0.000 (0.057) Elapsed 0m 27s (remain 0m 0s) Loss: 0.5207(0.1545) 


Epoch 10 - avg_train_loss: 0.2832  avg_val_loss: 0.1521  time: 347s
Epoch 10 - Accuracy: 0.9526123936816525  F1-Score: 0.9525997662099421


EVAL: [102/103] Data 0.000 (0.056) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0311(0.1521) 
Epoch: [11][0/411] Data 1.326 (1.326) Elapsed 0m 2s (remain 15m 2s) Loss: 0.1511(0.1511) Grad: 3.7360  
Epoch: [11][100/411] Data 0.000 (0.013) Elapsed 1m 19s (remain 4m 3s) Loss: 0.3062(0.2877) Grad: 6.6593  
Epoch: [11][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.3447(0.2781) Grad: 5.6912  
Epoch: [11][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.2716(0.2709) Grad: 5.4237  
Epoch: [11][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.3002(0.2721) Grad: 6.1041  
Epoch: [11][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.1199(0.2721) Grad: 3.3862  
EVAL: [0/103] Data 1.436 (1.436) Elapsed 0m 1s (remain 2m 47s) Loss: 0.0174(0.0174) 
EVAL: [100/103] Data 0.000 (0.059) Elapsed 0m 27s (remain 0m 0s) Loss: 0.5477(0.1336) 


Epoch 11 - avg_train_loss: 0.2721  avg_val_loss: 0.1319  time: 346s
Epoch 11 - Accuracy: 0.959902794653706  F1-Score: 0.9600082439190051


EVAL: [102/103] Data 0.000 (0.058) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0696(0.1319) 
Epoch: [12][0/411] Data 1.885 (1.885) Elapsed 0m 2s (remain 18m 14s) Loss: 0.2240(0.2240) Grad: 5.7485  
Epoch: [12][100/411] Data 0.000 (0.019) Elapsed 1m 19s (remain 4m 4s) Loss: 0.0652(0.2858) Grad: 2.1648  
Epoch: [12][200/411] Data 0.000 (0.010) Elapsed 2m 36s (remain 2m 43s) Loss: 0.3388(0.2832) Grad: 4.1633  
Epoch: [12][300/411] Data 0.000 (0.006) Elapsed 3m 53s (remain 1m 25s) Loss: 0.2909(0.2777) Grad: 5.5700  
Epoch: [12][400/411] Data 0.000 (0.005) Elapsed 5m 10s (remain 0m 7s) Loss: 0.1508(0.2782) Grad: 4.3638  
Epoch: [12][410/411] Data 0.000 (0.005) Elapsed 5m 18s (remain 0m 0s) Loss: 0.2169(0.2781) Grad: 4.6331  
EVAL: [0/103] Data 1.297 (1.297) Elapsed 0m 1s (remain 2m 33s) Loss: 0.0115(0.0115) 
EVAL: [100/103] Data 0.000 (0.053) Elapsed 0m 27s (remain 0m 0s) Loss: 0.6326(0.2004) 


Epoch 12 - avg_train_loss: 0.2781  avg_val_loss: 0.1973  time: 346s
Epoch 12 - Accuracy: 0.9422843256379101  F1-Score: 0.942309363385773


EVAL: [102/103] Data 0.000 (0.052) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0049(0.1973) 
Epoch: [13][0/411] Data 1.338 (1.338) Elapsed 0m 2s (remain 15m 0s) Loss: 0.0674(0.0674) Grad: 2.4342  
Epoch: [13][100/411] Data 0.000 (0.014) Elapsed 1m 19s (remain 4m 3s) Loss: 0.2371(0.2302) Grad: 7.6792  
Epoch: [13][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.6678(0.2406) Grad: 8.2506  
Epoch: [13][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.1609(0.2408) Grad: 5.0153  
Epoch: [13][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.0407(0.2533) Grad: 1.4719  
Epoch: [13][410/411] Data 0.000 (0.004) Elapsed 5m 17s (remain 0m 0s) Loss: 0.3475(0.2532) Grad: 5.5837  
EVAL: [0/103] Data 1.204 (1.204) Elapsed 0m 1s (remain 2m 24s) Loss: 0.0140(0.0140) 
EVAL: [100/103] Data 0.000 (0.056) Elapsed 0m 27s (remain 0m 0s) Loss: 0.4345(0.1528) 


Epoch 13 - avg_train_loss: 0.2532  avg_val_loss: 0.1502  time: 346s
Epoch 13 - Accuracy: 0.9538274605103281  F1-Score: 0.9538139300407872


EVAL: [102/103] Data 0.000 (0.055) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0078(0.1502) 
Epoch: [14][0/411] Data 1.024 (1.024) Elapsed 0m 1s (remain 12m 41s) Loss: 0.1502(0.1502) Grad: 6.7396  
Epoch: [14][100/411] Data 0.000 (0.010) Elapsed 1m 18s (remain 4m 2s) Loss: 0.2260(0.2250) Grad: 3.9150  
Epoch: [14][200/411] Data 0.000 (0.005) Elapsed 2m 35s (remain 2m 42s) Loss: 0.5004(0.2358) Grad: 10.4683  
Epoch: [14][300/411] Data 0.000 (0.004) Elapsed 3m 52s (remain 1m 25s) Loss: 0.3265(0.2296) Grad: 8.1470  
Epoch: [14][400/411] Data 0.000 (0.003) Elapsed 5m 9s (remain 0m 7s) Loss: 0.2655(0.2382) Grad: 6.4131  
Epoch: [14][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.1850(0.2397) Grad: 5.6277  
EVAL: [0/103] Data 1.371 (1.371) Elapsed 0m 1s (remain 2m 42s) Loss: 0.0029(0.0029) 
EVAL: [100/103] Data 0.000 (0.052) Elapsed 0m 27s (remain 0m 0s) Loss: 0.2972(0.1698) 


Epoch 14 - avg_train_loss: 0.2397  avg_val_loss: 0.1673  time: 345s
Epoch 14 - Accuracy: 0.9526123936816525  F1-Score: 0.9528434273869895


EVAL: [102/103] Data 0.000 (0.051) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0489(0.1673) 
Epoch    14: reducing learning rate of group 0 to 2.0000e-05.
Epoch: [15][0/411] Data 1.301 (1.301) Elapsed 0m 2s (remain 14m 34s) Loss: 0.1110(0.1110) Grad: 4.3612  
Epoch: [15][100/411] Data 0.000 (0.013) Elapsed 1m 19s (remain 4m 3s) Loss: 0.3241(0.1813) Grad: 6.2670  
Epoch: [15][200/411] Data 0.001 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.1433(0.1834) Grad: 3.7332  
Epoch: [15][300/411] Data 0.001 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.5479(0.1950) Grad: 8.5353  
Epoch: [15][400/411] Data 0.001 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.0671(0.1877) Grad: 2.0478  
Epoch: [15][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.0477(0.1871) Grad: 1.5583  
EVAL: [0/103] Data 1.231 (1.231) Elapsed 0m 1s (remain 2m 27s) Loss: 0.0009(0.0009) 
EVAL: [100/103] Data 0.000 (0.067) Elapsed 0m 28s (remain 0m 0s) Loss: 0.1730(0.1290) 


Epoch 15 - avg_train_loss: 0.1871  avg_val_loss: 0.1268  time: 347s
Epoch 15 - Accuracy: 0.9690157958687727  F1-Score: 0.9690552541904861
Epoch 15 - Save Best Score: 0.9690 Model


EVAL: [102/103] Data 0.000 (0.066) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0080(0.1268) 


Score: 0.96902


Epoch: [1][0/411] Data 0.959 (0.959) Elapsed 0m 1s (remain 12m 28s) Loss: 2.0405(2.0405) Grad: 10.1632  
Epoch: [1][100/411] Data 0.000 (0.010) Elapsed 1m 18s (remain 4m 2s) Loss: 1.1878(1.2576) Grad: 11.4503  
Epoch: [1][200/411] Data 0.000 (0.005) Elapsed 2m 35s (remain 2m 42s) Loss: 0.9760(1.0578) Grad: 14.7010  
Epoch: [1][300/411] Data 0.000 (0.004) Elapsed 3m 52s (remain 1m 25s) Loss: 0.4367(0.9332) Grad: 8.0028  
Epoch: [1][400/411] Data 0.000 (0.003) Elapsed 5m 9s (remain 0m 7s) Loss: 0.3474(0.8703) Grad: 7.1471  
Epoch: [1][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.6524(0.8640) Grad: 13.4865  
EVAL: [0/103] Data 0.898 (0.898) Elapsed 0m 1s (remain 1m 54s) Loss: 0.4062(0.4062) 
EVAL: [100/103] Data 0.000 (0.058) Elapsed 0m 27s (remain 0m 0s) Loss: 0.1893(0.2919) 


Epoch 1 - avg_train_loss: 0.8640  avg_val_loss: 0.2923  time: 346s
Epoch 1 - Accuracy: 0.9070473876063183  F1-Score: 0.9067106971759162
Epoch 1 - Save Best Score: 0.9070 Model


EVAL: [102/103] Data 0.000 (0.057) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0848(0.2923) 
Epoch: [2][0/411] Data 1.724 (1.724) Elapsed 0m 2s (remain 17m 37s) Loss: 0.6806(0.6806) Grad: 11.7403  
Epoch: [2][100/411] Data 0.000 (0.017) Elapsed 1m 19s (remain 4m 4s) Loss: 0.4086(0.5713) Grad: 8.7687  
Epoch: [2][200/411] Data 0.000 (0.009) Elapsed 2m 36s (remain 2m 43s) Loss: 0.6194(0.5636) Grad: 9.2441  
Epoch: [2][300/411] Data 0.000 (0.006) Elapsed 3m 53s (remain 1m 25s) Loss: 0.6690(0.5673) Grad: 9.6191  
Epoch: [2][400/411] Data 0.000 (0.005) Elapsed 5m 10s (remain 0m 7s) Loss: 0.3766(0.5510) Grad: 8.8770  
Epoch: [2][410/411] Data 0.000 (0.004) Elapsed 5m 17s (remain 0m 0s) Loss: 0.4481(0.5518) Grad: 6.4686  
EVAL: [0/103] Data 0.852 (0.852) Elapsed 0m 1s (remain 1m 48s) Loss: 0.2876(0.2876) 
EVAL: [100/103] Data 0.158 (0.064) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0756(0.1917) 


Epoch 2 - avg_train_loss: 0.5518  avg_val_loss: 0.1918  time: 347s
Epoch 2 - Accuracy: 0.9356014580801945  F1-Score: 0.9354694639503333
Epoch 2 - Save Best Score: 0.9356 Model


EVAL: [102/103] Data 0.000 (0.063) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0747(0.1918) 
Epoch: [3][0/411] Data 1.301 (1.301) Elapsed 0m 2s (remain 14m 34s) Loss: 0.2993(0.2993) Grad: 6.1642  
Epoch: [3][100/411] Data 0.000 (0.013) Elapsed 1m 19s (remain 4m 2s) Loss: 0.4213(0.4566) Grad: 7.6028  
Epoch: [3][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 42s) Loss: 0.4154(0.4674) Grad: 8.0035  
Epoch: [3][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.8070(0.4642) Grad: 11.1345  
Epoch: [3][400/411] Data 0.000 (0.003) Elapsed 5m 9s (remain 0m 7s) Loss: 0.3002(0.4563) Grad: 6.4020  
Epoch: [3][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.2222(0.4542) Grad: 7.9345  
EVAL: [0/103] Data 0.875 (0.875) Elapsed 0m 1s (remain 1m 52s) Loss: 0.2449(0.2449) 
EVAL: [100/103] Data 0.145 (0.061) Elapsed 0m 27s (remain 0m 0s) Loss: 0.1813(0.2166) 


Epoch 3 - avg_train_loss: 0.4542  avg_val_loss: 0.2135  time: 346s
Epoch 3 - Accuracy: 0.9307411907654921  F1-Score: 0.9305951973966835


EVAL: [102/103] Data 0.000 (0.059) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0244(0.2135) 
Epoch: [4][0/411] Data 1.482 (1.482) Elapsed 0m 2s (remain 15m 58s) Loss: 0.4890(0.4890) Grad: 8.2689  
Epoch: [4][100/411] Data 0.000 (0.015) Elapsed 1m 19s (remain 4m 3s) Loss: 0.6067(0.4109) Grad: 9.8792  
Epoch: [4][200/411] Data 0.000 (0.008) Elapsed 2m 36s (remain 2m 43s) Loss: 0.1698(0.4209) Grad: 4.9459  
Epoch: [4][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.3544(0.4208) Grad: 4.8927  
Epoch: [4][400/411] Data 0.000 (0.004) Elapsed 5m 9s (remain 0m 7s) Loss: 0.5174(0.4250) Grad: 7.9700  
Epoch: [4][410/411] Data 0.000 (0.004) Elapsed 5m 17s (remain 0m 0s) Loss: 0.3151(0.4223) Grad: 5.5150  
EVAL: [0/103] Data 0.839 (0.839) Elapsed 0m 1s (remain 1m 47s) Loss: 0.3486(0.3486) 
EVAL: [100/103] Data 0.078 (0.059) Elapsed 0m 27s (remain 0m 0s) Loss: 0.1699(0.1950) 


Epoch 4 - avg_train_loss: 0.4223  avg_val_loss: 0.1937  time: 346s
Epoch 4 - Accuracy: 0.93681652490887  F1-Score: 0.9370182381893611
Epoch 4 - Save Best Score: 0.9368 Model


EVAL: [102/103] Data 0.000 (0.058) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0400(0.1937) 
Epoch: [5][0/411] Data 1.287 (1.287) Elapsed 0m 2s (remain 14m 33s) Loss: 0.5968(0.5968) Grad: 11.4370  
Epoch: [5][100/411] Data 0.000 (0.013) Elapsed 1m 19s (remain 4m 2s) Loss: 0.1696(0.3569) Grad: 3.6825  
Epoch: [5][200/411] Data 0.000 (0.007) Elapsed 2m 35s (remain 2m 42s) Loss: 0.4275(0.3673) Grad: 6.5002  
Epoch: [5][300/411] Data 0.000 (0.005) Elapsed 3m 52s (remain 1m 25s) Loss: 0.2673(0.3670) Grad: 6.9343  
Epoch: [5][400/411] Data 0.000 (0.003) Elapsed 5m 9s (remain 0m 7s) Loss: 0.2425(0.3659) Grad: 6.7263  
Epoch: [5][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.4273(0.3675) Grad: 8.0698  
EVAL: [0/103] Data 1.015 (1.015) Elapsed 0m 1s (remain 2m 8s) Loss: 0.2120(0.2120) 
EVAL: [100/103] Data 0.000 (0.057) Elapsed 0m 27s (remain 0m 0s) Loss: 0.1107(0.1315) 


Epoch 5 - avg_train_loss: 0.3675  avg_val_loss: 0.1316  time: 346s
Epoch 5 - Accuracy: 0.9556500607533415  F1-Score: 0.9557511862066109
Epoch 5 - Save Best Score: 0.9557 Model


EVAL: [102/103] Data 0.000 (0.056) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0133(0.1316) 
Epoch: [6][0/411] Data 1.455 (1.455) Elapsed 0m 2s (remain 15m 54s) Loss: 0.3553(0.3553) Grad: 8.0355  
Epoch: [6][100/411] Data 0.000 (0.015) Elapsed 1m 19s (remain 4m 3s) Loss: 0.2217(0.3068) Grad: 8.3252  
Epoch: [6][200/411] Data 0.000 (0.008) Elapsed 2m 36s (remain 2m 43s) Loss: 0.2377(0.3216) Grad: 12.1776  
Epoch: [6][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.4484(0.3271) Grad: 7.8078  
Epoch: [6][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.1658(0.3322) Grad: 3.5463  
Epoch: [6][410/411] Data 0.000 (0.004) Elapsed 5m 17s (remain 0m 0s) Loss: 0.2482(0.3319) Grad: 7.3178  
EVAL: [0/103] Data 0.849 (0.849) Elapsed 0m 1s (remain 1m 47s) Loss: 0.4229(0.4229) 
EVAL: [100/103] Data 0.000 (0.060) Elapsed 0m 27s (remain 0m 0s) Loss: 0.2007(0.1894) 


Epoch 6 - avg_train_loss: 0.3319  avg_val_loss: 0.1886  time: 346s
Epoch 6 - Accuracy: 0.9380315917375456  F1-Score: 0.9380482917572455


EVAL: [102/103] Data 0.000 (0.059) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0418(0.1886) 
Epoch: [7][0/411] Data 2.008 (2.008) Elapsed 0m 2s (remain 19m 6s) Loss: 0.1590(0.1590) Grad: 5.0758  
Epoch: [7][100/411] Data 0.000 (0.020) Elapsed 1m 19s (remain 4m 4s) Loss: 0.1979(0.3156) Grad: 4.4438  
Epoch: [7][200/411] Data 0.000 (0.010) Elapsed 2m 36s (remain 2m 43s) Loss: 0.5183(0.3457) Grad: 8.1774  
Epoch: [7][300/411] Data 0.000 (0.007) Elapsed 3m 53s (remain 1m 25s) Loss: 0.5065(0.3356) Grad: 9.6300  
Epoch: [7][400/411] Data 0.000 (0.005) Elapsed 5m 10s (remain 0m 7s) Loss: 0.4978(0.3435) Grad: 5.8936  
Epoch: [7][410/411] Data 0.000 (0.005) Elapsed 5m 17s (remain 0m 0s) Loss: 0.4156(0.3445) Grad: 10.4028  
EVAL: [0/103] Data 1.030 (1.030) Elapsed 0m 1s (remain 2m 6s) Loss: 0.1758(0.1758) 
EVAL: [100/103] Data 0.000 (0.053) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0187(0.1299) 


Epoch 7 - avg_train_loss: 0.3445  avg_val_loss: 0.1288  time: 346s
Epoch 7 - Accuracy: 0.9605103280680437  F1-Score: 0.9605880254210586
Epoch 7 - Save Best Score: 0.9605 Model


EVAL: [102/103] Data 0.000 (0.052) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0233(0.1288) 
Epoch: [8][0/411] Data 1.332 (1.332) Elapsed 0m 2s (remain 14m 51s) Loss: 0.5071(0.5071) Grad: 9.8638  
Epoch: [8][100/411] Data 0.000 (0.014) Elapsed 1m 19s (remain 4m 3s) Loss: 0.4936(0.2992) Grad: 8.0816  
Epoch: [8][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.1287(0.3025) Grad: 2.5605  
Epoch: [8][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.4302(0.3061) Grad: 9.2510  
Epoch: [8][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.1978(0.3075) Grad: 5.8555  
Epoch: [8][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.2033(0.3081) Grad: 5.5632  
EVAL: [0/103] Data 0.845 (0.845) Elapsed 0m 1s (remain 1m 48s) Loss: 0.2663(0.2663) 
EVAL: [100/103] Data 0.000 (0.056) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0618(0.1374) 


Epoch 8 - avg_train_loss: 0.3081  avg_val_loss: 0.1361  time: 346s
Epoch 8 - Accuracy: 0.951397326852977  F1-Score: 0.9513779137639747


EVAL: [102/103] Data 0.000 (0.055) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0140(0.1361) 
Epoch: [9][0/411] Data 1.353 (1.353) Elapsed 0m 2s (remain 14m 59s) Loss: 0.2674(0.2674) Grad: 6.0366  
Epoch: [9][100/411] Data 0.000 (0.014) Elapsed 1m 19s (remain 4m 3s) Loss: 0.2971(0.2975) Grad: 6.0922  
Epoch: [9][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.3034(0.3068) Grad: 6.4888  
Epoch: [9][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.1396(0.3021) Grad: 4.4712  
Epoch: [9][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.5490(0.3000) Grad: 9.3628  
Epoch: [9][410/411] Data 0.000 (0.004) Elapsed 5m 17s (remain 0m 0s) Loss: 0.2116(0.2999) Grad: 4.4908  
EVAL: [0/103] Data 0.786 (0.786) Elapsed 0m 0s (remain 1m 41s) Loss: 0.0614(0.0614) 
EVAL: [100/103] Data 0.000 (0.058) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0050(0.1436) 


Epoch 9 - avg_train_loss: 0.2999  avg_val_loss: 0.1429  time: 346s
Epoch 9 - Accuracy: 0.9538274605103281  F1-Score: 0.9541305055887614


EVAL: [102/103] Data 0.000 (0.057) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0040(0.1429) 
Epoch: [10][0/411] Data 1.425 (1.425) Elapsed 0m 2s (remain 15m 23s) Loss: 0.2396(0.2396) Grad: 6.1504  
Epoch: [10][100/411] Data 0.000 (0.014) Elapsed 1m 19s (remain 4m 3s) Loss: 0.3230(0.2764) Grad: 5.1844  
Epoch: [10][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.0955(0.2737) Grad: 8.3771  
Epoch: [10][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.1185(0.2737) Grad: 4.2217  
Epoch: [10][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.1299(0.2794) Grad: 5.1463  
Epoch: [10][410/411] Data 0.000 (0.004) Elapsed 5m 18s (remain 0m 0s) Loss: 0.2084(0.2804) Grad: 6.1982  
EVAL: [0/103] Data 0.851 (0.851) Elapsed 0m 1s (remain 1m 48s) Loss: 0.1268(0.1268) 
EVAL: [100/103] Data 0.097 (0.060) Elapsed 0m 27s (remain 0m 0s) Loss: 0.2537(0.1839) 


Epoch 10 - avg_train_loss: 0.2804  avg_val_loss: 0.1808  time: 347s
Epoch 10 - Accuracy: 0.9362089914945322  F1-Score: 0.9355757035043852


EVAL: [102/103] Data 0.000 (0.058) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0179(0.1808) 
Epoch: [11][0/411] Data 1.408 (1.408) Elapsed 0m 2s (remain 15m 30s) Loss: 0.3690(0.3690) Grad: 5.8200  
Epoch: [11][100/411] Data 0.000 (0.014) Elapsed 1m 19s (remain 4m 3s) Loss: 0.2151(0.2638) Grad: 6.7265  
Epoch: [11][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.3431(0.2712) Grad: 6.2260  
Epoch: [11][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.0962(0.2767) Grad: 3.4843  
Epoch: [11][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.0831(0.2802) Grad: 2.4362  
Epoch: [11][410/411] Data 0.000 (0.004) Elapsed 5m 17s (remain 0m 0s) Loss: 0.1780(0.2816) Grad: 3.5628  
EVAL: [0/103] Data 1.052 (1.052) Elapsed 0m 1s (remain 2m 8s) Loss: 0.0772(0.0772) 
EVAL: [100/103] Data 0.000 (0.058) Elapsed 0m 27s (remain 0m 0s) Loss: 0.1841(0.1344) 


Epoch 11 - avg_train_loss: 0.2816  avg_val_loss: 0.1327  time: 346s
Epoch 11 - Accuracy: 0.9580801944106926  F1-Score: 0.9580159316153759


EVAL: [102/103] Data 0.000 (0.057) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0497(0.1327) 
Epoch: [12][0/411] Data 1.453 (1.453) Elapsed 0m 2s (remain 15m 31s) Loss: 0.2031(0.2031) Grad: 2.9586  
Epoch: [12][100/411] Data 0.000 (0.015) Elapsed 1m 19s (remain 4m 2s) Loss: 0.1769(0.2716) Grad: 5.3676  
Epoch: [12][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.3581(0.2698) Grad: 6.4322  
Epoch: [12][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.3915(0.2679) Grad: 7.5950  
Epoch: [12][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.2887(0.2776) Grad: 4.7973  
Epoch: [12][410/411] Data 0.000 (0.004) Elapsed 5m 17s (remain 0m 0s) Loss: 0.5969(0.2799) Grad: 11.3539  
EVAL: [0/103] Data 0.856 (0.856) Elapsed 0m 1s (remain 1m 48s) Loss: 0.3078(0.3078) 
EVAL: [100/103] Data 0.016 (0.058) Elapsed 0m 27s (remain 0m 0s) Loss: 0.2288(0.1419) 


Epoch 12 - avg_train_loss: 0.2799  avg_val_loss: 0.1421  time: 346s
Epoch 12 - Accuracy: 0.9489671931956257  F1-Score: 0.9489356016709525


EVAL: [102/103] Data 0.000 (0.057) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0162(0.1421) 
Epoch: [13][0/411] Data 1.798 (1.798) Elapsed 0m 2s (remain 17m 35s) Loss: 0.0868(0.0868) Grad: 2.5364  
Epoch: [13][100/411] Data 0.000 (0.018) Elapsed 1m 19s (remain 4m 3s) Loss: 0.2023(0.2798) Grad: 3.9679  
Epoch: [13][200/411] Data 0.000 (0.009) Elapsed 2m 36s (remain 2m 43s) Loss: 0.1498(0.2607) Grad: 5.1815  
Epoch: [13][300/411] Data 0.000 (0.006) Elapsed 3m 53s (remain 1m 25s) Loss: 0.2634(0.2615) Grad: 8.1345  
Epoch: [13][400/411] Data 0.000 (0.005) Elapsed 5m 10s (remain 0m 7s) Loss: 0.8933(0.2686) Grad: 13.5986  
Epoch: [13][410/411] Data 0.000 (0.005) Elapsed 5m 18s (remain 0m 0s) Loss: 0.2585(0.2681) Grad: 5.1904  
EVAL: [0/103] Data 0.904 (0.904) Elapsed 0m 1s (remain 1m 53s) Loss: 0.2076(0.2076) 
EVAL: [100/103] Data 0.161 (0.059) Elapsed 0m 27s (remain 0m 0s) Loss: 0.2352(0.1521) 


Epoch 13 - avg_train_loss: 0.2681  avg_val_loss: 0.1512  time: 346s
Epoch 13 - Accuracy: 0.9434993924665857  F1-Score: 0.943411238522545


EVAL: [102/103] Data 0.000 (0.058) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0082(0.1512) 
Epoch    13: reducing learning rate of group 0 to 2.0000e-05.
Epoch: [14][0/411] Data 1.259 (1.259) Elapsed 0m 2s (remain 14m 22s) Loss: 0.2707(0.2707) Grad: 6.7161  
Epoch: [14][100/411] Data 0.000 (0.013) Elapsed 1m 19s (remain 4m 3s) Loss: 0.4841(0.2294) Grad: 6.2604  
Epoch: [14][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.1729(0.2110) Grad: 5.2634  
Epoch: [14][300/411] Data 0.000 (0.004) Elapsed 3m 53s (remain 1m 25s) Loss: 0.1731(0.2126) Grad: 3.4642  
Epoch: [14][400/411] Data 0.000 (0.003) Elapsed 5m 10s (remain 0m 7s) Loss: 0.1520(0.2045) Grad: 4.1594  
Epoch: [14][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.3250(0.2043) Grad: 4.5705  
EVAL: [0/103] Data 0.957 (0.957) Elapsed 0m 1s (remain 1m 58s) Loss: 0.1860(0.1860) 
EVAL: [100/103] Data 0.000 (0.068) Elapsed 0m 28s (remain 0m 0s) Loss: 0.1151(0.0966) 


Epoch 14 - avg_train_loss: 0.2043  avg_val_loss: 0.0952  time: 347s
Epoch 14 - Accuracy: 0.9641555285540705  F1-Score: 0.96413756894668
Epoch 14 - Save Best Score: 0.9642 Model


EVAL: [102/103] Data 0.000 (0.066) Elapsed 0m 29s (remain 0m 0s) Loss: 0.0018(0.0952) 
Epoch: [15][0/411] Data 1.092 (1.092) Elapsed 0m 1s (remain 13m 23s) Loss: 0.1539(0.1539) Grad: 9.5210  
Epoch: [15][100/411] Data 0.000 (0.011) Elapsed 1m 18s (remain 4m 2s) Loss: 0.3012(0.1720) Grad: 7.3061  
Epoch: [15][200/411] Data 0.001 (0.006) Elapsed 2m 35s (remain 2m 42s) Loss: 0.1497(0.1745) Grad: 4.6461  
Epoch: [15][300/411] Data 0.000 (0.004) Elapsed 3m 52s (remain 1m 25s) Loss: 0.1343(0.1709) Grad: 5.4618  
Epoch: [15][400/411] Data 0.000 (0.003) Elapsed 5m 9s (remain 0m 7s) Loss: 0.2788(0.1670) Grad: 6.7111  
Epoch: [15][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.1885(0.1653) Grad: 4.9490  
EVAL: [0/103] Data 0.878 (0.878) Elapsed 0m 1s (remain 1m 51s) Loss: 0.1434(0.1434) 
EVAL: [100/103] Data 0.131 (0.056) Elapsed 0m 27s (remain 0m 0s) Loss: 0.1411(0.0805) 


Epoch 15 - avg_train_loss: 0.1653  avg_val_loss: 0.0792  time: 345s
Epoch 15 - Accuracy: 0.971445929526124  F1-Score: 0.9714616351891567
Epoch 15 - Save Best Score: 0.9714 Model


EVAL: [102/103] Data 0.000 (0.055) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0018(0.0792) 


Score: 0.97145


Epoch: [1][0/411] Data 1.317 (1.317) Elapsed 0m 2s (remain 14m 47s) Loss: 1.9671(1.9671) Grad: 6.9005  
Epoch: [1][100/411] Data 0.000 (0.013) Elapsed 1m 19s (remain 4m 3s) Loss: 0.8641(1.2373) Grad: 9.3724  
Epoch: [1][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 1.1202(1.0285) Grad: 13.6813  
Epoch: [1][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.3297(0.9079) Grad: 5.5947  
Epoch: [1][400/411] Data 0.000 (0.004) Elapsed 5m 9s (remain 0m 7s) Loss: 0.2912(0.8469) Grad: 4.4434  
Epoch: [1][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 1.0844(0.8406) Grad: 14.3599  
EVAL: [0/103] Data 1.207 (1.207) Elapsed 0m 1s (remain 2m 24s) Loss: 0.1368(0.1368) 
EVAL: [100/103] Data 0.000 (0.064) Elapsed 0m 28s (remain 0m 0s) Loss: 0.2899(0.2833) 


Epoch 1 - avg_train_loss: 0.8406  avg_val_loss: 0.2823  time: 347s
Epoch 1 - Accuracy: 0.9015197568389057  F1-Score: 0.9023901103855033
Epoch 1 - Save Best Score: 0.9015 Model


EVAL: [102/103] Data 0.000 (0.063) Elapsed 0m 28s (remain 0m 0s) Loss: 0.3020(0.2823) 
Epoch: [2][0/411] Data 1.172 (1.172) Elapsed 0m 1s (remain 13m 39s) Loss: 0.3351(0.3351) Grad: 8.1992  
Epoch: [2][100/411] Data 0.000 (0.012) Elapsed 1m 19s (remain 4m 2s) Loss: 1.0553(0.5629) Grad: 16.7503  
Epoch: [2][200/411] Data 0.000 (0.006) Elapsed 2m 35s (remain 2m 42s) Loss: 0.6320(0.5524) Grad: 8.7278  
Epoch: [2][300/411] Data 0.000 (0.004) Elapsed 3m 52s (remain 1m 25s) Loss: 0.4262(0.5306) Grad: 6.8239  
Epoch: [2][400/411] Data 0.000 (0.003) Elapsed 5m 9s (remain 0m 7s) Loss: 0.4538(0.5274) Grad: 6.9136  
Epoch: [2][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.6456(0.5267) Grad: 9.4479  
EVAL: [0/103] Data 1.238 (1.238) Elapsed 0m 1s (remain 2m 28s) Loss: 0.1659(0.1659) 
EVAL: [100/103] Data 0.000 (0.064) Elapsed 0m 28s (remain 0m 0s) Loss: 0.6172(0.2403) 


Epoch 2 - avg_train_loss: 0.5267  avg_val_loss: 0.2387  time: 346s
Epoch 2 - Accuracy: 0.9276595744680851  F1-Score: 0.9280498315218761
Epoch 2 - Save Best Score: 0.9277 Model


EVAL: [102/103] Data 0.000 (0.063) Elapsed 0m 28s (remain 0m 0s) Loss: 0.2608(0.2387) 
Epoch: [3][0/411] Data 1.452 (1.452) Elapsed 0m 2s (remain 15m 31s) Loss: 0.3572(0.3572) Grad: 8.1339  
Epoch: [3][100/411] Data 0.000 (0.015) Elapsed 1m 19s (remain 4m 3s) Loss: 0.5904(0.4654) Grad: 13.7531  
Epoch: [3][200/411] Data 0.000 (0.008) Elapsed 2m 36s (remain 2m 43s) Loss: 0.2015(0.4557) Grad: 2.9194  
Epoch: [3][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.5921(0.4681) Grad: 10.6034  
Epoch: [3][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.3068(0.4667) Grad: 6.6538  
Epoch: [3][410/411] Data 0.000 (0.004) Elapsed 5m 17s (remain 0m 0s) Loss: 0.7312(0.4682) Grad: 9.6598  
EVAL: [0/103] Data 1.196 (1.196) Elapsed 0m 1s (remain 2m 23s) Loss: 0.0271(0.0271) 
EVAL: [100/103] Data 0.000 (0.069) Elapsed 0m 28s (remain 0m 0s) Loss: 0.4905(0.2022) 


Epoch 3 - avg_train_loss: 0.4682  avg_val_loss: 0.2003  time: 347s
Epoch 3 - Accuracy: 0.9355623100303951  F1-Score: 0.936138416432249
Epoch 3 - Save Best Score: 0.9356 Model


EVAL: [102/103] Data 0.000 (0.068) Elapsed 0m 29s (remain 0m 0s) Loss: 0.1851(0.2003) 
Epoch: [4][0/411] Data 1.374 (1.374) Elapsed 0m 2s (remain 15m 13s) Loss: 0.5547(0.5547) Grad: 10.0937  
Epoch: [4][100/411] Data 0.000 (0.014) Elapsed 1m 19s (remain 4m 3s) Loss: 0.3233(0.3971) Grad: 8.2399  
Epoch: [4][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.2927(0.3914) Grad: 7.8890  
Epoch: [4][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.4848(0.4075) Grad: 8.8088  
Epoch: [4][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.3074(0.4133) Grad: 6.0831  
Epoch: [4][410/411] Data 0.000 (0.004) Elapsed 5m 17s (remain 0m 0s) Loss: 0.3682(0.4128) Grad: 7.7322  
EVAL: [0/103] Data 1.132 (1.132) Elapsed 0m 1s (remain 2m 17s) Loss: 0.0039(0.0039) 
EVAL: [100/103] Data 0.000 (0.069) Elapsed 0m 28s (remain 0m 0s) Loss: 0.2770(0.1694) 


Epoch 4 - avg_train_loss: 0.4128  avg_val_loss: 0.1670  time: 347s
Epoch 4 - Accuracy: 0.9477203647416413  F1-Score: 0.947638032163153
Epoch 4 - Save Best Score: 0.9477 Model


EVAL: [102/103] Data 0.000 (0.068) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0486(0.1670) 
Epoch: [5][0/411] Data 1.359 (1.359) Elapsed 0m 2s (remain 15m 5s) Loss: 0.3249(0.3249) Grad: 8.5354  
Epoch: [5][100/411] Data 0.000 (0.014) Elapsed 1m 19s (remain 4m 3s) Loss: 0.4318(0.3547) Grad: 7.2063  
Epoch: [5][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.2861(0.3724) Grad: 5.9100  
Epoch: [5][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.1059(0.3691) Grad: 2.6497  
Epoch: [5][400/411] Data 0.001 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.2998(0.3706) Grad: 7.9087  
Epoch: [5][410/411] Data 0.000 (0.004) Elapsed 5m 17s (remain 0m 0s) Loss: 0.2274(0.3714) Grad: 5.6609  
EVAL: [0/103] Data 1.182 (1.182) Elapsed 0m 1s (remain 2m 22s) Loss: 0.0287(0.0287) 
EVAL: [100/103] Data 0.000 (0.062) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0427(0.2011) 


Epoch 5 - avg_train_loss: 0.3714  avg_val_loss: 0.2005  time: 347s
Epoch 5 - Accuracy: 0.9452887537993921  F1-Score: 0.9453056623906767


EVAL: [102/103] Data 0.000 (0.061) Elapsed 0m 28s (remain 0m 0s) Loss: 0.1182(0.2005) 
Epoch: [6][0/411] Data 2.033 (2.033) Elapsed 0m 2s (remain 19m 12s) Loss: 0.0887(0.0887) Grad: 3.0403  
Epoch: [6][100/411] Data 0.000 (0.020) Elapsed 1m 19s (remain 4m 4s) Loss: 0.1032(0.3215) Grad: 3.4785  
Epoch: [6][200/411] Data 0.000 (0.010) Elapsed 2m 36s (remain 2m 43s) Loss: 0.1682(0.3301) Grad: 5.7739  
Epoch: [6][300/411] Data 0.000 (0.007) Elapsed 3m 53s (remain 1m 25s) Loss: 0.6705(0.3376) Grad: 9.1516  
Epoch: [6][400/411] Data 0.000 (0.005) Elapsed 5m 10s (remain 0m 7s) Loss: 0.2345(0.3353) Grad: 4.6037  
Epoch: [6][410/411] Data 0.000 (0.005) Elapsed 5m 18s (remain 0m 0s) Loss: 0.2376(0.3342) Grad: 6.7605  
EVAL: [0/103] Data 1.289 (1.289) Elapsed 0m 1s (remain 2m 32s) Loss: 0.0743(0.0743) 
EVAL: [100/103] Data 0.000 (0.066) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0908(0.1674) 


Epoch 6 - avg_train_loss: 0.3342  avg_val_loss: 0.1659  time: 347s
Epoch 6 - Accuracy: 0.948936170212766  F1-Score: 0.9489920127131872
Epoch 6 - Save Best Score: 0.9489 Model


EVAL: [102/103] Data 0.000 (0.065) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0731(0.1659) 
Epoch: [7][0/411] Data 1.275 (1.275) Elapsed 0m 2s (remain 14m 31s) Loss: 0.4429(0.4429) Grad: 9.8094  
Epoch: [7][100/411] Data 0.001 (0.013) Elapsed 1m 19s (remain 4m 3s) Loss: 0.7537(0.2960) Grad: 11.0078  
Epoch: [7][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.4121(0.3014) Grad: 6.3780  
Epoch: [7][300/411] Data 0.000 (0.004) Elapsed 3m 53s (remain 1m 25s) Loss: 0.3004(0.3176) Grad: 7.1445  
Epoch: [7][400/411] Data 0.000 (0.003) Elapsed 5m 9s (remain 0m 7s) Loss: 0.7005(0.3207) Grad: 9.6251  
Epoch: [7][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.5075(0.3226) Grad: 9.5903  
EVAL: [0/103] Data 1.284 (1.284) Elapsed 0m 1s (remain 2m 34s) Loss: 0.1321(0.1321) 
EVAL: [100/103] Data 0.000 (0.062) Elapsed 0m 28s (remain 0m 0s) Loss: 0.1606(0.1974) 


Epoch 7 - avg_train_loss: 0.3226  avg_val_loss: 0.1952  time: 346s
Epoch 7 - Accuracy: 0.9355623100303951  F1-Score: 0.9352833222223154


EVAL: [102/103] Data 0.194 (0.062) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0820(0.1952) 
Epoch: [8][0/411] Data 1.030 (1.030) Elapsed 0m 1s (remain 12m 57s) Loss: 0.3410(0.3410) Grad: 5.6536  
Epoch: [8][100/411] Data 0.000 (0.010) Elapsed 1m 18s (remain 4m 1s) Loss: 0.1027(0.2927) Grad: 2.9440  
Epoch: [8][200/411] Data 0.000 (0.005) Elapsed 2m 35s (remain 2m 42s) Loss: 0.5604(0.2814) Grad: 8.0376  
Epoch: [8][300/411] Data 0.000 (0.004) Elapsed 3m 52s (remain 1m 24s) Loss: 0.2133(0.3061) Grad: 4.7941  
Epoch: [8][400/411] Data 0.000 (0.003) Elapsed 5m 9s (remain 0m 7s) Loss: 0.2294(0.3114) Grad: 5.8846  
Epoch: [8][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.2633(0.3118) Grad: 5.6013  
EVAL: [0/103] Data 1.188 (1.188) Elapsed 0m 1s (remain 2m 22s) Loss: 0.2008(0.2008) 
EVAL: [100/103] Data 0.000 (0.066) Elapsed 0m 28s (remain 0m 0s) Loss: 0.1190(0.2010) 


Epoch 8 - avg_train_loss: 0.3118  avg_val_loss: 0.1983  time: 346s
Epoch 8 - Accuracy: 0.9361702127659575  F1-Score: 0.9360923136403554


EVAL: [102/103] Data 0.000 (0.065) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0832(0.1983) 
Epoch: [9][0/411] Data 1.643 (1.643) Elapsed 0m 2s (remain 17m 6s) Loss: 0.3442(0.3442) Grad: 5.5013  
Epoch: [9][100/411] Data 0.000 (0.016) Elapsed 1m 19s (remain 4m 3s) Loss: 0.3201(0.2657) Grad: 7.3578  
Epoch: [9][200/411] Data 0.000 (0.008) Elapsed 2m 36s (remain 2m 43s) Loss: 0.1826(0.2613) Grad: 4.5825  
Epoch: [9][300/411] Data 0.000 (0.006) Elapsed 3m 53s (remain 1m 25s) Loss: 0.1482(0.2715) Grad: 3.1836  
Epoch: [9][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.5886(0.2785) Grad: 12.3536  
Epoch: [9][410/411] Data 0.000 (0.004) Elapsed 5m 17s (remain 0m 0s) Loss: 0.5704(0.2787) Grad: 8.6381  
EVAL: [0/103] Data 1.182 (1.182) Elapsed 0m 1s (remain 2m 22s) Loss: 0.1580(0.1580) 
EVAL: [100/103] Data 0.000 (0.065) Elapsed 0m 28s (remain 0m 0s) Loss: 0.1881(0.2081) 


Epoch 9 - avg_train_loss: 0.2787  avg_val_loss: 0.2062  time: 347s
Epoch 9 - Accuracy: 0.9300911854103343  F1-Score: 0.930109870342355


EVAL: [102/103] Data 0.256 (0.067) Elapsed 0m 28s (remain 0m 0s) Loss: 0.2155(0.2062) 
Epoch: [10][0/411] Data 1.601 (1.601) Elapsed 0m 2s (remain 16m 30s) Loss: 0.0576(0.0576) Grad: 1.4820  
Epoch: [10][100/411] Data 0.000 (0.016) Elapsed 1m 19s (remain 4m 3s) Loss: 0.7832(0.2771) Grad: 12.0662  
Epoch: [10][200/411] Data 0.000 (0.008) Elapsed 2m 36s (remain 2m 43s) Loss: 0.1700(0.2741) Grad: 4.2757  
Epoch: [10][300/411] Data 0.000 (0.006) Elapsed 3m 53s (remain 1m 25s) Loss: 0.4302(0.2796) Grad: 9.3681  
Epoch: [10][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.2689(0.2741) Grad: 6.6899  
Epoch: [10][410/411] Data 0.000 (0.004) Elapsed 5m 17s (remain 0m 0s) Loss: 0.5148(0.2754) Grad: 10.1389  
EVAL: [0/103] Data 1.188 (1.188) Elapsed 0m 1s (remain 2m 22s) Loss: 0.0787(0.0787) 
EVAL: [100/103] Data 0.000 (0.065) Elapsed 0m 28s (remain 0m 0s) Loss: 0.1950(0.1688) 


Epoch 10 - avg_train_loss: 0.2754  avg_val_loss: 0.1690  time: 347s
Epoch 10 - Accuracy: 0.9458966565349544  F1-Score: 0.9463081433469958


EVAL: [102/103] Data 0.000 (0.063) Elapsed 0m 28s (remain 0m 0s) Loss: 0.1242(0.1690) 
Epoch: [11][0/411] Data 1.592 (1.592) Elapsed 0m 2s (remain 16m 17s) Loss: 0.3250(0.3250) Grad: 5.5026  
Epoch: [11][100/411] Data 0.000 (0.016) Elapsed 1m 19s (remain 4m 3s) Loss: 0.2668(0.2673) Grad: 5.8432  
Epoch: [11][200/411] Data 0.000 (0.008) Elapsed 2m 36s (remain 2m 43s) Loss: 0.2381(0.2736) Grad: 3.4833  
Epoch: [11][300/411] Data 0.000 (0.006) Elapsed 3m 53s (remain 1m 25s) Loss: 0.4317(0.2760) Grad: 7.2781  
Epoch: [11][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.2684(0.2728) Grad: 6.4658  
Epoch: [11][410/411] Data 0.000 (0.004) Elapsed 5m 17s (remain 0m 0s) Loss: 0.4180(0.2737) Grad: 6.1611  
EVAL: [0/103] Data 1.316 (1.316) Elapsed 0m 1s (remain 2m 36s) Loss: 0.0629(0.0629) 
EVAL: [100/103] Data 0.000 (0.071) Elapsed 0m 28s (remain 0m 0s) Loss: 0.2297(0.1551) 


Epoch 11 - avg_train_loss: 0.2737  avg_val_loss: 0.1531  time: 347s
Epoch 11 - Accuracy: 0.9519756838905775  F1-Score: 0.9521283416175315
Epoch 11 - Save Best Score: 0.9520 Model


EVAL: [102/103] Data 0.000 (0.069) Elapsed 0m 29s (remain 0m 0s) Loss: 0.0647(0.1531) 
Epoch: [12][0/411] Data 1.251 (1.251) Elapsed 0m 2s (remain 14m 26s) Loss: 0.2369(0.2369) Grad: 5.6799  
Epoch: [12][100/411] Data 0.000 (0.013) Elapsed 1m 19s (remain 4m 2s) Loss: 0.5321(0.2389) Grad: 8.6355  
Epoch: [12][200/411] Data 0.000 (0.007) Elapsed 2m 35s (remain 2m 42s) Loss: 0.3408(0.2457) Grad: 6.8899  
Epoch: [12][300/411] Data 0.000 (0.004) Elapsed 3m 52s (remain 1m 25s) Loss: 0.2412(0.2498) Grad: 4.9084  
Epoch: [12][400/411] Data 0.000 (0.003) Elapsed 5m 9s (remain 0m 7s) Loss: 0.2005(0.2574) Grad: 5.2762  
Epoch: [12][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.1945(0.2586) Grad: 3.6846  
EVAL: [0/103] Data 1.250 (1.250) Elapsed 0m 1s (remain 2m 28s) Loss: 0.1015(0.1015) 
EVAL: [100/103] Data 0.000 (0.064) Elapsed 0m 28s (remain 0m 0s) Loss: 0.1811(0.1811) 


Epoch 12 - avg_train_loss: 0.2586  avg_val_loss: 0.1796  time: 346s
Epoch 12 - Accuracy: 0.9477203647416413  F1-Score: 0.9478337122543096


EVAL: [102/103] Data 0.138 (0.064) Elapsed 0m 28s (remain 0m 0s) Loss: 0.1470(0.1796) 
Epoch: [13][0/411] Data 1.114 (1.114) Elapsed 0m 1s (remain 13m 25s) Loss: 0.0439(0.0439) Grad: 1.7836  
Epoch: [13][100/411] Data 0.000 (0.012) Elapsed 1m 19s (remain 4m 2s) Loss: 0.3332(0.2485) Grad: 7.5024  
Epoch: [13][200/411] Data 0.000 (0.006) Elapsed 2m 36s (remain 2m 43s) Loss: 0.3552(0.2741) Grad: 6.7792  
Epoch: [13][300/411] Data 0.000 (0.004) Elapsed 3m 52s (remain 1m 25s) Loss: 0.3729(0.2665) Grad: 6.3486  
Epoch: [13][400/411] Data 0.000 (0.003) Elapsed 5m 9s (remain 0m 7s) Loss: 0.0836(0.2662) Grad: 3.6602  
Epoch: [13][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.2768(0.2649) Grad: 7.9318  
EVAL: [0/103] Data 1.293 (1.293) Elapsed 0m 1s (remain 2m 33s) Loss: 0.0811(0.0811) 
EVAL: [100/103] Data 0.000 (0.068) Elapsed 0m 28s (remain 0m 0s) Loss: 0.3276(0.1371) 


Epoch 13 - avg_train_loss: 0.2649  avg_val_loss: 0.1356  time: 347s
Epoch 13 - Accuracy: 0.9544072948328267  F1-Score: 0.9545968987519464
Epoch 13 - Save Best Score: 0.9544 Model


EVAL: [102/103] Data 0.000 (0.067) Elapsed 0m 28s (remain 0m 0s) Loss: 0.1106(0.1356) 
Epoch: [14][0/411] Data 1.576 (1.576) Elapsed 0m 2s (remain 16m 17s) Loss: 0.2659(0.2659) Grad: 6.3076  
Epoch: [14][100/411] Data 0.000 (0.016) Elapsed 1m 19s (remain 4m 3s) Loss: 0.2570(0.2295) Grad: 7.4798  
Epoch: [14][200/411] Data 0.000 (0.008) Elapsed 2m 36s (remain 2m 43s) Loss: 0.1393(0.2276) Grad: 2.7762  
Epoch: [14][300/411] Data 0.000 (0.006) Elapsed 3m 53s (remain 1m 25s) Loss: 0.4390(0.2393) Grad: 7.4872  
Epoch: [14][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.5690(0.2560) Grad: 6.1773  
Epoch: [14][410/411] Data 0.000 (0.004) Elapsed 5m 18s (remain 0m 0s) Loss: 0.1441(0.2550) Grad: 4.4957  
EVAL: [0/103] Data 1.272 (1.272) Elapsed 0m 1s (remain 2m 31s) Loss: 0.0781(0.0781) 
EVAL: [100/103] Data 0.000 (0.072) Elapsed 0m 29s (remain 0m 0s) Loss: 0.2494(0.1446) 


Epoch 14 - avg_train_loss: 0.2550  avg_val_loss: 0.1425  time: 348s
Epoch 14 - Accuracy: 0.9544072948328267  F1-Score: 0.9543749760125241


EVAL: [102/103] Data 0.000 (0.071) Elapsed 0m 29s (remain 0m 0s) Loss: 0.0359(0.1425) 
Epoch: [15][0/411] Data 1.371 (1.371) Elapsed 0m 2s (remain 15m 12s) Loss: 0.2472(0.2472) Grad: 4.3540  
Epoch: [15][100/411] Data 0.001 (0.014) Elapsed 1m 19s (remain 4m 3s) Loss: 0.0664(0.2328) Grad: 2.0849  
Epoch: [15][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.2804(0.2350) Grad: 6.8994  
Epoch: [15][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.0492(0.2279) Grad: 2.0003  
Epoch: [15][400/411] Data 0.001 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.4431(0.2409) Grad: 4.4875  
Epoch: [15][410/411] Data 0.000 (0.004) Elapsed 5m 18s (remain 0m 0s) Loss: 0.5530(0.2442) Grad: 10.8030  
EVAL: [0/103] Data 1.204 (1.204) Elapsed 0m 1s (remain 2m 24s) Loss: 0.1257(0.1257) 
EVAL: [100/103] Data 0.000 (0.063) Elapsed 0m 28s (remain 0m 0s) Loss: 0.1690(0.1757) 


Epoch 15 - avg_train_loss: 0.2442  avg_val_loss: 0.1749  time: 347s
Epoch 15 - Accuracy: 0.9458966565349544  F1-Score: 0.9456687846828311


EVAL: [102/103] Data 0.000 (0.062) Elapsed 0m 28s (remain 0m 0s) Loss: 0.1194(0.1749) 


Score: 0.95441


Epoch: [1][0/411] Data 1.296 (1.296) Elapsed 0m 2s (remain 15m 0s) Loss: 1.9972(1.9972) Grad: 15.5480  
Epoch: [1][100/411] Data 0.001 (0.013) Elapsed 1m 19s (remain 4m 3s) Loss: 1.1749(1.2491) Grad: 13.4990  
Epoch: [1][200/411] Data 0.001 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.9456(1.0369) Grad: 11.7309  
Epoch: [1][300/411] Data 0.001 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.4911(0.9108) Grad: 8.8264  
Epoch: [1][400/411] Data 0.001 (0.003) Elapsed 5m 10s (remain 0m 7s) Loss: 0.6536(0.8567) Grad: 10.6611  
Epoch: [1][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.2456(0.8510) Grad: 4.8068  
EVAL: [0/103] Data 1.479 (1.479) Elapsed 0m 1s (remain 2m 52s) Loss: 0.1248(0.1248) 
EVAL: [100/103] Data 0.000 (0.059) Elapsed 0m 27s (remain 0m 0s) Loss: 0.2595(0.3016) 


Epoch 1 - avg_train_loss: 0.8510  avg_val_loss: 0.3012  time: 346s
Epoch 1 - Accuracy: 0.9069908814589666  F1-Score: 0.9064248062791032
Epoch 1 - Save Best Score: 0.9070 Model


EVAL: [102/103] Data 0.000 (0.057) Elapsed 0m 28s (remain 0m 0s) Loss: 0.4020(0.3012) 
Epoch: [2][0/411] Data 1.378 (1.378) Elapsed 0m 2s (remain 15m 16s) Loss: 0.8046(0.8046) Grad: 10.7439  
Epoch: [2][100/411] Data 0.000 (0.014) Elapsed 1m 19s (remain 4m 3s) Loss: 0.5816(0.5778) Grad: 8.5674  
Epoch: [2][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.2792(0.5636) Grad: 5.3301  
Epoch: [2][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.3286(0.5615) Grad: 7.1444  
Epoch: [2][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.1165(0.5347) Grad: 5.8870  
Epoch: [2][410/411] Data 0.000 (0.004) Elapsed 5m 17s (remain 0m 0s) Loss: 0.6125(0.5310) Grad: 13.9775  
EVAL: [0/103] Data 1.315 (1.315) Elapsed 0m 1s (remain 2m 36s) Loss: 0.0387(0.0387) 
EVAL: [100/103] Data 0.000 (0.054) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0825(0.2090) 


Epoch 2 - avg_train_loss: 0.5310  avg_val_loss: 0.2089  time: 346s
Epoch 2 - Accuracy: 0.9422492401215805  F1-Score: 0.942151919764732
Epoch 2 - Save Best Score: 0.9422 Model


EVAL: [102/103] Data 0.000 (0.053) Elapsed 0m 27s (remain 0m 0s) Loss: 0.1406(0.2089) 
Epoch: [3][0/411] Data 2.241 (2.241) Elapsed 0m 3s (remain 20m 55s) Loss: 0.3300(0.3300) Grad: 8.2049  
Epoch: [3][100/411] Data 0.000 (0.023) Elapsed 1m 20s (remain 4m 5s) Loss: 0.4024(0.4508) Grad: 7.6618  
Epoch: [3][200/411] Data 0.000 (0.011) Elapsed 2m 37s (remain 2m 44s) Loss: 0.4551(0.4599) Grad: 8.4663  
Epoch: [3][300/411] Data 0.000 (0.008) Elapsed 3m 53s (remain 1m 25s) Loss: 0.9875(0.4749) Grad: 13.2880  
Epoch: [3][400/411] Data 0.003 (0.006) Elapsed 5m 10s (remain 0m 7s) Loss: 0.2807(0.4661) Grad: 5.2006  
Epoch: [3][410/411] Data 0.000 (0.006) Elapsed 5m 18s (remain 0m 0s) Loss: 0.4166(0.4618) Grad: 6.3752  
EVAL: [0/103] Data 1.383 (1.383) Elapsed 0m 1s (remain 2m 42s) Loss: 0.0082(0.0082) 
EVAL: [100/103] Data 0.000 (0.062) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0117(0.1474) 


Epoch 3 - avg_train_loss: 0.4618  avg_val_loss: 0.1455  time: 347s
Epoch 3 - Accuracy: 0.9556231003039514  F1-Score: 0.9556388464933475
Epoch 3 - Save Best Score: 0.9556 Model


EVAL: [102/103] Data 0.000 (0.061) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0400(0.1455) 
Epoch: [4][0/411] Data 1.151 (1.151) Elapsed 0m 2s (remain 13m 41s) Loss: 0.6035(0.6035) Grad: 7.8511  
Epoch: [4][100/411] Data 0.001 (0.012) Elapsed 1m 19s (remain 4m 2s) Loss: 0.1766(0.4006) Grad: 4.9080  
Epoch: [4][200/411] Data 0.000 (0.006) Elapsed 2m 36s (remain 2m 43s) Loss: 0.4161(0.4008) Grad: 7.8311  
Epoch: [4][300/411] Data 0.000 (0.004) Elapsed 3m 53s (remain 1m 25s) Loss: 0.6265(0.4022) Grad: 9.6412  
Epoch: [4][400/411] Data 0.000 (0.003) Elapsed 5m 10s (remain 0m 7s) Loss: 0.3255(0.3988) Grad: 5.9907  
Epoch: [4][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.3147(0.3992) Grad: 8.6503  
EVAL: [0/103] Data 1.349 (1.349) Elapsed 0m 1s (remain 2m 38s) Loss: 0.0188(0.0188) 
EVAL: [100/103] Data 0.000 (0.057) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0124(0.1612) 


Epoch 4 - avg_train_loss: 0.3992  avg_val_loss: 0.1594  time: 346s
Epoch 4 - Accuracy: 0.947112462006079  F1-Score: 0.9467354129267361


EVAL: [102/103] Data 0.000 (0.056) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0717(0.1594) 
Epoch: [5][0/411] Data 2.189 (2.189) Elapsed 0m 2s (remain 20m 17s) Loss: 0.7334(0.7334) Grad: 8.8581  
Epoch: [5][100/411] Data 0.000 (0.022) Elapsed 1m 20s (remain 4m 5s) Loss: 0.0656(0.3487) Grad: 2.1055  
Epoch: [5][200/411] Data 0.000 (0.011) Elapsed 2m 36s (remain 2m 44s) Loss: 0.3767(0.3673) Grad: 8.1237  
Epoch: [5][300/411] Data 0.000 (0.008) Elapsed 3m 54s (remain 1m 25s) Loss: 0.3074(0.3757) Grad: 5.6828  
Epoch: [5][400/411] Data 0.000 (0.006) Elapsed 5m 11s (remain 0m 7s) Loss: 0.3619(0.3711) Grad: 7.8618  
Epoch: [5][410/411] Data 0.000 (0.006) Elapsed 5m 18s (remain 0m 0s) Loss: 0.3564(0.3721) Grad: 6.1389  
EVAL: [0/103] Data 1.541 (1.541) Elapsed 0m 1s (remain 2m 58s) Loss: 0.0627(0.0627) 
EVAL: [100/103] Data 0.000 (0.059) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0930(0.1929) 


Epoch 5 - avg_train_loss: 0.3721  avg_val_loss: 0.1931  time: 347s
Epoch 5 - Accuracy: 0.939209726443769  F1-Score: 0.9397423425685442


EVAL: [102/103] Data 0.000 (0.058) Elapsed 0m 28s (remain 0m 0s) Loss: 0.3005(0.1931) 
Epoch: [6][0/411] Data 1.169 (1.169) Elapsed 0m 1s (remain 13m 37s) Loss: 0.2221(0.2221) Grad: 4.9089  
Epoch: [6][100/411] Data 0.000 (0.012) Elapsed 1m 19s (remain 4m 3s) Loss: 0.6967(0.3497) Grad: 10.2944  
Epoch: [6][200/411] Data 0.000 (0.006) Elapsed 2m 36s (remain 2m 43s) Loss: 0.5823(0.3421) Grad: 8.8505  
Epoch: [6][300/411] Data 0.000 (0.004) Elapsed 3m 53s (remain 1m 25s) Loss: 0.0747(0.3506) Grad: 3.4438  
Epoch: [6][400/411] Data 0.000 (0.003) Elapsed 5m 10s (remain 0m 7s) Loss: 0.1667(0.3518) Grad: 6.2593  
Epoch: [6][410/411] Data 0.000 (0.003) Elapsed 5m 18s (remain 0m 0s) Loss: 0.0848(0.3524) Grad: 2.1185  
EVAL: [0/103] Data 1.471 (1.471) Elapsed 0m 1s (remain 2m 51s) Loss: 0.2844(0.2844) 
EVAL: [100/103] Data 0.000 (0.049) Elapsed 0m 26s (remain 0m 0s) Loss: 0.1042(0.2434) 


Epoch 6 - avg_train_loss: 0.3524  avg_val_loss: 0.2417  time: 346s
Epoch 6 - Accuracy: 0.9258358662613981  F1-Score: 0.9269608785328926


EVAL: [102/103] Data 0.000 (0.048) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0772(0.2417) 
Epoch: [7][0/411] Data 1.569 (1.569) Elapsed 0m 2s (remain 16m 33s) Loss: 0.2385(0.2385) Grad: 5.1940  
Epoch: [7][100/411] Data 0.000 (0.016) Elapsed 1m 19s (remain 4m 4s) Loss: 0.2248(0.3246) Grad: 6.7041  
Epoch: [7][200/411] Data 0.000 (0.008) Elapsed 2m 36s (remain 2m 43s) Loss: 0.3121(0.3393) Grad: 5.7705  
Epoch: [7][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.2718(0.3358) Grad: 6.6419  
Epoch: [7][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.6463(0.3292) Grad: 11.1694  
Epoch: [7][410/411] Data 0.000 (0.004) Elapsed 5m 18s (remain 0m 0s) Loss: 0.6621(0.3308) Grad: 10.7683  
EVAL: [0/103] Data 1.467 (1.467) Elapsed 0m 1s (remain 2m 51s) Loss: 0.0126(0.0126) 
EVAL: [100/103] Data 0.000 (0.060) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0441(0.1290) 


Epoch 7 - avg_train_loss: 0.3308  avg_val_loss: 0.1279  time: 347s
Epoch 7 - Accuracy: 0.9610942249240122  F1-Score: 0.9610641817899865
Epoch 7 - Save Best Score: 0.9611 Model


EVAL: [102/103] Data 0.000 (0.059) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0236(0.1279) 
Epoch: [8][0/411] Data 1.512 (1.512) Elapsed 0m 2s (remain 15m 45s) Loss: 0.0432(0.0432) Grad: 1.4119  
Epoch: [8][100/411] Data 0.000 (0.015) Elapsed 1m 19s (remain 4m 3s) Loss: 0.6257(0.2962) Grad: 9.7025  
Epoch: [8][200/411] Data 0.000 (0.008) Elapsed 2m 36s (remain 2m 43s) Loss: 0.4391(0.3058) Grad: 12.4411  
Epoch: [8][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.1586(0.3142) Grad: 4.3745  
Epoch: [8][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.4751(0.3113) Grad: 10.3235  
Epoch: [8][410/411] Data 0.000 (0.004) Elapsed 5m 18s (remain 0m 0s) Loss: 0.2903(0.3094) Grad: 7.5029  
EVAL: [0/103] Data 1.423 (1.423) Elapsed 0m 1s (remain 2m 46s) Loss: 0.0068(0.0068) 
EVAL: [100/103] Data 0.000 (0.052) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0142(0.1344) 


Epoch 8 - avg_train_loss: 0.3094  avg_val_loss: 0.1343  time: 346s
Epoch 8 - Accuracy: 0.9562310030395137  F1-Score: 0.9560878983273078


EVAL: [102/103] Data 0.000 (0.051) Elapsed 0m 27s (remain 0m 0s) Loss: 0.2470(0.1343) 
Epoch: [9][0/411] Data 1.580 (1.580) Elapsed 0m 2s (remain 16m 54s) Loss: 0.1062(0.1062) Grad: 3.6764  
Epoch: [9][100/411] Data 0.001 (0.016) Elapsed 1m 19s (remain 4m 5s) Loss: 0.1194(0.2674) Grad: 2.1361  
Epoch: [9][200/411] Data 0.001 (0.008) Elapsed 2m 36s (remain 2m 43s) Loss: 0.4161(0.2608) Grad: 5.8532  
Epoch: [9][300/411] Data 0.000 (0.006) Elapsed 3m 53s (remain 1m 25s) Loss: 0.3855(0.2733) Grad: 6.4117  
Epoch: [9][400/411] Data 0.000 (0.004) Elapsed 5m 11s (remain 0m 7s) Loss: 0.2592(0.2873) Grad: 4.5112  
Epoch: [9][410/411] Data 0.000 (0.004) Elapsed 5m 18s (remain 0m 0s) Loss: 0.1727(0.2877) Grad: 6.0815  
EVAL: [0/103] Data 1.618 (1.618) Elapsed 0m 1s (remain 3m 7s) Loss: 0.0143(0.0143) 
EVAL: [100/103] Data 0.000 (0.059) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0560(0.1404) 


Epoch 9 - avg_train_loss: 0.2877  avg_val_loss: 0.1397  time: 347s
Epoch 9 - Accuracy: 0.9507598784194529  F1-Score: 0.9514816376166996


EVAL: [102/103] Data 0.000 (0.057) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0176(0.1397) 
Epoch: [10][0/411] Data 2.372 (2.372) Elapsed 0m 3s (remain 21m 32s) Loss: 0.3562(0.3562) Grad: 6.3763  
Epoch: [10][100/411] Data 0.000 (0.024) Elapsed 1m 20s (remain 4m 6s) Loss: 0.2524(0.2913) Grad: 3.9012  
Epoch: [10][200/411] Data 0.000 (0.012) Elapsed 2m 37s (remain 2m 44s) Loss: 0.5364(0.2748) Grad: 10.7360  
Epoch: [10][300/411] Data 0.000 (0.008) Elapsed 3m 54s (remain 1m 25s) Loss: 0.1356(0.2769) Grad: 2.7861  
Epoch: [10][400/411] Data 0.000 (0.006) Elapsed 5m 11s (remain 0m 7s) Loss: 0.1122(0.2840) Grad: 3.1555  
Epoch: [10][410/411] Data 0.000 (0.006) Elapsed 5m 19s (remain 0m 0s) Loss: 0.1688(0.2851) Grad: 4.3306  
EVAL: [0/103] Data 1.410 (1.410) Elapsed 0m 1s (remain 2m 45s) Loss: 0.0358(0.0358) 
EVAL: [100/103] Data 0.000 (0.056) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0082(0.1285) 
EVAL: [102/103] Data 0.009 (0.055) Elapsed 0m 27s (remain 0m 0s) Loss: 0.1077(0.1288) 


Epoch 10 - avg_train_loss: 0.2851  avg_val_loss: 0.1288  time: 347s
Epoch 10 - Accuracy: 0.956838905775076  F1-Score: 0.9568055044505803


Epoch: [11][0/411] Data 1.347 (1.347) Elapsed 0m 2s (remain 15m 7s) Loss: 0.1660(0.1660) Grad: 3.1790  
Epoch: [11][100/411] Data 0.000 (0.014) Elapsed 1m 19s (remain 4m 3s) Loss: 0.2036(0.2511) Grad: 5.6846  
Epoch: [11][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.1981(0.2560) Grad: 4.5048  
Epoch: [11][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.3744(0.2657) Grad: 6.1964  
Epoch: [11][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.2310(0.2714) Grad: 4.9676  
Epoch: [11][410/411] Data 0.000 (0.004) Elapsed 5m 18s (remain 0m 0s) Loss: 0.1896(0.2716) Grad: 6.3736  
EVAL: [0/103] Data 1.473 (1.473) Elapsed 0m 1s (remain 2m 51s) Loss: 0.0175(0.0175) 
EVAL: [100/103] Data 0.000 (0.061) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0095(0.1195) 


Epoch 11 - avg_train_loss: 0.2716  avg_val_loss: 0.1196  time: 347s
Epoch 11 - Accuracy: 0.9629179331306991  F1-Score: 0.9630476699635167
Epoch 11 - Save Best Score: 0.9629 Model


EVAL: [102/103] Data 0.000 (0.060) Elapsed 0m 28s (remain 0m 0s) Loss: 0.1892(0.1196) 
Epoch: [12][0/411] Data 1.331 (1.331) Elapsed 0m 2s (remain 14m 43s) Loss: 0.0584(0.0584) Grad: 2.0533  
Epoch: [12][100/411] Data 0.000 (0.014) Elapsed 1m 19s (remain 4m 3s) Loss: 0.1251(0.2478) Grad: 4.1468  
Epoch: [12][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.4198(0.2537) Grad: 8.3050  
Epoch: [12][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.1504(0.2598) Grad: 6.4174  
Epoch: [12][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.1558(0.2599) Grad: 5.8439  
Epoch: [12][410/411] Data 0.000 (0.004) Elapsed 5m 18s (remain 0m 0s) Loss: 0.5197(0.2616) Grad: 7.6452  
EVAL: [0/103] Data 1.343 (1.343) Elapsed 0m 1s (remain 2m 38s) Loss: 0.0385(0.0385) 
EVAL: [100/103] Data 0.000 (0.053) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0083(0.1350) 


Epoch 12 - avg_train_loss: 0.2616  avg_val_loss: 0.1348  time: 346s
Epoch 12 - Accuracy: 0.9544072948328267  F1-Score: 0.9545805074884925


EVAL: [102/103] Data 0.000 (0.052) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0376(0.1348) 
Epoch: [13][0/411] Data 1.249 (1.249) Elapsed 0m 2s (remain 14m 12s) Loss: 0.2155(0.2155) Grad: 5.2851  
Epoch: [13][100/411] Data 0.000 (0.013) Elapsed 1m 19s (remain 4m 3s) Loss: 0.1150(0.2138) Grad: 4.2748  
Epoch: [13][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.1592(0.2393) Grad: 3.4579  
Epoch: [13][300/411] Data 0.000 (0.004) Elapsed 3m 53s (remain 1m 25s) Loss: 0.1390(0.2441) Grad: 6.6881  
Epoch: [13][400/411] Data 0.000 (0.003) Elapsed 5m 10s (remain 0m 7s) Loss: 0.0997(0.2390) Grad: 4.0757  
Epoch: [13][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.2410(0.2442) Grad: 5.9114  
EVAL: [0/103] Data 1.768 (1.768) Elapsed 0m 1s (remain 3m 21s) Loss: 0.0275(0.0275) 
EVAL: [100/103] Data 0.000 (0.060) Elapsed 0m 27s (remain 0m 0s) Loss: 0.1078(0.1677) 


Epoch 13 - avg_train_loss: 0.2442  avg_val_loss: 0.1661  time: 346s
Epoch 13 - Accuracy: 0.9477203647416413  F1-Score: 0.947818193181437


EVAL: [102/103] Data 0.000 (0.059) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0046(0.1661) 
Epoch: [14][0/411] Data 2.139 (2.139) Elapsed 0m 2s (remain 20m 19s) Loss: 0.3076(0.3076) Grad: 4.8815  
Epoch: [14][100/411] Data 0.000 (0.021) Elapsed 1m 19s (remain 4m 5s) Loss: 0.5592(0.2526) Grad: 9.5596  
Epoch: [14][200/411] Data 0.000 (0.011) Elapsed 2m 36s (remain 2m 43s) Loss: 0.2532(0.2573) Grad: 5.7763  
Epoch: [14][300/411] Data 0.000 (0.007) Elapsed 3m 53s (remain 1m 25s) Loss: 0.0983(0.2469) Grad: 4.2592  
Epoch: [14][400/411] Data 0.000 (0.006) Elapsed 5m 10s (remain 0m 7s) Loss: 0.2859(0.2470) Grad: 5.7375  
Epoch: [14][410/411] Data 0.000 (0.005) Elapsed 5m 18s (remain 0m 0s) Loss: 0.0932(0.2473) Grad: 2.4940  
EVAL: [0/103] Data 1.466 (1.466) Elapsed 0m 1s (remain 2m 51s) Loss: 0.0096(0.0096) 
EVAL: [100/103] Data 0.000 (0.050) Elapsed 0m 26s (remain 0m 0s) Loss: 0.0343(0.1428) 


Epoch 14 - avg_train_loss: 0.2473  avg_val_loss: 0.1405  time: 346s
Epoch 14 - Accuracy: 0.9562310030395137  F1-Score: 0.956433542784227


EVAL: [102/103] Data 0.000 (0.049) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0220(0.1405) 
Epoch: [15][0/411] Data 1.694 (1.694) Elapsed 0m 2s (remain 17m 4s) Loss: 0.1019(0.1019) Grad: 2.4770  
Epoch: [15][100/411] Data 0.001 (0.017) Elapsed 1m 19s (remain 4m 4s) Loss: 0.2132(0.2071) Grad: 7.2361  
Epoch: [15][200/411] Data 0.000 (0.009) Elapsed 2m 36s (remain 2m 43s) Loss: 0.1533(0.2218) Grad: 4.6519  
Epoch: [15][300/411] Data 0.001 (0.006) Elapsed 3m 53s (remain 1m 25s) Loss: 0.4092(0.2287) Grad: 7.9639  
Epoch: [15][400/411] Data 0.000 (0.005) Elapsed 5m 10s (remain 0m 7s) Loss: 0.2066(0.2313) Grad: 5.3352  
Epoch: [15][410/411] Data 0.000 (0.004) Elapsed 5m 18s (remain 0m 0s) Loss: 0.1138(0.2324) Grad: 4.0168  
EVAL: [0/103] Data 1.673 (1.673) Elapsed 0m 1s (remain 3m 11s) Loss: 0.0549(0.0549) 
EVAL: [100/103] Data 0.000 (0.058) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0015(0.1482) 


Epoch 15 - avg_train_loss: 0.2324  avg_val_loss: 0.1465  time: 347s
Epoch 15 - Accuracy: 0.9580547112462006  F1-Score: 0.957902883431073


EVAL: [102/103] Data 0.000 (0.057) Elapsed 0m 27s (remain 0m 0s) Loss: 0.0921(0.1465) 


Score: 0.96292


Epoch: [1][0/411] Data 1.514 (1.514) Elapsed 0m 2s (remain 16m 30s) Loss: 1.9804(1.9804) Grad: 8.5740  
Epoch: [1][100/411] Data 0.000 (0.019) Elapsed 1m 19s (remain 4m 4s) Loss: 0.8255(1.2675) Grad: 8.8545  
Epoch: [1][200/411] Data 0.000 (0.009) Elapsed 2m 36s (remain 2m 43s) Loss: 0.6455(1.0270) Grad: 11.4010  
Epoch: [1][300/411] Data 0.000 (0.006) Elapsed 3m 53s (remain 1m 25s) Loss: 0.7524(0.9084) Grad: 9.3486  
Epoch: [1][400/411] Data 0.000 (0.005) Elapsed 5m 10s (remain 0m 7s) Loss: 0.6559(0.8501) Grad: 11.1791  
Epoch: [1][410/411] Data 0.000 (0.005) Elapsed 5m 18s (remain 0m 0s) Loss: 0.5890(0.8444) Grad: 9.0910  
EVAL: [0/103] Data 1.098 (1.098) Elapsed 0m 1s (remain 2m 14s) Loss: 0.2441(0.2441) 
EVAL: [100/103] Data 0.000 (0.071) Elapsed 0m 29s (remain 0m 0s) Loss: 0.1486(0.2589) 


Epoch 1 - avg_train_loss: 0.8444  avg_val_loss: 0.2568  time: 348s
Epoch 1 - Accuracy: 0.9161094224924012  F1-Score: 0.9158273478790747
Epoch 1 - Save Best Score: 0.9161 Model


EVAL: [102/103] Data 0.000 (0.069) Elapsed 0m 29s (remain 0m 0s) Loss: 0.2135(0.2568) 
Epoch: [2][0/411] Data 1.550 (1.550) Elapsed 0m 2s (remain 16m 28s) Loss: 0.5296(0.5296) Grad: 8.9391  
Epoch: [2][100/411] Data 0.000 (0.016) Elapsed 1m 19s (remain 4m 3s) Loss: 0.5499(0.5672) Grad: 12.2235  
Epoch: [2][200/411] Data 0.000 (0.008) Elapsed 2m 36s (remain 2m 43s) Loss: 1.0595(0.5671) Grad: 14.8716  
Epoch: [2][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 1.5026(0.5373) Grad: 19.7302  
Epoch: [2][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.9058(0.5335) Grad: 11.3662  
Epoch: [2][410/411] Data 0.000 (0.004) Elapsed 5m 17s (remain 0m 0s) Loss: 0.6964(0.5328) Grad: 20.0575  
EVAL: [0/103] Data 1.004 (1.004) Elapsed 0m 1s (remain 2m 4s) Loss: 0.0614(0.0614) 
EVAL: [100/103] Data 0.000 (0.067) Elapsed 0m 28s (remain 0m 0s) Loss: 0.3240(0.2184) 


Epoch 2 - avg_train_loss: 0.5328  avg_val_loss: 0.2175  time: 347s
Epoch 2 - Accuracy: 0.9264437689969605  F1-Score: 0.926521040216564
Epoch 2 - Save Best Score: 0.9264 Model


EVAL: [102/103] Data 0.000 (0.066) Elapsed 0m 29s (remain 0m 0s) Loss: 0.3459(0.2175) 
Epoch: [3][0/411] Data 1.847 (1.847) Elapsed 0m 2s (remain 18m 18s) Loss: 0.4695(0.4695) Grad: 8.2941  
Epoch: [3][100/411] Data 0.000 (0.019) Elapsed 1m 19s (remain 4m 4s) Loss: 0.6882(0.4714) Grad: 11.7649  
Epoch: [3][200/411] Data 0.000 (0.009) Elapsed 2m 36s (remain 2m 43s) Loss: 0.2620(0.4385) Grad: 6.0951  
Epoch: [3][300/411] Data 0.001 (0.006) Elapsed 3m 53s (remain 1m 25s) Loss: 0.6789(0.4608) Grad: 11.1848  
Epoch: [3][400/411] Data 0.001 (0.005) Elapsed 5m 10s (remain 0m 7s) Loss: 0.5821(0.4603) Grad: 13.6675  
Epoch: [3][410/411] Data 0.000 (0.005) Elapsed 5m 18s (remain 0m 0s) Loss: 0.5306(0.4612) Grad: 9.7052  
EVAL: [0/103] Data 1.129 (1.129) Elapsed 0m 1s (remain 2m 17s) Loss: 0.0387(0.0387) 
EVAL: [100/103] Data 0.000 (0.071) Elapsed 0m 29s (remain 0m 0s) Loss: 0.0904(0.1821) 


Epoch 3 - avg_train_loss: 0.4612  avg_val_loss: 0.1795  time: 348s
Epoch 3 - Accuracy: 0.9477203647416413  F1-Score: 0.9477681982620598
Epoch 3 - Save Best Score: 0.9477 Model


EVAL: [102/103] Data 0.000 (0.070) Elapsed 0m 29s (remain 0m 0s) Loss: 0.0782(0.1795) 
Epoch: [4][0/411] Data 1.312 (1.312) Elapsed 0m 2s (remain 14m 34s) Loss: 0.1741(0.1741) Grad: 7.4759  
Epoch: [4][100/411] Data 0.000 (0.013) Elapsed 1m 19s (remain 4m 3s) Loss: 0.2501(0.3872) Grad: 7.6876  
Epoch: [4][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.3733(0.3821) Grad: 7.8746  
Epoch: [4][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.2821(0.3877) Grad: 5.6562  
Epoch: [4][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.7079(0.3982) Grad: 10.2858  
Epoch: [4][410/411] Data 0.000 (0.004) Elapsed 5m 17s (remain 0m 0s) Loss: 0.3229(0.3992) Grad: 14.2172  
EVAL: [0/103] Data 1.093 (1.093) Elapsed 0m 1s (remain 2m 12s) Loss: 0.1993(0.1993) 
EVAL: [100/103] Data 0.000 (0.067) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0738(0.3544) 


Epoch 4 - avg_train_loss: 0.3992  avg_val_loss: 0.3495  time: 347s
Epoch 4 - Accuracy: 0.921580547112462  F1-Score: 0.9214352929330374


EVAL: [102/103] Data 0.000 (0.066) Elapsed 0m 28s (remain 0m 0s) Loss: 0.1485(0.3495) 
Epoch: [5][0/411] Data 1.407 (1.407) Elapsed 0m 2s (remain 15m 11s) Loss: 0.4275(0.4275) Grad: 7.5272  
Epoch: [5][100/411] Data 0.000 (0.014) Elapsed 1m 19s (remain 4m 3s) Loss: 0.3145(0.3574) Grad: 7.0478  
Epoch: [5][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.3085(0.3502) Grad: 12.4444  
Epoch: [5][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.6258(0.3682) Grad: 8.4701  
Epoch: [5][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.1273(0.3794) Grad: 4.3958  
Epoch: [5][410/411] Data 0.000 (0.004) Elapsed 5m 17s (remain 0m 0s) Loss: 0.3076(0.3791) Grad: 6.5847  
EVAL: [0/103] Data 1.175 (1.175) Elapsed 0m 1s (remain 2m 26s) Loss: 0.0940(0.0940) 
EVAL: [100/103] Data 0.000 (0.071) Elapsed 0m 29s (remain 0m 0s) Loss: 0.0841(0.1713) 


Epoch 5 - avg_train_loss: 0.3791  avg_val_loss: 0.1712  time: 348s
Epoch 5 - Accuracy: 0.941033434650456  F1-Score: 0.9408674159301189


EVAL: [102/103] Data 0.000 (0.069) Elapsed 0m 29s (remain 0m 0s) Loss: 0.3182(0.1712) 
Epoch: [6][0/411] Data 1.882 (1.882) Elapsed 0m 2s (remain 18m 14s) Loss: 0.4229(0.4229) Grad: 8.1965  
Epoch: [6][100/411] Data 0.000 (0.019) Elapsed 1m 19s (remain 4m 4s) Loss: 0.7679(0.3541) Grad: 11.7889  
Epoch: [6][200/411] Data 0.001 (0.010) Elapsed 2m 36s (remain 2m 43s) Loss: 0.6173(0.3621) Grad: 8.7446  
Epoch: [6][300/411] Data 0.000 (0.006) Elapsed 3m 53s (remain 1m 25s) Loss: 0.3495(0.3491) Grad: 5.9396  
Epoch: [6][400/411] Data 0.000 (0.005) Elapsed 5m 10s (remain 0m 7s) Loss: 0.6079(0.3473) Grad: 9.2479  
Epoch: [6][410/411] Data 0.000 (0.005) Elapsed 5m 18s (remain 0m 0s) Loss: 0.4203(0.3461) Grad: 8.5486  
EVAL: [0/103] Data 1.093 (1.093) Elapsed 0m 1s (remain 2m 19s) Loss: 0.0502(0.0502) 
EVAL: [100/103] Data 0.000 (0.070) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0350(0.1899) 


Epoch 6 - avg_train_loss: 0.3461  avg_val_loss: 0.1875  time: 348s
Epoch 6 - Accuracy: 0.9355623100303951  F1-Score: 0.935676180025031


EVAL: [102/103] Data 0.000 (0.069) Elapsed 0m 29s (remain 0m 0s) Loss: 0.0939(0.1875) 
Epoch: [7][0/411] Data 1.621 (1.621) Elapsed 0m 2s (remain 16m 54s) Loss: 0.5556(0.5556) Grad: 10.1060  
Epoch: [7][100/411] Data 0.000 (0.016) Elapsed 1m 19s (remain 4m 4s) Loss: 0.2982(0.3092) Grad: 6.9237  
Epoch: [7][200/411] Data 0.000 (0.008) Elapsed 2m 36s (remain 2m 43s) Loss: 0.2773(0.3116) Grad: 7.0936  
Epoch: [7][300/411] Data 0.000 (0.006) Elapsed 3m 53s (remain 1m 25s) Loss: 0.6729(0.3083) Grad: 15.2929  
Epoch: [7][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.5718(0.3157) Grad: 8.6064  
Epoch: [7][410/411] Data 0.000 (0.004) Elapsed 5m 18s (remain 0m 0s) Loss: 0.2994(0.3168) Grad: 4.4411  
EVAL: [0/103] Data 1.113 (1.113) Elapsed 0m 1s (remain 2m 14s) Loss: 0.0639(0.0639) 
EVAL: [100/103] Data 0.000 (0.068) Elapsed 0m 28s (remain 0m 0s) Loss: 0.1395(0.1598) 


Epoch 7 - avg_train_loss: 0.3168  avg_val_loss: 0.1595  time: 347s
Epoch 7 - Accuracy: 0.9458966565349544  F1-Score: 0.9459042916522826


EVAL: [102/103] Data 0.000 (0.066) Elapsed 0m 28s (remain 0m 0s) Loss: 0.3140(0.1595) 
Epoch: [8][0/411] Data 1.229 (1.229) Elapsed 0m 2s (remain 14m 20s) Loss: 0.5440(0.5440) Grad: 6.9803  
Epoch: [8][100/411] Data 0.000 (0.012) Elapsed 1m 19s (remain 4m 2s) Loss: 0.0785(0.3152) Grad: 2.9786  
Epoch: [8][200/411] Data 0.000 (0.006) Elapsed 2m 36s (remain 2m 43s) Loss: 0.2364(0.2943) Grad: 5.7559  
Epoch: [8][300/411] Data 0.000 (0.004) Elapsed 3m 53s (remain 1m 25s) Loss: 0.3174(0.2900) Grad: 7.9095  
Epoch: [8][400/411] Data 0.000 (0.003) Elapsed 5m 10s (remain 0m 7s) Loss: 0.2957(0.3035) Grad: 7.7700  
Epoch: [8][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.1564(0.3044) Grad: 5.2777  
EVAL: [0/103] Data 1.332 (1.332) Elapsed 0m 1s (remain 2m 37s) Loss: 0.0095(0.0095) 
EVAL: [100/103] Data 0.000 (0.068) Elapsed 0m 28s (remain 0m 0s) Loss: 0.1685(0.1629) 


Epoch 8 - avg_train_loss: 0.3044  avg_val_loss: 0.1615  time: 347s
Epoch 8 - Accuracy: 0.9440729483282675  F1-Score: 0.9442432473578328


EVAL: [102/103] Data 0.000 (0.067) Elapsed 0m 29s (remain 0m 0s) Loss: 0.1796(0.1615) 
Epoch: [9][0/411] Data 1.934 (1.934) Elapsed 0m 2s (remain 18m 30s) Loss: 0.5631(0.5631) Grad: 11.9583  
Epoch: [9][100/411] Data 0.000 (0.019) Elapsed 1m 19s (remain 4m 4s) Loss: 0.3137(0.3119) Grad: 6.2918  
Epoch: [9][200/411] Data 0.000 (0.010) Elapsed 2m 36s (remain 2m 43s) Loss: 0.5031(0.3035) Grad: 7.8882  
Epoch: [9][300/411] Data 0.000 (0.007) Elapsed 3m 53s (remain 1m 25s) Loss: 0.1777(0.3153) Grad: 5.0153  
Epoch: [9][400/411] Data 0.000 (0.005) Elapsed 5m 10s (remain 0m 7s) Loss: 0.3636(0.3062) Grad: 8.5793  
Epoch: [9][410/411] Data 0.000 (0.005) Elapsed 5m 18s (remain 0m 0s) Loss: 0.4223(0.3061) Grad: 7.6580  
EVAL: [0/103] Data 0.890 (0.890) Elapsed 0m 1s (remain 1m 52s) Loss: 0.0218(0.0218) 
EVAL: [100/103] Data 0.000 (0.068) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0495(0.1220) 
EVAL: [102/103] Data 0.000 (0.067) Elapsed 0m 29s (remain 0m 0s) Loss: 0.3346(0.1225) 


Epoch 9 - avg_train_loss: 0.3061  avg_val_loss: 0.1225  time: 348s
Epoch 9 - Accuracy: 0.9604863221884499  F1-Score: 0.9604349954285736
Epoch 9 - Save Best Score: 0.9605 Model


Epoch: [10][0/411] Data 1.422 (1.422) Elapsed 0m 2s (remain 15m 12s) Loss: 0.1937(0.1937) Grad: 4.0849  
Epoch: [10][100/411] Data 0.000 (0.014) Elapsed 1m 19s (remain 4m 2s) Loss: 0.4980(0.2935) Grad: 7.7513  
Epoch: [10][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.4673(0.2846) Grad: 7.3120  
Epoch: [10][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.2005(0.2772) Grad: 4.6942  
Epoch: [10][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.1834(0.2810) Grad: 3.5952  
Epoch: [10][410/411] Data 0.000 (0.004) Elapsed 5m 17s (remain 0m 0s) Loss: 0.1426(0.2808) Grad: 3.2915  
EVAL: [0/103] Data 1.127 (1.127) Elapsed 0m 1s (remain 2m 16s) Loss: 0.0534(0.0534) 
EVAL: [100/103] Data 0.000 (0.071) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0231(0.1444) 


Epoch 10 - avg_train_loss: 0.2808  avg_val_loss: 0.1474  time: 347s
Epoch 10 - Accuracy: 0.9537993920972644  F1-Score: 0.9538550567598966


EVAL: [102/103] Data 0.000 (0.070) Elapsed 0m 29s (remain 0m 0s) Loss: 0.6872(0.1474) 
Epoch: [11][0/411] Data 1.304 (1.304) Elapsed 0m 2s (remain 14m 42s) Loss: 0.0455(0.0455) Grad: 1.4524  
Epoch: [11][100/411] Data 0.000 (0.013) Elapsed 1m 19s (remain 4m 2s) Loss: 0.1465(0.2525) Grad: 4.0006  
Epoch: [11][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.4907(0.2684) Grad: 6.5423  
Epoch: [11][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.3817(0.2686) Grad: 6.4558  
Epoch: [11][400/411] Data 0.000 (0.003) Elapsed 5m 10s (remain 0m 7s) Loss: 0.3378(0.2736) Grad: 8.9261  
Epoch: [11][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.3654(0.2737) Grad: 10.0294  
EVAL: [0/103] Data 1.025 (1.025) Elapsed 0m 1s (remain 2m 6s) Loss: 0.0089(0.0089) 
EVAL: [100/103] Data 0.000 (0.071) Elapsed 0m 29s (remain 0m 0s) Loss: 0.0255(0.1745) 


Epoch 11 - avg_train_loss: 0.2737  avg_val_loss: 0.1738  time: 347s
Epoch 11 - Accuracy: 0.9452887537993921  F1-Score: 0.9461810840410658


EVAL: [102/103] Data 0.000 (0.070) Elapsed 0m 29s (remain 0m 0s) Loss: 0.2665(0.1738) 
Epoch: [12][0/411] Data 1.034 (1.034) Elapsed 0m 1s (remain 12m 50s) Loss: 0.1650(0.1650) Grad: 5.1701  
Epoch: [12][100/411] Data 0.001 (0.011) Elapsed 1m 18s (remain 4m 2s) Loss: 0.1029(0.2525) Grad: 2.7946  
Epoch: [12][200/411] Data 0.000 (0.005) Elapsed 2m 35s (remain 2m 42s) Loss: 0.3481(0.2456) Grad: 7.1683  
Epoch: [12][300/411] Data 0.000 (0.004) Elapsed 3m 52s (remain 1m 25s) Loss: 0.3893(0.2592) Grad: 7.9773  
Epoch: [12][400/411] Data 0.001 (0.003) Elapsed 5m 9s (remain 0m 7s) Loss: 0.0890(0.2583) Grad: 2.7906  
Epoch: [12][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.4968(0.2588) Grad: 8.5530  
EVAL: [0/103] Data 1.044 (1.044) Elapsed 0m 1s (remain 2m 7s) Loss: 0.0577(0.0577) 
EVAL: [100/103] Data 0.000 (0.064) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0148(0.1199) 


Epoch 12 - avg_train_loss: 0.2588  avg_val_loss: 0.1273  time: 346s
Epoch 12 - Accuracy: 0.9610942249240122  F1-Score: 0.9613574472306426
Epoch 12 - Save Best Score: 0.9611 Model


EVAL: [102/103] Data 0.000 (0.063) Elapsed 0m 28s (remain 0m 0s) Loss: 1.2058(0.1273) 
Epoch: [13][0/411] Data 1.479 (1.479) Elapsed 0m 2s (remain 15m 35s) Loss: 0.0626(0.0626) Grad: 2.4246  
Epoch: [13][100/411] Data 0.000 (0.015) Elapsed 1m 19s (remain 4m 3s) Loss: 0.0353(0.2196) Grad: 1.2056  
Epoch: [13][200/411] Data 0.000 (0.008) Elapsed 2m 36s (remain 2m 43s) Loss: 0.3871(0.2387) Grad: 4.5126  
Epoch: [13][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.2784(0.2524) Grad: 4.6157  
Epoch: [13][400/411] Data 0.000 (0.004) Elapsed 5m 10s (remain 0m 7s) Loss: 0.2336(0.2520) Grad: 5.1049  
Epoch: [13][410/411] Data 0.000 (0.004) Elapsed 5m 17s (remain 0m 0s) Loss: 0.1937(0.2522) Grad: 5.3616  
EVAL: [0/103] Data 0.948 (0.948) Elapsed 0m 1s (remain 1m 58s) Loss: 0.0171(0.0171) 
EVAL: [100/103] Data 0.000 (0.072) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0144(0.1351) 


Epoch 13 - avg_train_loss: 0.2522  avg_val_loss: 0.1350  time: 347s
Epoch 13 - Accuracy: 0.9586626139817629  F1-Score: 0.9587794284112514


EVAL: [102/103] Data 0.000 (0.071) Elapsed 0m 29s (remain 0m 0s) Loss: 0.2859(0.1350) 
Epoch: [14][0/411] Data 1.307 (1.307) Elapsed 0m 2s (remain 14m 48s) Loss: 0.0251(0.0251) Grad: 0.9756  
Epoch: [14][100/411] Data 0.000 (0.013) Elapsed 1m 19s (remain 4m 3s) Loss: 0.4052(0.2256) Grad: 9.2203  
Epoch: [14][200/411] Data 0.000 (0.007) Elapsed 2m 36s (remain 2m 43s) Loss: 0.2741(0.2277) Grad: 3.4405  
Epoch: [14][300/411] Data 0.000 (0.005) Elapsed 3m 53s (remain 1m 25s) Loss: 0.0466(0.2216) Grad: 1.8990  
Epoch: [14][400/411] Data 0.000 (0.003) Elapsed 5m 10s (remain 0m 7s) Loss: 0.0961(0.2223) Grad: 2.1443  
Epoch: [14][410/411] Data 0.000 (0.003) Elapsed 5m 17s (remain 0m 0s) Loss: 0.1732(0.2255) Grad: 4.3042  
EVAL: [0/103] Data 1.124 (1.124) Elapsed 0m 1s (remain 2m 16s) Loss: 0.0017(0.0017) 
EVAL: [100/103] Data 0.000 (0.066) Elapsed 0m 28s (remain 0m 0s) Loss: 0.0034(0.1168) 


Epoch 14 - avg_train_loss: 0.2255  avg_val_loss: 0.1173  time: 347s
Epoch 14 - Accuracy: 0.9617021276595744  F1-Score: 0.9617280675557517
Epoch 14 - Save Best Score: 0.9617 Model


EVAL: [102/103] Data 0.000 (0.065) Elapsed 0m 28s (remain 0m 0s) Loss: 0.3200(0.1173) 
Epoch: [15][0/411] Data 2.037 (2.037) Elapsed 0m 2s (remain 19m 33s) Loss: 0.2127(0.2127) Grad: 4.9403  
Epoch: [15][100/411] Data 0.001 (0.020) Elapsed 1m 19s (remain 4m 5s) Loss: 0.2682(0.2464) Grad: 6.0461  
Epoch: [15][200/411] Data 0.001 (0.010) Elapsed 2m 36s (remain 2m 43s) Loss: 0.2061(0.2318) Grad: 3.4569  
Epoch: [15][300/411] Data 0.000 (0.007) Elapsed 3m 53s (remain 1m 25s) Loss: 0.1517(0.2330) Grad: 4.5352  
Epoch: [15][400/411] Data 0.000 (0.005) Elapsed 5m 10s (remain 0m 7s) Loss: 0.1323(0.2361) Grad: 4.4070  
Epoch: [15][410/411] Data 0.000 (0.005) Elapsed 5m 18s (remain 0m 0s) Loss: 0.1074(0.2362) Grad: 4.1803  
EVAL: [0/103] Data 0.999 (0.999) Elapsed 0m 1s (remain 2m 3s) Loss: 0.0211(0.0211) 
EVAL: [100/103] Data 0.000 (0.072) Elapsed 0m 29s (remain 0m 0s) Loss: 0.0355(0.1412) 


Epoch 15 - avg_train_loss: 0.2362  avg_val_loss: 0.1421  time: 348s
Epoch 15 - Accuracy: 0.9610942249240122  F1-Score: 0.9611643789203331


EVAL: [102/103] Data 0.000 (0.071) Elapsed 0m 29s (remain 0m 0s) Loss: 0.4239(0.1421) 


Score: 0.96170
Score: 0.96390
