In [1]:
import os
import time
import math
import random
import numpy as np
import pandas as pd
import torch
from torch import nn, einsum
import torch.nn.functional as F
import torch_optimizer as optim
from torch.utils.data import TensorDataset,DataLoader
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from tqdm.auto import tqdm
from einops import rearrange
from einops.layers.torch import Rearrange

In [2]:
DEBUG = False

In [3]:
MODEL_NAME = 'transformer_v19'
base_dir = "./"
if not os.path.exists(f'models/{MODEL_NAME}'):
    os.makedirs(f'models/{MODEL_NAME}')

In [32]:
train_df = pd.read_csv(base_dir + 'train.csv')
test_df = pd.read_csv(base_dir + 'test.csv')

In [33]:
le = LabelEncoder()
train_df['pressure'] = le.fit_transform(train_df['pressure'])

In [34]:
train_df['RC'] = (train_df['R'].astype(str) + '_' + train_df['C'].astype(str))
train_df['RC'] = train_df['RC'].map({'20_50':0, '20_20':1, '50_20':2, '50_50':3, '5_50':4, '5_20':5, '50_10':6, '20_10':7, '5_10':8})
test_df['RC'] = (test_df['R'].astype(str) + '_' + test_df['C'].astype(str))
test_df['RC'] = test_df['RC'].map({'20_50':0, '20_20':1, '50_20':2, '50_50':3, '5_50':4, '5_20':5, '50_10':6, '20_10':7, '5_10':8})
# train_df['u_in_cat'] = train_df['u_in'].round().astype(int)
# test_df['u_in_cat'] = test_df['u_in'].round().astype(int)
train_df['u_in_0'] = (train_df['u_in'].round() == 0).astype(int)
test_df['u_in_0'] = (test_df['u_in'].round() == 0).astype(int)
train_df['u_in_5'] = (train_df['u_in'].round() == 5).astype(int)
test_df['u_in_5'] = (test_df['u_in'].round() == 5).astype(int)

In [35]:
# features = train_df.columns.drop(['id','breath_id','pressure'])

In [36]:
from sklearn.preprocessing import RobustScaler, StandardScaler
RS = StandardScaler()
all_df = pd.concat([train_df,test_df])
train_df['u_in'] = np.log1p(train_df['u_in'] - all_df['u_in'].min())
test_df['u_in'] = np.log1p(test_df['u_in'] - all_df['u_in'].min())
all_df['u_in'] = np.log1p(all_df['u_in'] - all_df['u_in'].min())

RS.fit(all_df[['u_in','time_step']])
train_df[['u_in','time_step']] = RS.transform(train_df[['u_in','time_step']])
test_df[['u_in','time_step']] = RS.transform(test_df[['u_in','time_step']])

# from sklearn.preprocessing import RobustScaler, StandardScaler
# RS = StandardScaler()
# all_df = pd.concat([train_df,test_df])
# RS.fit(all_df[['u_in','time_step']])
# train_df[['u_in','time_step']] = RS.transform(train_df[['u_in','time_step']])
# test_df[['u_in','time_step']] = RS.transform(test_df[['u_in','time_step']])

In [40]:
X_tr = train_df[['RC','u_in','u_out','u_in_0','u_in_5','time_step']].values.reshape(-1,80,6)
X_test = test_df[['RC','u_in','u_out','u_in_0','u_in_5','time_step']].values.reshape(-1,80,6)
# X_tr = train_df[features].values.reshape(-1,80,18)
# X_test = test_df[features].values.reshape(-1,80,18)

In [41]:
y_tr = train_df['pressure'].values.reshape(-1,80)

In [11]:
# pseudo_label = np.load("subs/sub_logits_transformer_v11.npy")
# from scipy.special import softmax
# pseudo_label = softmax(pseudo_label,axis=2)
# mask = test_df['u_out'].values.reshape(-1,80) == 0
# tmp1 = pseudo_label.max(2)
# res = []
# for i in tqdm(range(len(tmp1))):
#     res.append(tmp1[i][mask[i]].mean())
# res = np.array(res)
# mask = res > 0.6
# np.mean(mask)
# pseudo_label = pseudo_label.argmax(-1)
# X_tr = np.concatenate([X_tr,X_test[mask]],axis=0)
# y_tr = np.concatenate([y_tr,pseudo_label[mask]],axis=0)
# import gc
# del pseudo_label, mask, res, tmp1; gc.collect()

In [12]:
# unique_pressures = train_df["pressure"].unique()
# sorted_pressures = np.sort(unique_pressures)
# total_pressures_len = len(sorted_pressures)
# def find_nearest(prediction):
#     insert_idx = np.searchsorted(sorted_pressures, prediction)
#     if insert_idx == total_pressures_len:
#         # If the predicted value is bigger than the highest pressure in the train dataset,
#         # return the max value.
#         return sorted_pressures[-1]
#     elif insert_idx == 0:
#         # Same control but for the lower bound.
#         return sorted_pressures[0]
#     lower_val = sorted_pressures[insert_idx - 1]
#     upper_val = sorted_pressures[insert_idx]
#     return lower_val if abs(lower_val - prediction) < abs(upper_val - prediction) else upper_val
# pseudo_label = pd.read_csv("pressure_submission.csv")['pressure'].values
# for i in range(len(pseudo_label)):
#     pseudo_label[i] = find_nearest(pseudo_label[i])
# pseudo_label = le.transform(pseudo_label).reshape(-1,80)
# X_tr = np.concatenate([X_tr,X_test],axis=0)
# y_tr = np.concatenate([y_tr,pseudo_label],axis=0)
# import gc
# del pseudo_label; gc.collect()

In [42]:
X_tr.shape,X_test.shape,y_tr.shape

((75450, 80, 6), (50300, 80, 6), (75450, 80))

In [43]:
if DEBUG:
    X_tr = X_tr[:1000]
    y_tr = y_tr[:1000]
    X_test = X_test[:1000]

In [44]:
def train_one_epoch(model, optimizer, scheduler, train_dataloader, epoch, device = torch.device('cpu')):
    model.train()
    MA_loss = 0
    count = 0
    for X,y in train_dataloader:
        X = X.to(device)
        y = y.to(device)
        y = F.one_hot(y,950).float()
        y[:,:,:-1] += 0.1 * y[:,:,1:]
        y[:,:,1:] += 0.1 * y[:,:,:-1]
        y[y==1] = 0.8
        
        optimizer.zero_grad()
        mask1 = X[:,:,2] == 0
        mask2 = X[:,:,2] == 1
        pred = model(X)

        pred = torch.sigmoid(pred[mask1].reshape(-1,950))
        y = y[mask1].reshape(-1,950)

        loss = -torch.sum(y * torch.log(1e-8 + pred) + (1-y) * torch.log(1 - pred + 1e-8),dim=-1).mean()
        loss.backward()
        optimizer.step()
        if epoch < 100:
            scheduler.step()
        
        MA_loss += loss.item() * len(y)
        count += len(y)
    MA_loss /= count
    return MA_loss

def evaluation(model, val_dataloader, device = torch.device('cpu')):
    model.eval()
    criterion = nn.L1Loss()
    MA_loss = 0
    count = 0
    with torch.no_grad():
        for X,y in val_dataloader:
            X = X.to(device)
            y = y.to(device)
            mask = X[:,:,2] == 0
            pred = model(X)
            pred = torch.argmax(pred,dim=-1)
            pred = pred[mask].reshape(-1).cpu().numpy()
            pred = torch.Tensor(le.inverse_transform(pred)).to(device)
            y = y[mask].reshape(-1).cpu().long().numpy()
            y = torch.Tensor(le.inverse_transform(y)).to(device)
            loss = criterion(pred, y)
            # loss = criterion(pred.reshape(-1), y.reshape(-1))
            MA_loss += loss.item() * len(y)
            count += len(y)
        MA_loss /= count
    return MA_loss

def inference(model, test_dataloader, device = torch.device('cpu'), istest = False):
    model.eval()
    prediction = []
    with torch.no_grad():
        if istest:
            for X in test_dataloader:
                X = X[0]
                X = X.to(device)
                pred = model(X).cpu()
                pred = torch.argmax(pred,dim=-1)
                prediction.append(pred)
        else:
            for X, y in test_dataloader:
                X = X.to(device)
                pred = model(X).cpu()
                pred = torch.argmax(pred,dim=-1)
                prediction.append(pred)
    prediction = torch.cat(prediction,dim=0).numpy()
    prediction = le.inverse_transform(prediction.reshape(-1)).reshape(-1,80)
    return prediction

In [45]:
import torch
from torch import nn, einsum
import torch.nn.functional as F

from einops import rearrange
from einops.layers.torch import Rearrange

# helper functions

def exists(val):
    return val is not None

def default(val, d):
    return val if exists(val) else d

def calc_same_padding(kernel_size):
    pad = kernel_size // 2
    return (pad, pad - (kernel_size + 1) % 2)

# helper classes

class Swish(nn.Module):
    def forward(self, x):
        return x * x.sigmoid()

class GLU(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.dim = dim

    def forward(self, x):
        out, gate = x.chunk(2, dim=self.dim)
        return out * gate.sigmoid()

class DepthWiseConv1d(nn.Module):
    def __init__(self, chan_in, chan_out, kernel_size, padding):
        super().__init__()
        self.padding = padding
        self.conv = nn.Conv1d(chan_in, chan_out, kernel_size, groups = chan_in)

    def forward(self, x):
        x = F.pad(x, self.padding)
        return self.conv(x)

# attention, feedforward, and conv module

class Scale(nn.Module):
    def __init__(self, scale, fn):
        super().__init__()
        self.fn = fn
        self.scale = scale

    def forward(self, x, **kwargs):
        return self.fn(x, **kwargs) * self.scale

class PreNorm(nn.Module):
    def __init__(self, dim, fn):
        super().__init__()
        self.fn = fn
        self.norm = nn.LayerNorm(dim)

    def forward(self, x, **kwargs):
        x = self.norm(x)
        return self.fn(x, **kwargs)

class FeedForward(nn.Module):
    def __init__(
        self,
        dim,
        mult = 4,
        dropout = 0.
    ):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, dim * mult),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(dim * mult, dim),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        return self.net(x)

class ConformerConvModule(nn.Module):
    def __init__(
        self,
        dim,
        causal = False,
        expansion_factor = 2,
        kernel_size = 31,
        dropout = 0.):
        super().__init__()

        inner_dim = dim * expansion_factor
        padding = calc_same_padding(kernel_size) if not causal else (kernel_size - 1, 0)

        self.net = nn.Sequential(
            nn.LayerNorm(dim),
            Rearrange('b n c -> b c n'),
            nn.Conv1d(dim, inner_dim * 2, 1),
            GLU(dim=1),
            DepthWiseConv1d(inner_dim, inner_dim, kernel_size = kernel_size, padding = padding),
            nn.BatchNorm1d(inner_dim) if not causal else nn.Identity(),
            Swish(),
            nn.Conv1d(inner_dim, dim, 1),
            Rearrange('b c n -> b n c'),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        return self.net(x)

In [47]:
class CustomAttention(nn.Module):
    def __init__(
        self,
        dim,
        heads = 8,
        dim_head = 64,
        dropout = 0.,
        max_pos_emb = 512,
        causal = False
    ):
        super().__init__()
        inner_dim = dim_head * heads
        self.heads= heads
        self.scale = dim_head ** -0.5
        self.to_q = nn.Linear(dim, inner_dim, bias = False)
        self.to_kv = nn.Linear(dim, inner_dim * 2, bias = False)
        self.to_out = nn.Linear(inner_dim, dim)

        self.max_pos_emb = max_pos_emb
        # self.rel_pos_emb1 = nn.Linear(1, heads, bias=False)
        # self.rel_pos_emb2 = nn.Linear(1, heads, bias=False)
        self.rel_pos_emb = nn.Sequential(nn.Linear(1, dim_head),nn.GELU(),nn.Linear(dim_head, dim_head))
        self.dropout = nn.Dropout(dropout)

        self.causal = causal

    def forward(self, x, position, context = None, mask = None, context_mask = None):
        n, device, h, max_pos_emb, has_context = x.shape[-2], x.device, self.heads, self.max_pos_emb, exists(context)
        context = default(context, x)

        q, k, v = (self.to_q(x), *self.to_kv(context).chunk(2, dim = -1))
        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = h), (q, k, v))

        dots = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale

        # shaw's relative positional embedding
        dist = rearrange(position, 'b i -> b i () ()') - rearrange(position, 'b j -> b () j ()')
        # rel_pos_emb = self.rel_pos_emb1(F.relu(dist)) + self.rel_pos_emb2(F.relu(-dist))   #(bijh)
        # pos_attn = rel_pos_emb.permute(0,3,1,2)
        rel_pos_emb = self.rel_pos_emb(dist)
        pos_attn = einsum('b h n d, b n r d -> b h n r', q, rel_pos_emb) * self.scale
        dots = dots + pos_attn

        if exists(mask) or exists(context_mask):
            mask = default(mask, lambda: torch.ones(*x.shape[:2], device = device))
            context_mask = default(context_mask, mask) if not has_context else default(context_mask, lambda: torch.ones(*context.shape[:2], device = device))
            mask_value = -torch.finfo(dots.dtype).max
            mask = rearrange(mask, 'b i -> b () i ()') * rearrange(context_mask, 'b j -> b () () j')
            dots.masked_fill_(~mask, mask_value)

        if self.causal:
            mask = torch.tril(torch.ones(dots.shape[-2:],device=dots.device)).T
            mask = rearrange(mask, 'n r -> () () n r')
            dots = dots - mask * 999

        attn = dots.softmax(dim = -1)

        out = einsum('b h i j, b h j d -> b h i d', attn, v)
        out = rearrange(out, 'b h n d -> b n (h d)')
        out = self.to_out(out)
        return self.dropout(out)


class CustomConformerBlock(nn.Module):
    def __init__(
        self,
        *,
        dim,
        dim_head = 64,
        heads = 8,
        ff_mult = 4,
        conv_expansion_factor = 2,
        conv_kernel_size = 31,
        attn_dropout = 0.,
        ff_dropout = 0.,
        conv_dropout = 0.,
        causal = False
    ):
        super().__init__()
        self.ff1 = FeedForward(dim = dim, mult = ff_mult, dropout = ff_dropout)
        self.attn = CustomAttention(dim = dim, dim_head = dim_head, heads = heads, dropout = attn_dropout, causal = causal)
        self.conv = ConformerConvModule(dim = dim, causal = causal, expansion_factor = conv_expansion_factor, kernel_size = conv_kernel_size, dropout = conv_dropout)
        self.ff2 = FeedForward(dim = dim, mult = ff_mult, dropout = ff_dropout)

        self.attn = PreNorm(dim, self.attn)
        self.ff1 = Scale(0.5, PreNorm(dim, self.ff1))
        self.ff2 = Scale(0.5, PreNorm(dim, self.ff2))

    def forward(self, x, pos, mask = None):
        x = self.ff1(x) + x
        x = self.attn(x, position = pos, mask = mask) + x
        x = self.conv(x) + x
        x = self.ff2(x) + x
        return x

In [48]:
class PositionalEncoding(nn.Module):

    def __init__(self, d_model, max_len = 5000):
        super().__init__()
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(1, max_len, d_model)
        pe[0, :, 0::2] = torch.sin(position * div_term)
        pe[0, :, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self):
        return self.pe

class BrainModel(nn.Module):
    def __init__(self):
        super().__init__()
        DIM = 256
        n_layers = 4
        self.input_layer = nn.Sequential(nn.Linear(5,DIM),nn.Mish(),nn.Linear(DIM,DIM),nn.Mish())
        self.emb_RC = nn.Embedding(9,DIM)
        self.emb_u_in = nn.Embedding(101,DIM)
        self.scale_layer = nn.Linear(2*DIM,DIM)
        self.encoder = nn.ModuleList()
        for i in range(n_layers):
            self.encoder.append(CustomConformerBlock(dim = DIM,
                          dim_head = DIM//8,
                          heads = 8,
                          ff_mult = 4,
                          conv_expansion_factor = 2,
                          conv_kernel_size = 5,
                          attn_dropout = 0.1,
                          ff_dropout = 0.2,
                          conv_dropout = 0.05,
                          causal = False))
        self.fc = nn.Sequential(nn.Linear(DIM,DIM),nn.Mish(),nn.Linear(DIM,950))
        
    def forward(self, X):
        #(B,L,C)
        pos = X[:,:,-1]
        X_dense = self.input_layer(X[:,:,1:])
        X = torch.cat([X_dense, self.emb_RC(X[:,:,0].long())],dim=-1)
        X = self.scale_layer(X)
        for layer in self.encoder:
            X = layer(X,pos)
        y = self.fc(X)
        return y

In [49]:
N_EPOCHS = 300
N_FOLDS = 5
BATCH_SIZE = 128
oof_path = base_dir+f"oofs/oof_{MODEL_NAME}"
device = torch.device('cuda:0')
kf = StratifiedKFold(N_FOLDS,shuffle=True, random_state=42)
oof = np.zeros([75450,80])
y_true = le.inverse_transform(y_tr[:75450].reshape(-1)).reshape(oof.shape)
for fold, (train_index, val_index) in enumerate(kf.split(X_tr[:75450,0,0], X_tr[:75450,0,0])):
    print("fold:",fold)
    model_path = base_dir+f'models/{MODEL_NAME}/model_{fold}.pt'

    model = BrainModel().to(device)
    
    train_index = np.concatenate([train_index,np.arange(75450,len(X_tr))])

    train_dataset = TensorDataset(torch.Tensor(X_tr[train_index]),torch.LongTensor(y_tr[train_index]))
    val_dataset = TensorDataset(torch.Tensor(X_tr[val_index]),torch.LongTensor(y_tr[val_index]))
    train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, num_workers=2)
    val_dataloader = DataLoader(val_dataset, batch_size=3*BATCH_SIZE, shuffle=False, drop_last=False, num_workers=2)

    if (not os.path.exists(model_path)) or True:
        optimizer = optim.Ranger(model.parameters(), lr=5e-3, weight_decay=0, alpha=0.5, k=5)
        scheduler1 = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = int(5 * len(train_dataloader)), eta_min=1e-5, last_epoch=-1)
        scheduler2 = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.3, patience=10, threshold=0.0001, min_lr=1e-5, verbose=True)
        
        best_score = 999  
        for epoch in tqdm(range(N_EPOCHS),leave=False):
            Train_loss = train_one_epoch(model, optimizer, scheduler1, train_dataloader, epoch, device)
            val_loss = evaluation(model, val_dataloader, device)
            if epoch >= 100:
                scheduler2.step(val_loss)
            if val_loss < best_score:
                best_score = val_loss
                torch.save(model.state_dict(),model_path)
            print(f"epoch {epoch}")
            print(f"Train_loss: {Train_loss}")
            print(f"val_loss: {val_loss}")
            print(f"best loss: {best_score}")
            print("*********************************")
    model.load_state_dict(torch.load(model_path))
    val_pred = inference(model, val_dataloader, device, False)
    oof[val_index] = val_pred
    mask = X_tr[val_index,:,2]==0
    print(f"fold {fold} score:",mean_absolute_error(y_true[val_index][mask], oof[val_index][mask]))
mask = X_tr[:75450,:,2]==0
print("CV score:",mean_absolute_error(y_true[mask], oof[mask]))
np.save(oof_path,oof)

fold: 0


HBox(children=(FloatProgress(value=0.0, max=300.0), HTML(value='')))

	addcmul_(Number value, Tensor tensor1, Tensor tensor2)
Consider using one of the following signatures instead:
	addcmul_(Tensor tensor1, Tensor tensor2, *, Number value) (Triggered internally at  ..\torch\csrc\utils\python_arg_parser.cpp:1025.)
  exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)


epoch 0
Train_loss: 15.986838961387319
val_loss: 2.5670292145290547
best loss: 2.5670292145290547
*********************************
epoch 1
Train_loss: 6.480173484608512
val_loss: 1.7791233783686908
best loss: 1.7791233783686908
*********************************
epoch 2
Train_loss: 5.960901202312708
val_loss: 1.1844997011580676
best loss: 1.1844997011580676
*********************************
epoch 3
Train_loss: 5.60958658609226
val_loss: 0.7876959423536998
best loss: 0.7876959423536998
*********************************
epoch 4
Train_loss: 5.3796870260288285
val_loss: 0.6117173619078342
best loss: 0.6117173619078342
*********************************
epoch 5
Train_loss: 5.367123644797926
val_loss: 0.72032560790852
best loss: 0.6117173619078342
*********************************
epoch 6
Train_loss: 5.467260171024537
val_loss: 0.770443998267314
best loss: 0.6117173619078342
*********************************
epoch 7
Train_loss: 5.483155807481745
val_loss: 0.8698997534911287
best loss: 0.61171

epoch 62
Train_loss: 3.7358270619719485
val_loss: 0.19745694423032709
best loss: 0.17392319032401912
*********************************
epoch 63
Train_loss: 3.5613778158317326
val_loss: 0.17692741507255008
best loss: 0.17392319032401912
*********************************
epoch 64
Train_loss: 3.429143888980296
val_loss: 0.16701396469958987
best loss: 0.16701396469958987
*********************************
epoch 65
Train_loss: 3.4114748143108367
val_loss: 0.17050840772727802
best loss: 0.16701396469958987
*********************************
epoch 66
Train_loss: 3.5190103494449323
val_loss: 0.2002860592453604
best loss: 0.16701396469958987
*********************************
epoch 67
Train_loss: 3.6758987183144844
val_loss: 0.22580784707918933
best loss: 0.16701396469958987
*********************************
epoch 68
Train_loss: 3.8498115788614475
val_loss: 0.2685995540299427
best loss: 0.16701396469958987
*********************************
epoch 69
Train_loss: 3.92675395336769
val_loss: 0.23489111

epoch 123
Train_loss: 3.6490234424852632
val_loss: 0.2106663910778467
best loss: 0.15716927243790518
*********************************
epoch 124
Train_loss: 3.632966570992863
val_loss: 0.21537777474619918
best loss: 0.15716927243790518
*********************************
Epoch    26: reducing learning rate of group 0 to 1.5000e-03.
epoch 125
Train_loss: 3.630953912954998
val_loss: 0.19719511773909335
best loss: 0.15716927243790518
*********************************
epoch 126
Train_loss: 3.352154959473374
val_loss: 0.16362588809336379
best loss: 0.15716927243790518
*********************************
epoch 127
Train_loss: 3.299366246750593
val_loss: 0.16964062694569418
best loss: 0.15716927243790518
*********************************
epoch 128
Train_loss: 3.2801505983230834
val_loss: 0.16661016012235097
best loss: 0.15716927243790518
*********************************
epoch 129
Train_loss: 3.263396641588307
val_loss: 0.15940625093025007
best loss: 0.15716927243790518
**************************

epoch 183
Train_loss: 2.954185710841532
val_loss: 0.1481051826271881
best loss: 0.1459256093680247
*********************************
epoch 184
Train_loss: 2.95208471078138
val_loss: 0.14702007806006798
best loss: 0.1459256093680247
*********************************
epoch 185
Train_loss: 2.9510052606065127
val_loss: 0.14733394510516024
best loss: 0.1459256093680247
*********************************
epoch 186
Train_loss: 2.950563579846338
val_loss: 0.14661642461256844
best loss: 0.1459256093680247
*********************************
epoch 187
Train_loss: 2.947646602760517
val_loss: 0.1476826525944956
best loss: 0.1459256093680247
*********************************
epoch 188
Train_loss: 2.948307527872717
val_loss: 0.1471434779506367
best loss: 0.1459256093680247
*********************************
epoch 189
Train_loss: 2.946651430003691
val_loss: 0.14652050036144457
best loss: 0.1459256093680247
*********************************
Epoch    91: reducing learning rate of group 0 to 1.3500e-04.
epo

epoch 243
Train_loss: 2.8584203519029887
val_loss: 0.14222181763325192
best loss: 0.14222181763325192
*********************************
epoch 244
Train_loss: 2.857307589232596
val_loss: 0.14245326942044553
best loss: 0.14222181763325192
*********************************
epoch 245
Train_loss: 2.8581434350718578
val_loss: 0.14246692749857623
best loss: 0.14222181763325192
*********************************
epoch 246
Train_loss: 2.858590644781092
val_loss: 0.14230715474934694
best loss: 0.14222181763325192
*********************************
epoch 247
Train_loss: 2.8602566469384465
val_loss: 0.14260291153726076
best loss: 0.14222181763325192
*********************************
epoch 248
Train_loss: 2.8592946458469988
val_loss: 0.14251788342416863
best loss: 0.14222181763325192
*********************************
epoch 249
Train_loss: 2.859008695245008
val_loss: 0.14238680949245017
best loss: 0.14222181763325192
*********************************
epoch 250
Train_loss: 2.8597965598350696
val_loss: 

HBox(children=(FloatProgress(value=0.0, max=300.0), HTML(value='')))

epoch 0
Train_loss: 15.187339897216582
val_loss: 2.0077972624271223
best loss: 2.0077972624271223
*********************************
epoch 1
Train_loss: 6.43866029958777
val_loss: 1.1907401582423127
best loss: 1.1907401582423127
*********************************
epoch 2
Train_loss: 5.9371331643295155
val_loss: 1.1324756958095874
best loss: 1.1324756958095874
*********************************
epoch 3
Train_loss: 5.5751837001046605
val_loss: 1.0392523747241842
best loss: 1.0392523747241842
*********************************
epoch 4
Train_loss: 5.363919622727346
val_loss: 0.6175010417556518
best loss: 0.6175010417556518
*********************************
epoch 5
Train_loss: 5.338855033603209
val_loss: 0.6468271130388062
best loss: 0.6175010417556518
*********************************
epoch 6
Train_loss: 5.452475249046611
val_loss: 0.8456083096494381
best loss: 0.6175010417556518
*********************************
epoch 7
Train_loss: 5.494450397838508
val_loss: 0.9173887478927604
best loss: 0.6

epoch 62
Train_loss: 3.735045729237546
val_loss: 0.19894762074535124
best loss: 0.17221035269172136
*********************************
epoch 63
Train_loss: 3.561178112964405
val_loss: 0.1758793890390063
best loss: 0.17221035269172136
*********************************
epoch 64
Train_loss: 3.430753696217084
val_loss: 0.16612065361283654
best loss: 0.16612065361283654
*********************************
epoch 65
Train_loss: 3.4160441127795425
val_loss: 0.17005123884344636
best loss: 0.16612065361283654
*********************************
epoch 66
Train_loss: 3.520593997009253
val_loss: 0.19635929395936283
best loss: 0.16612065361283654
*********************************
epoch 67
Train_loss: 3.6893859889975826
val_loss: 0.22030606937441066
best loss: 0.16612065361283654
*********************************
epoch 68
Train_loss: 3.8318012042122365
val_loss: 0.2507165095136677
best loss: 0.16612065361283654
*********************************
epoch 69
Train_loss: 3.929007780887686
val_loss: 0.2380319675

epoch 124
Train_loss: 3.633802494217706
val_loss: 0.19203692373925307
best loss: 0.15669050110298
*********************************
epoch 125
Train_loss: 3.630469079915954
val_loss: 0.21843350467914383
best loss: 0.15669050110298
*********************************
epoch 126
Train_loss: 3.6256932629190906
val_loss: 0.20209417671022356
best loss: 0.15669050110298
*********************************
epoch 127
Train_loss: 3.6142574114049792
val_loss: 0.22684569223032244
best loss: 0.15669050110298
*********************************
epoch 128
Train_loss: 3.604625409407334
val_loss: 0.2198043000860962
best loss: 0.15669050110298
*********************************
epoch 129
Train_loss: 3.6130841918225065
val_loss: 0.19119045847278746
best loss: 0.15669050110298
*********************************
epoch 130
Train_loss: 3.596791389202567
val_loss: 0.18895833158310907
best loss: 0.15669050110298
*********************************
epoch 131
Train_loss: 3.593748250417473
val_loss: 0.20845233089944287
best

Epoch    86: reducing learning rate of group 0 to 4.5000e-04.
epoch 185
Train_loss: 3.048828372263581
val_loss: 0.14957915184981993
best loss: 0.14949539444619964
*********************************
epoch 186
Train_loss: 2.950798740927688
val_loss: 0.1437985534969936
best loss: 0.1437985534969936
*********************************
epoch 187
Train_loss: 2.935612871187617
val_loss: 0.14426397123452384
best loss: 0.1437985534969936
*********************************
epoch 188
Train_loss: 2.9298961098130634
val_loss: 0.1444991353007665
best loss: 0.1437985534969936
*********************************
epoch 189
Train_loss: 2.9294280470373764
val_loss: 0.14306345863874417
best loss: 0.14306345863874417
*********************************
epoch 190
Train_loss: 2.9232219130156407
val_loss: 0.1429847620889495
best loss: 0.1429847620889495
*********************************
epoch 191
Train_loss: 2.922013823581974
val_loss: 0.1436609558341665
best loss: 0.1429847620889495
*********************************

epoch 245
Train_loss: 2.8277823098374455
val_loss: 0.13965903863030765
best loss: 0.13898775364179627
*********************************
Epoch   147: reducing learning rate of group 0 to 4.0500e-05.
epoch 246
Train_loss: 2.8276740334081625
val_loss: 0.13975399160068908
best loss: 0.13898775364179627
*********************************
epoch 247
Train_loss: 2.816292212141058
val_loss: 0.13908025573300034
best loss: 0.13898775364179627
*********************************
epoch 248
Train_loss: 2.81540187179735
val_loss: 0.13887669476793232
best loss: 0.13887669476793232
*********************************
epoch 249
Train_loss: 2.813852254088871
val_loss: 0.13937862096114226
best loss: 0.13887669476793232
*********************************
epoch 250
Train_loss: 2.815270596917017
val_loss: 0.13895753648705994
best loss: 0.13887669476793232
*********************************
epoch 251
Train_loss: 2.811958029205576
val_loss: 0.1389443422914336
best loss: 0.13887669476793232
***************************

HBox(children=(FloatProgress(value=0.0, max=300.0), HTML(value='')))

epoch 0
Train_loss: 16.18567198714709
val_loss: 3.585577989101509
best loss: 3.585577989101509
*********************************
epoch 1
Train_loss: 6.474237113819644
val_loss: 1.3874224926089522
best loss: 1.3874224926089522
*********************************
epoch 2
Train_loss: 5.943839400702508
val_loss: 1.0997642655569118
best loss: 1.0997642655569118
*********************************
epoch 3
Train_loss: 5.596039424723521
val_loss: 0.7518286831267236
best loss: 0.7518286831267236
*********************************
epoch 4
Train_loss: 5.3755637759904795
val_loss: 0.6189456712045524
best loss: 0.6189456712045524
*********************************
epoch 5
Train_loss: 5.345136824847776
val_loss: 0.6667943407785376
best loss: 0.6189456712045524
*********************************
epoch 6
Train_loss: 5.4729328262055255
val_loss: 1.0596925545695763
best loss: 0.6189456712045524
*********************************
epoch 7
Train_loss: 5.474201383544833
val_loss: 0.897415724978012
best loss: 0.6189

epoch 62
Train_loss: 3.746429173098124
val_loss: 0.2005922400164363
best loss: 0.17555550078289262
*********************************
epoch 63
Train_loss: 3.560230733177541
val_loss: 0.17977312647480387
best loss: 0.17555550078289262
*********************************
epoch 64
Train_loss: 3.42881452942794
val_loss: 0.17095119279184148
best loss: 0.17095119279184148
*********************************
epoch 65
Train_loss: 3.4116820852203493
val_loss: 0.17560153233684414
best loss: 0.17095119279184148
*********************************
epoch 66
Train_loss: 3.51791374774085
val_loss: 0.19156208873491776
best loss: 0.17095119279184148
*********************************
epoch 67
Train_loss: 3.6787972743361266
val_loss: 0.2261481002480043
best loss: 0.17095119279184148
*********************************
epoch 68
Train_loss: 3.851178960736453
val_loss: 0.2641441364616877
best loss: 0.17095119279184148
*********************************
epoch 69
Train_loss: 3.9369168037772604
val_loss: 0.2341794597045

epoch 123
Train_loss: 3.6243899796033356
val_loss: 0.2275159031175492
best loss: 0.16015057726507212
*********************************
epoch 124
Train_loss: 3.6246605473731033
val_loss: 0.20225974065181365
best loss: 0.16015057726507212
*********************************
epoch 125
Train_loss: 3.64396319216873
val_loss: 0.20249128526604604
best loss: 0.16015057726507212
*********************************
epoch 126
Train_loss: 3.6184435091507177
val_loss: 0.20054329373190038
best loss: 0.16015057726507212
*********************************
epoch 127
Train_loss: 3.620118263298624
val_loss: 0.22657713602372415
best loss: 0.16015057726507212
*********************************
epoch 128
Train_loss: 3.613472003783623
val_loss: 0.2145547671383545
best loss: 0.16015057726507212
*********************************
epoch 129
Train_loss: 3.6005095106072083
val_loss: 0.18823276373233347
best loss: 0.16015057726507212
*********************************
epoch 130
Train_loss: 3.600182166059777
val_loss: 0.19

epoch 184
Train_loss: 3.0447121373048276
val_loss: 0.15169967808375545
best loss: 0.1511058422283303
*********************************
epoch 185
Train_loss: 3.0481608238650058
val_loss: 0.1537193378320299
best loss: 0.1511058422283303
*********************************
epoch 186
Train_loss: 3.0422603387726452
val_loss: 0.15495580867108663
best loss: 0.1511058422283303
*********************************
epoch 187
Train_loss: 3.0420006289780246
val_loss: 0.15400275190045734
best loss: 0.1511058422283303
*********************************
epoch 188
Train_loss: 3.038693910084614
val_loss: 0.154976830847659
best loss: 0.1511058422283303
*********************************
epoch 189
Train_loss: 3.0361950878140203
val_loss: 0.150604843388359
best loss: 0.150604843388359
*********************************
epoch 190
Train_loss: 3.03383881736852
val_loss: 0.15088441755102522
best loss: 0.150604843388359
*********************************
epoch 191
Train_loss: 3.0403706790939315
val_loss: 0.153936467145

epoch 244
Train_loss: 2.7959057952236295
val_loss: 0.14073119101731688
best loss: 0.14071860738585917
*********************************
epoch 245
Train_loss: 2.795717453093935
val_loss: 0.14128559140880403
best loss: 0.14071860738585917
*********************************
epoch 246
Train_loss: 2.794811128072012
val_loss: 0.1406831624563039
best loss: 0.1406831624563039
*********************************
epoch 247
Train_loss: 2.7951534069262447
val_loss: 0.14109700568790925
best loss: 0.1406831624563039
*********************************
epoch 248
Train_loss: 2.7932599296762484
val_loss: 0.14119428967177317
best loss: 0.1406831624563039
*********************************
epoch 249
Train_loss: 2.792055957904599
val_loss: 0.14102872466868246
best loss: 0.1406831624563039
*********************************
epoch 250
Train_loss: 2.7925257938097507
val_loss: 0.14033514751741133
best loss: 0.14033514751741133
*********************************
epoch 251
Train_loss: 2.791071499950846
val_loss: 0.1410

HBox(children=(FloatProgress(value=0.0, max=300.0), HTML(value='')))

epoch 0
Train_loss: 17.82370408096023
val_loss: 3.421132542521537
best loss: 3.421132542521537
*********************************
epoch 1
Train_loss: 6.54408388702443
val_loss: 1.2924404085161636
best loss: 1.2924404085161636
*********************************
epoch 2
Train_loss: 6.0469748559744945
val_loss: 1.8152607168559993
best loss: 1.2924404085161636
*********************************
epoch 3
Train_loss: 5.6519764020069925
val_loss: 0.7319401689208999
best loss: 0.7319401689208999
*********************************
epoch 4
Train_loss: 5.439888509443373
val_loss: 0.6391275363116163
best loss: 0.6391275363116163
*********************************
epoch 5
Train_loss: 5.411366424265024
val_loss: 0.6932173096898029
best loss: 0.6391275363116163
*********************************
epoch 6
Train_loss: 5.508243990832176
val_loss: 0.9467526799052653
best loss: 0.6391275363116163
*********************************
epoch 7
Train_loss: 5.520968984931683
val_loss: 1.1378412091583257
best loss: 0.6391

epoch 62
Train_loss: 3.742297385464943
val_loss: 0.20087198834383882
best loss: 0.17214846375486936
*********************************
epoch 63
Train_loss: 3.554678370990771
val_loss: 0.1758944338689504
best loss: 0.17214846375486936
*********************************
epoch 64
Train_loss: 3.425454532704687
val_loss: 0.16488297812970112
best loss: 0.16488297812970112
*********************************
epoch 65
Train_loss: 3.4098545365424076
val_loss: 0.16893162379243387
best loss: 0.16488297812970112
*********************************
epoch 66
Train_loss: 3.5063643144634207
val_loss: 0.19385919952464284
best loss: 0.16488297812970112
*********************************
epoch 67
Train_loss: 3.6776867527389188
val_loss: 0.2439618096797377
best loss: 0.16488297812970112
*********************************
epoch 68
Train_loss: 3.848148311126386
val_loss: 0.2596764787825532
best loss: 0.16488297812970112
*********************************
epoch 69
Train_loss: 3.9198166439844218
val_loss: 0.2382415737

epoch 123
Train_loss: 3.2647767993629166
val_loss: 0.16847148239720477
best loss: 0.15520026763913147
*********************************
epoch 124
Train_loss: 3.254468933609772
val_loss: 0.1599028585692875
best loss: 0.15520026763913147
*********************************
epoch 125
Train_loss: 3.252344476456792
val_loss: 0.16116828401759772
best loss: 0.15520026763913147
*********************************
epoch 126
Train_loss: 3.2424999823589706
val_loss: 0.16814995742923988
best loss: 0.15520026763913147
*********************************
epoch 127
Train_loss: 3.2355989912190704
val_loss: 0.15864755668547728
best loss: 0.15520026763913147
*********************************
epoch 128
Train_loss: 3.232408497911293
val_loss: 0.1641572721870176
best loss: 0.15520026763913147
*********************************
epoch 129
Train_loss: 3.2293939099797524
val_loss: 0.15659908560722996
best loss: 0.15520026763913147
*********************************
epoch 130
Train_loss: 3.227822387789756
val_loss: 0.1

epoch 184
Train_loss: 3.0713016077573334
val_loss: 0.15319304422642488
best loss: 0.15211422373555872
*********************************
epoch 185
Train_loss: 3.0748411136061335
val_loss: 0.15196564786544114
best loss: 0.15196564786544114
*********************************
epoch 186
Train_loss: 3.0710533077921296
val_loss: 0.1547790753156144
best loss: 0.15196564786544114
*********************************
epoch 187
Train_loss: 3.0700384214182566
val_loss: 0.15605124683652455
best loss: 0.15196564786544114
*********************************
epoch 188
Train_loss: 3.069523061489357
val_loss: 0.1594861147211352
best loss: 0.15196564786544114
*********************************
epoch 189
Train_loss: 3.069437528781777
val_loss: 0.15225727765112193
best loss: 0.15196564786544114
*********************************
epoch 190
Train_loss: 3.0669373490933167
val_loss: 0.15794454541652558
best loss: 0.15196564786544114
*********************************
epoch 191
Train_loss: 3.064312630146729
val_loss: 0.

epoch 244
Train_loss: 2.787071502796006
val_loss: 0.14016515908712543
best loss: 0.13987444939395566
*********************************
epoch 245
Train_loss: 2.7842413387547023
val_loss: 0.13992918591728634
best loss: 0.13987444939395566
*********************************
epoch 246
Train_loss: 2.7843713346604444
val_loss: 0.13945785356886536
best loss: 0.13945785356886536
*********************************
epoch 247
Train_loss: 2.78376414785101
val_loss: 0.13962344909171442
best loss: 0.13945785356886536
*********************************
epoch 248
Train_loss: 2.7845891489790384
val_loss: 0.14014031950637532
best loss: 0.13945785356886536
*********************************
epoch 249
Train_loss: 2.7817043159743826
val_loss: 0.1396827866470543
best loss: 0.13945785356886536
*********************************
epoch 250
Train_loss: 2.78242111551502
val_loss: 0.13998714410058508
best loss: 0.13945785356886536
*********************************
epoch 251
Train_loss: 2.783311734748565
val_loss: 0.13

HBox(children=(FloatProgress(value=0.0, max=300.0), HTML(value='')))

epoch 0
Train_loss: 16.083799121444866
val_loss: 2.0572829801258607
best loss: 2.0572829801258607
*********************************
epoch 1
Train_loss: 6.483368869553994
val_loss: 1.257884837947335
best loss: 1.257884837947335
*********************************
epoch 2
Train_loss: 5.976434335760088
val_loss: 0.969340746856444
best loss: 0.969340746856444
*********************************
epoch 3
Train_loss: 5.650135253885095
val_loss: 0.7265890116600879
best loss: 0.7265890116600879
*********************************
epoch 4
Train_loss: 5.418634197687749
val_loss: 0.6302943036046065
best loss: 0.6302943036046065
*********************************
epoch 5
Train_loss: 5.389579655964895
val_loss: 0.7913249828108825
best loss: 0.6302943036046065
*********************************
epoch 6
Train_loss: 5.485076296830385
val_loss: 1.241960748161058
best loss: 0.6302943036046065
*********************************
epoch 7
Train_loss: 5.5230641651939285
val_loss: 1.1178015847824836
best loss: 0.630294

epoch 62
Train_loss: 3.7483377338575554
val_loss: 0.19973608713904795
best loss: 0.17658792764589892
*********************************
epoch 63
Train_loss: 3.5736400641902613
val_loss: 0.18034801466253453
best loss: 0.17658792764589892
*********************************
epoch 64
Train_loss: 3.4361455324780286
val_loss: 0.17017609958096053
best loss: 0.17017609958096053
*********************************
epoch 65
Train_loss: 3.4204809467842616
val_loss: 0.17732186230309263
best loss: 0.17017609958096053
*********************************
epoch 66
Train_loss: 3.521418842305153
val_loss: 0.196561033173315
best loss: 0.17017609958096053
*********************************
epoch 67
Train_loss: 3.6896506779716645
val_loss: 0.22774116772507366
best loss: 0.17017609958096053
*********************************
epoch 68
Train_loss: 3.851082552135407
val_loss: 0.2705822750835465
best loss: 0.17017609958096053
*********************************
epoch 69
Train_loss: 3.9347006564969096
val_loss: 0.24303803

epoch 124
Train_loss: 3.641758701624104
val_loss: 0.19435415846262827
best loss: 0.1606635109428921
*********************************
epoch 125
Train_loss: 3.6163587118206357
val_loss: 0.1928327224131917
best loss: 0.1606635109428921
*********************************
epoch 126
Train_loss: 3.619924397361224
val_loss: 0.21747208201263463
best loss: 0.1606635109428921
*********************************
epoch 127
Train_loss: 3.616057953573452
val_loss: 0.22219867145181182
best loss: 0.1606635109428921
*********************************
epoch 128
Train_loss: 3.61471769536181
val_loss: 0.2250852916645232
best loss: 0.1606635109428921
*********************************
epoch 129
Train_loss: 3.6155541028830793
val_loss: 0.2008106626897117
best loss: 0.1606635109428921
*********************************
epoch 130
Train_loss: 3.611509263579901
val_loss: 0.2041366165384181
best loss: 0.1606635109428921
*********************************
epoch 131
Train_loss: 3.6043508684942025
val_loss: 0.208789826844

epoch 185
Train_loss: 3.060695544530543
val_loss: 0.155539790234363
best loss: 0.15513344894608247
*********************************
epoch 186
Train_loss: 3.0552751506257385
val_loss: 0.1591666337393565
best loss: 0.15513344894608247
*********************************
epoch 187
Train_loss: 3.0551433205755485
val_loss: 0.1571909496090947
best loss: 0.15513344894608247
*********************************
epoch 188
Train_loss: 3.056672784668049
val_loss: 0.16035986657045811
best loss: 0.15513344894608247
*********************************
epoch 189
Train_loss: 3.05526103844443
val_loss: 0.15499221971057986
best loss: 0.15499221971057986
*********************************
epoch 190
Train_loss: 3.052735943084421
val_loss: 0.1548233566679713
best loss: 0.1548233566679713
*********************************
epoch 191
Train_loss: 3.046504331550411
val_loss: 0.15519347060328484
best loss: 0.1548233566679713
*********************************
epoch 192
Train_loss: 3.046997475768218
val_loss: 0.157545097

epoch 245
Train_loss: 2.8076348440367704
val_loss: 0.14408038461010292
best loss: 0.1439759977850891
*********************************
epoch 246
Train_loss: 2.8060839653971765
val_loss: 0.14420273022814734
best loss: 0.1439759977850891
*********************************
epoch 247
Train_loss: 2.8060601696896668
val_loss: 0.14461168188981197
best loss: 0.1439759977850891
*********************************
epoch 248
Train_loss: 2.803517262638548
val_loss: 0.144338895427485
best loss: 0.1439759977850891
*********************************
epoch 249
Train_loss: 2.8021437361532184
val_loss: 0.1438393733800109
best loss: 0.1438393733800109
*********************************
epoch 250
Train_loss: 2.802381591140181
val_loss: 0.14461966632862008
best loss: 0.1438393733800109
*********************************
epoch 251
Train_loss: 2.8013731435319023
val_loss: 0.14427856561157953
best loss: 0.1438393733800109
*********************************
epoch 252
Train_loss: 2.8005620494453267
val_loss: 0.1441364