In [1]:
import os
import time
import math
import random
import numpy as np
import pandas as pd
import torch
from torch import nn, einsum
import torch.nn.functional as F
import torch_optimizer as optim
from torch.utils.data import TensorDataset,DataLoader
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from tqdm.auto import tqdm
from einops import rearrange
from einops.layers.torch import Rearrange

In [2]:
DEBUG = False

In [3]:
MODEL_NAME = 'transformer_v19'
base_dir = "./"
if not os.path.exists(f'models/{MODEL_NAME}'):
    os.makedirs(f'models/{MODEL_NAME}')

In [4]:
train_df = pd.read_csv(base_dir + 'train.csv')
test_df = pd.read_csv(base_dir + 'test.csv')

In [5]:
le = LabelEncoder()
train_df['pressure'] = le.fit_transform(train_df['pressure'])

In [6]:
train_df['RC'] = (train_df['R'].astype(str) + '_' + train_df['C'].astype(str))
train_df['RC'] = train_df['RC'].map({'20_50':0, '20_20':1, '50_20':2, '50_50':3, '5_50':4, '5_20':5, '50_10':6, '20_10':7, '5_10':8})
test_df['RC'] = (test_df['R'].astype(str) + '_' + test_df['C'].astype(str))
test_df['RC'] = test_df['RC'].map({'20_50':0, '20_20':1, '50_20':2, '50_50':3, '5_50':4, '5_20':5, '50_10':6, '20_10':7, '5_10':8})
# train_df['u_in_cat'] = train_df['u_in'].round().astype(int)
# test_df['u_in_cat'] = test_df['u_in'].round().astype(int)
train_df['u_in_0'] = (train_df['u_in'].round() == 0).astype(int)
test_df['u_in_0'] = (test_df['u_in'].round() == 0).astype(int)
train_df['u_in_5'] = (train_df['u_in'].round() == 5).astype(int)
test_df['u_in_5'] = (test_df['u_in'].round() == 5).astype(int)

In [7]:
# features = train_df.columns.drop(['id','breath_id','pressure'])

In [8]:
from sklearn.preprocessing import RobustScaler, StandardScaler
RS = StandardScaler()
all_df = pd.concat([train_df,test_df])
train_df['u_in'] = np.log1p(train_df['u_in'] - all_df['u_in'].min())
test_df['u_in'] = np.log1p(test_df['u_in'] - all_df['u_in'].min())
all_df['u_in'] = np.log1p(all_df['u_in'] - all_df['u_in'].min())

RS.fit(all_df[['u_in','time_step']])
train_df[['u_in','time_step']] = RS.transform(train_df[['u_in','time_step']])
test_df[['u_in','time_step']] = RS.transform(test_df[['u_in','time_step']])

# from sklearn.preprocessing import RobustScaler, StandardScaler
# RS = StandardScaler()
# all_df = pd.concat([train_df,test_df])
# RS.fit(all_df[['u_in','time_step']])
# train_df[['u_in','time_step']] = RS.transform(train_df[['u_in','time_step']])
# test_df[['u_in','time_step']] = RS.transform(test_df[['u_in','time_step']])

In [9]:
X_tr = train_df[['RC','u_in','u_out','u_in_0','u_in_5','time_step']].values.reshape(-1,80,6)
X_test = test_df[['RC','u_in','u_out','u_in_0','u_in_5','time_step']].values.reshape(-1,80,6)
# X_tr = train_df[features].values.reshape(-1,80,18)
# X_test = test_df[features].values.reshape(-1,80,18)

In [10]:
y_tr = train_df['pressure'].values.reshape(-1,80)

In [11]:
# pseudo_label = np.load("subs/sub_logits_transformer_v11.npy")
# from scipy.special import softmax
# pseudo_label = softmax(pseudo_label,axis=2)
# mask = test_df['u_out'].values.reshape(-1,80) == 0
# tmp1 = pseudo_label.max(2)
# res = []
# for i in tqdm(range(len(tmp1))):
#     res.append(tmp1[i][mask[i]].mean())
# res = np.array(res)
# mask = res > 0.6
# np.mean(mask)
# pseudo_label = pseudo_label.argmax(-1)
# X_tr = np.concatenate([X_tr,X_test[mask]],axis=0)
# y_tr = np.concatenate([y_tr,pseudo_label[mask]],axis=0)
# import gc
# del pseudo_label, mask, res, tmp1; gc.collect()

In [12]:
# unique_pressures = train_df["pressure"].unique()
# sorted_pressures = np.sort(unique_pressures)
# total_pressures_len = len(sorted_pressures)
# def find_nearest(prediction):
#     insert_idx = np.searchsorted(sorted_pressures, prediction)
#     if insert_idx == total_pressures_len:
#         # If the predicted value is bigger than the highest pressure in the train dataset,
#         # return the max value.
#         return sorted_pressures[-1]
#     elif insert_idx == 0:
#         # Same control but for the lower bound.
#         return sorted_pressures[0]
#     lower_val = sorted_pressures[insert_idx - 1]
#     upper_val = sorted_pressures[insert_idx]
#     return lower_val if abs(lower_val - prediction) < abs(upper_val - prediction) else upper_val
# pseudo_label = pd.read_csv("pressure_submission.csv")['pressure'].values
# for i in range(len(pseudo_label)):
#     pseudo_label[i] = find_nearest(pseudo_label[i])
# pseudo_label = le.transform(pseudo_label).reshape(-1,80)
# X_tr = np.concatenate([X_tr,X_test],axis=0)
# y_tr = np.concatenate([y_tr,pseudo_label],axis=0)
# import gc
# del pseudo_label; gc.collect()

In [13]:
X_tr.shape,X_test.shape,y_tr.shape

((75450, 80, 6), (50300, 80, 6), (75450, 80))

In [14]:
if DEBUG:
    X_tr = X_tr[:1000]
    y_tr = y_tr[:1000]
    X_test = X_test[:1000]

In [15]:
def train_one_epoch(model, optimizer, train_dataloader, epoch, device = torch.device('cpu')):
    model.eval()
    MA_loss = 0
    count = 0
    for X,y in train_dataloader:
        X = X.to(device)
        y = y.to(device)
        y = F.one_hot(y,950).float()
        y[:,:,:-1] += 0.1 * y[:,:,1:]
        y[:,:,1:] += 0.1 * y[:,:,:-1]
        y[y==1] = 0.8
        
        optimizer.zero_grad()
        mask1 = X[:,:,2] == 0
        mask2 = X[:,:,2] == 1
        pred = model(X)

        pred = torch.sigmoid(pred[mask1].reshape(-1,950))
        y = y[mask1].reshape(-1,950)

        loss = -torch.sum(y * torch.log(1e-8 + pred) + (1-y) * torch.log(1 - pred + 1e-8),dim=-1).mean()
        loss.backward()
        optimizer.step()
        
        MA_loss += loss.item() * len(y)
        count += len(y)
    MA_loss /= count
    return MA_loss

def evaluation(model, val_dataloader, device = torch.device('cpu')):
    model.eval()
    criterion = nn.L1Loss()
    MA_loss = 0
    count = 0
    with torch.no_grad():
        for X,y in val_dataloader:
            X = X.to(device)
            y = y.to(device)
            mask = X[:,:,2] == 0
            pred = model(X)
            pred = torch.argmax(pred,dim=-1)
            pred = pred[mask].reshape(-1).cpu().numpy()
            pred = torch.Tensor(le.inverse_transform(pred)).to(device)
            y = y[mask].reshape(-1).cpu().long().numpy()
            y = torch.Tensor(le.inverse_transform(y)).to(device)
            loss = criterion(pred, y)
            # loss = criterion(pred.reshape(-1), y.reshape(-1))
            MA_loss += loss.item() * len(y)
            count += len(y)
        MA_loss /= count
    return MA_loss

def inference(model, test_dataloader, device = torch.device('cpu'), istest = False):
    model.eval()
    prediction = []
    with torch.no_grad():
        if istest:
            for X in test_dataloader:
                X = X[0]
                X = X.to(device)
                pred = model(X).cpu()
                pred = torch.argmax(pred,dim=-1)
                prediction.append(pred)
        else:
            for X, y in test_dataloader:
                X = X.to(device)
                pred = model(X).cpu()
                pred = torch.argmax(pred,dim=-1)
                prediction.append(pred)
    prediction = torch.cat(prediction,dim=0).numpy()
    prediction = le.inverse_transform(prediction.reshape(-1)).reshape(-1,80)
    return prediction

In [16]:
import torch
from torch import nn, einsum
import torch.nn.functional as F

from einops import rearrange
from einops.layers.torch import Rearrange

# helper functions

def exists(val):
    return val is not None

def default(val, d):
    return val if exists(val) else d

def calc_same_padding(kernel_size):
    pad = kernel_size // 2
    return (pad, pad - (kernel_size + 1) % 2)

# helper classes

class Swish(nn.Module):
    def forward(self, x):
        return x * x.sigmoid()

class GLU(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.dim = dim

    def forward(self, x):
        out, gate = x.chunk(2, dim=self.dim)
        return out * gate.sigmoid()

class DepthWiseConv1d(nn.Module):
    def __init__(self, chan_in, chan_out, kernel_size, padding):
        super().__init__()
        self.padding = padding
        self.conv = nn.Conv1d(chan_in, chan_out, kernel_size, groups = chan_in)

    def forward(self, x):
        x = F.pad(x, self.padding)
        return self.conv(x)

# attention, feedforward, and conv module

class Scale(nn.Module):
    def __init__(self, scale, fn):
        super().__init__()
        self.fn = fn
        self.scale = scale

    def forward(self, x, **kwargs):
        return self.fn(x, **kwargs) * self.scale

class PreNorm(nn.Module):
    def __init__(self, dim, fn):
        super().__init__()
        self.fn = fn
        self.norm = nn.LayerNorm(dim)

    def forward(self, x, **kwargs):
        x = self.norm(x)
        return self.fn(x, **kwargs)

class FeedForward(nn.Module):
    def __init__(
        self,
        dim,
        mult = 4,
        dropout = 0.
    ):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, dim * mult),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(dim * mult, dim),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        return self.net(x)

class ConformerConvModule(nn.Module):
    def __init__(
        self,
        dim,
        causal = False,
        expansion_factor = 2,
        kernel_size = 31,
        dropout = 0.):
        super().__init__()

        inner_dim = dim * expansion_factor
        padding = calc_same_padding(kernel_size) if not causal else (kernel_size - 1, 0)

        self.net = nn.Sequential(
            nn.LayerNorm(dim),
            Rearrange('b n c -> b c n'),
            nn.Conv1d(dim, inner_dim * 2, 1),
            GLU(dim=1),
            DepthWiseConv1d(inner_dim, inner_dim, kernel_size = kernel_size, padding = padding),
            nn.BatchNorm1d(inner_dim) if not causal else nn.Identity(),
            Swish(),
            nn.Conv1d(inner_dim, dim, 1),
            Rearrange('b c n -> b n c'),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        return self.net(x)

In [17]:
class CustomAttention(nn.Module):
    def __init__(
        self,
        dim,
        heads = 8,
        dim_head = 64,
        dropout = 0.,
        max_pos_emb = 512,
        causal = False
    ):
        super().__init__()
        inner_dim = dim_head * heads
        self.heads= heads
        self.scale = dim_head ** -0.5
        self.to_q = nn.Linear(dim, inner_dim, bias = False)
        self.to_kv = nn.Linear(dim, inner_dim * 2, bias = False)
        self.to_out = nn.Linear(inner_dim, dim)

        self.max_pos_emb = max_pos_emb
        # self.rel_pos_emb1 = nn.Linear(1, heads, bias=False)
        # self.rel_pos_emb2 = nn.Linear(1, heads, bias=False)
        self.rel_pos_emb = nn.Sequential(nn.Linear(1, dim_head),nn.GELU(),nn.Linear(dim_head, dim_head))
        self.dropout = nn.Dropout(dropout)

        self.causal = causal

    def forward(self, x, position, context = None, mask = None, context_mask = None):
        n, device, h, max_pos_emb, has_context = x.shape[-2], x.device, self.heads, self.max_pos_emb, exists(context)
        context = default(context, x)

        q, k, v = (self.to_q(x), *self.to_kv(context).chunk(2, dim = -1))
        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = h), (q, k, v))

        dots = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale

        # shaw's relative positional embedding
        dist = rearrange(position, 'b i -> b i () ()') - rearrange(position, 'b j -> b () j ()')
        # rel_pos_emb = self.rel_pos_emb1(F.relu(dist)) + self.rel_pos_emb2(F.relu(-dist))   #(bijh)
        # pos_attn = rel_pos_emb.permute(0,3,1,2)
        rel_pos_emb = self.rel_pos_emb(dist)
        pos_attn = einsum('b h n d, b n r d -> b h n r', q, rel_pos_emb) * self.scale
        dots = dots + pos_attn

        if exists(mask) or exists(context_mask):
            mask = default(mask, lambda: torch.ones(*x.shape[:2], device = device))
            context_mask = default(context_mask, mask) if not has_context else default(context_mask, lambda: torch.ones(*context.shape[:2], device = device))
            mask_value = -torch.finfo(dots.dtype).max
            mask = rearrange(mask, 'b i -> b () i ()') * rearrange(context_mask, 'b j -> b () () j')
            dots.masked_fill_(~mask, mask_value)

        if self.causal:
            mask = torch.tril(torch.ones(dots.shape[-2:],device=dots.device)).T
            mask = rearrange(mask, 'n r -> () () n r')
            dots = dots - mask * 999

        attn = dots.softmax(dim = -1)

        out = einsum('b h i j, b h j d -> b h i d', attn, v)
        out = rearrange(out, 'b h n d -> b n (h d)')
        out = self.to_out(out)
        return self.dropout(out)


class CustomConformerBlock(nn.Module):
    def __init__(
        self,
        *,
        dim,
        dim_head = 64,
        heads = 8,
        ff_mult = 4,
        conv_expansion_factor = 2,
        conv_kernel_size = 31,
        attn_dropout = 0.,
        ff_dropout = 0.,
        conv_dropout = 0.,
        causal = False
    ):
        super().__init__()
        self.ff1 = FeedForward(dim = dim, mult = ff_mult, dropout = ff_dropout)
        self.attn = CustomAttention(dim = dim, dim_head = dim_head, heads = heads, dropout = attn_dropout, causal = causal)
        self.conv = ConformerConvModule(dim = dim, causal = causal, expansion_factor = conv_expansion_factor, kernel_size = conv_kernel_size, dropout = conv_dropout)
        self.ff2 = FeedForward(dim = dim, mult = ff_mult, dropout = ff_dropout)

        self.attn = PreNorm(dim, self.attn)
        self.ff1 = Scale(0.5, PreNorm(dim, self.ff1))
        self.ff2 = Scale(0.5, PreNorm(dim, self.ff2))

    def forward(self, x, pos, mask = None):
        x = self.ff1(x) + x
        x = self.attn(x, position = pos, mask = mask) + x
        x = self.conv(x) + x
        x = self.ff2(x) + x
        return x

In [18]:
class PositionalEncoding(nn.Module):

    def __init__(self, d_model, max_len = 5000):
        super().__init__()
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(1, max_len, d_model)
        pe[0, :, 0::2] = torch.sin(position * div_term)
        pe[0, :, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self):
        return self.pe

class BrainModel(nn.Module):
    def __init__(self):
        super().__init__()
        DIM = 256
        n_layers = 4
        self.input_layer = nn.Sequential(nn.Linear(5,DIM),nn.Mish(),nn.Linear(DIM,DIM),nn.Mish())
        self.emb_RC = nn.Embedding(9,DIM)
        self.emb_u_in = nn.Embedding(101,DIM)
        self.scale_layer = nn.Linear(2*DIM,DIM)
        self.encoder = nn.ModuleList()
        for i in range(n_layers):
            self.encoder.append(CustomConformerBlock(dim = DIM,
                          dim_head = DIM//8,
                          heads = 8,
                          ff_mult = 4,
                          conv_expansion_factor = 2,
                          conv_kernel_size = 5,
                          attn_dropout = 0.1,
                          ff_dropout = 0.2,
                          conv_dropout = 0.05,
                          causal = False))
        self.fc = nn.Sequential(nn.Linear(DIM,DIM),nn.Mish(),nn.Linear(DIM,950))
        
    def forward(self, X):
        #(B,L,C)
        pos = X[:,:,-1]
        X_dense = self.input_layer(X[:,:,1:])
        X = torch.cat([X_dense, self.emb_RC(X[:,:,0].long())],dim=-1)
        X = self.scale_layer(X)
        for layer in self.encoder:
            X = layer(X,pos)
        y = self.fc(X)
        return y

In [19]:
N_EPOCHS = 100
N_FOLDS = 5
BATCH_SIZE = 128
oof_path = f"oofs/oof_finetune_{MODEL_NAME}"
device = torch.device('cuda:0')
kf = StratifiedKFold(N_FOLDS,shuffle=True, random_state=42)
oof = np.zeros(y_tr[:75450].shape)
y_true = le.inverse_transform(y_tr[:75450].reshape(-1)).reshape(y_tr[:75450].shape)
for fold, (train_index, val_index) in enumerate(kf.split(X_tr[:75450,0,0], X_tr[:75450,0,0])):
    print("fold:",fold)
    for RC in range(9):
      print("RC",RC)
      model_path = f'models/{MODEL_NAME}/model_{fold}_RC_{RC}.pt'

      train_index = np.concatenate([train_index,np.arange(75450,len(X_tr))])
      tr_idx = np.intersect1d(train_index,np.where(X_tr[:,0,0]==RC))
      val_idx = np.intersect1d(val_index,np.where(X_tr[:,0,0]==RC))
      test_idx = np.where(X_test[:,0,0]==RC)

      train_dataset = TensorDataset(torch.Tensor(X_tr[tr_idx]),torch.LongTensor(y_tr[tr_idx]))
      val_dataset = TensorDataset(torch.Tensor(X_tr[val_idx]),torch.LongTensor(y_tr[val_idx]))
      train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, num_workers=2)
      val_dataloader = DataLoader(val_dataset, batch_size=3*BATCH_SIZE, shuffle=False, drop_last=False, num_workers=2)

      model = BrainModel().to(device)
      if (not os.path.exists(model_path)):
          model.load_state_dict(torch.load(f'models/{MODEL_NAME}/model_{fold}.pt'))
          optimizer = optim.Ranger(model.parameters(), lr=1e-4, weight_decay=0, alpha=0.5, k=5)
          scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=10, threshold=0.0001, min_lr=1e-6, verbose=True)
          best_score = evaluation(model, val_dataloader, device)
          torch.save(model.state_dict(),model_path)
          print(f"initial loss: {best_score}")
          stop_step = 0
          for epoch in tqdm(range(N_EPOCHS),leave=False):
              Train_loss = train_one_epoch(model, optimizer, train_dataloader, epoch, device)
              val_loss = evaluation(model, val_dataloader, device)
              scheduler.step(val_loss)
              if val_loss < best_score:
                best_score = val_loss
                torch.save(model.state_dict(),model_path)
                stop_step = 0
              else:
                stop_step += 1
              print(f"epoch {epoch}")
              print(f"Train_loss: {Train_loss}")
              print(f"val_loss: {val_loss}")
              print(f"best loss: {best_score}")
              print("*********************************")
              if stop_step > 20:
                break
      model.load_state_dict(torch.load(model_path))
      val_pred = inference(model, val_dataloader, device, False)
      oof[val_idx] = val_pred
    mask = X_tr[val_index,:,2]==0
    print(f"fold {fold}, score:",mean_absolute_error(y_true[val_index][mask], oof[val_index][mask]))
mask = X_tr[:75450,:,2]==0
print("CV score:",mean_absolute_error(y_true[mask], oof[mask]))
np.save(oof_path,oof)

fold: 0
RC 0
initial loss: 0.12861722443215698


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

	addcmul_(Number value, Tensor tensor1, Tensor tensor2)
Consider using one of the following signatures instead:
	addcmul_(Tensor tensor1, Tensor tensor2, *, Number value) (Triggered internally at  ..\torch\csrc\utils\python_arg_parser.cpp:1025.)
  exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)


epoch 0
Train_loss: 2.6059817874583393
val_loss: 0.1278085380728838
best loss: 0.1278085380728838
*********************************
epoch 1
Train_loss: 2.5845617990994043
val_loss: 0.12747869371303228
best loss: 0.12747869371303228
*********************************
epoch 2
Train_loss: 2.5676203790743144
val_loss: 0.12765808772065432
best loss: 0.12747869371303228
*********************************
epoch 3
Train_loss: 2.555985259394525
val_loss: 0.12728918631803265
best loss: 0.12728918631803265
*********************************
epoch 4
Train_loss: 2.545664573881004
val_loss: 0.12702733848459516
best loss: 0.12702733848459516
*********************************
epoch 5
Train_loss: 2.5369464755436213
val_loss: 0.12711703404722877
best loss: 0.12702733848459516
*********************************
epoch 6
Train_loss: 2.5278598163107153
val_loss: 0.1268522940540507
best loss: 0.1268522940540507
*********************************
epoch 7
Train_loss: 2.5210798758976622
val_loss: 0.1269361959936364


epoch 61
Train_loss: 2.2967173423649303
val_loss: 0.12597995655008162
best loss: 0.12594234787558753
*********************************
Epoch    63: reducing learning rate of group 0 to 1.0000e-06.
epoch 62
Train_loss: 2.296484461973666
val_loss: 0.1261506569618938
best loss: 0.12594234787558753
*********************************
epoch 63
Train_loss: 2.2941959196284474
val_loss: 0.12601033367504208
best loss: 0.12594234787558753
*********************************
epoch 64
Train_loss: 2.295100433387619
val_loss: 0.12600019960646197
best loss: 0.12594234787558753
*********************************
epoch 65
Train_loss: 2.2945679431410366
val_loss: 0.12598862790453777
best loss: 0.12594234787558753
*********************************
epoch 66
Train_loss: 2.2947584086309325
val_loss: 0.12594812590632234
best loss: 0.12594234787558753
*********************************
epoch 67
Train_loss: 2.2942288762448073
val_loss: 0.12600454349456838
best loss: 0.12594234787558753
******************************

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.4911415090743927
val_loss: 0.1310442881977918
best loss: 0.1310442881977918
*********************************
epoch 1
Train_loss: 2.4714728379431348
val_loss: 0.13118804751405777
best loss: 0.1310442881977918
*********************************
epoch 2
Train_loss: 2.45775638227505
val_loss: 0.13135261165388804
best loss: 0.1310442881977918
*********************************
epoch 3
Train_loss: 2.4472285906441047
val_loss: 0.13130343492078056
best loss: 0.1310442881977918
*********************************
epoch 4
Train_loss: 2.4382077170560503
val_loss: 0.1311993933831222
best loss: 0.1310442881977918
*********************************
epoch 5
Train_loss: 2.424423956232599
val_loss: 0.1311369759575361
best loss: 0.1310442881977918
*********************************
epoch 6
Train_loss: 2.421740737257437
val_loss: 0.13129396920009662
best loss: 0.1310442881977918
*********************************
epoch 7
Train_loss: 2.412827035915916
val_loss: 0.13129965454277434
best los

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.9029980957904082
val_loss: 0.20543665021320207
best loss: 0.20543665021320207
*********************************
epoch 1
Train_loss: 2.877357284011952
val_loss: 0.20340919506740832
best loss: 0.20340919506740832
*********************************
epoch 2
Train_loss: 2.8602327314346874
val_loss: 0.20208088640660782
best loss: 0.20208088640660782
*********************************
epoch 3
Train_loss: 2.8479147113873537
val_loss: 0.20324984790641565
best loss: 0.20208088640660782
*********************************
epoch 4
Train_loss: 2.8367257724708237
val_loss: 0.20211796967859333
best loss: 0.20208088640660782
*********************************
epoch 5
Train_loss: 2.8253941848390416
val_loss: 0.2016825371850231
best loss: 0.2016825371850231
*********************************
epoch 6
Train_loss: 2.8144508378494937
val_loss: 0.2024064404382609
best loss: 0.2016825371850231
*********************************
epoch 7
Train_loss: 2.8064072916028855
val_loss: 0.2016948977919843

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.863224666276131
val_loss: 0.22198739229538525
best loss: 0.22198739229538525
*********************************
epoch 1
Train_loss: 2.8362160600912567
val_loss: 0.22162201392945782
best loss: 0.22162201392945782
*********************************
epoch 2
Train_loss: 2.8160805672152542
val_loss: 0.22146641554784025
best loss: 0.22146641554784025
*********************************
epoch 3
Train_loss: 2.8006010041305682
val_loss: 0.2216234009762921
best loss: 0.22146641554784025
*********************************
epoch 4
Train_loss: 2.7868233877810513
val_loss: 0.22155670317766415
best loss: 0.22146641554784025
*********************************
epoch 5
Train_loss: 2.7749603087367136
val_loss: 0.22162478488788415
best loss: 0.22146641554784025
*********************************
epoch 6
Train_loss: 2.7649095971502424
val_loss: 0.22177343683892004
best loss: 0.22146641554784025
*********************************
epoch 7
Train_loss: 2.755281739284025
val_loss: 0.22152615144992

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.298091469369275
val_loss: 0.08360019045787359
best loss: 0.08360019045787359
*********************************
epoch 1
Train_loss: 2.2849968170441692
val_loss: 0.08351859645505949
best loss: 0.08351859645505949
*********************************
epoch 2
Train_loss: 2.272494333672835
val_loss: 0.08334110151334113
best loss: 0.08334110151334113
*********************************
epoch 3
Train_loss: 2.2632980288396283
val_loss: 0.08330960764322289
best loss: 0.08330960764322289
*********************************
epoch 4
Train_loss: 2.254164466574615
val_loss: 0.08320081887953958
best loss: 0.08320081887953958
*********************************
epoch 5
Train_loss: 2.24912774700457
val_loss: 0.08303906636518275
best loss: 0.08303906636518275
*********************************
epoch 6
Train_loss: 2.2427601226687153
val_loss: 0.08297035714550616
best loss: 0.08297035714550616
*********************************
epoch 7
Train_loss: 2.235931052882792
val_loss: 0.08305910572318911

epoch 61
Train_loss: 2.066555097100021
val_loss: 0.08267548408287155
best loss: 0.08267119109604491
*********************************
epoch 62
Train_loss: 2.0665150755207846
val_loss: 0.08268407217353765
best loss: 0.08267119109604491
*********************************
epoch 63
Train_loss: 2.0660372259979045
val_loss: 0.08270411134328082
best loss: 0.08267119109604491
*********************************
epoch 64
Train_loss: 2.0664727537494922
val_loss: 0.08270411380359714
best loss: 0.08267119109604491
*********************************
epoch 65
Train_loss: 2.064199784463484
val_loss: 0.08268979809680846
best loss: 0.08267119109604491
*********************************
epoch 66
Train_loss: 2.0663071142743137
val_loss: 0.0827084091528918
best loss: 0.08267119109604491
*********************************
RC 5
initial loss: 0.08777280055309668


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.2465580045131133
val_loss: 0.08721433140283676
best loss: 0.08721433140283676
*********************************
epoch 1
Train_loss: 2.2283364840658058
val_loss: 0.08715793508807067
best loss: 0.08715793508807067
*********************************
epoch 2
Train_loss: 2.2137911945943523
val_loss: 0.08673839501033763
best loss: 0.08673839501033763
*********************************
epoch 3
Train_loss: 2.203138576871024
val_loss: 0.08663936102041471
best loss: 0.08663936102041471
*********************************
epoch 4
Train_loss: 2.1927159067509345
val_loss: 0.08657333221984886
best loss: 0.08657333221984886
*********************************
epoch 5
Train_loss: 2.185965205808691
val_loss: 0.08661322441283635
best loss: 0.08657333221984886
*********************************
epoch 6
Train_loss: 2.1775434337971404
val_loss: 0.08665448948998987
best loss: 0.08657333221984886
*********************************
epoch 7
Train_loss: 2.1696563106444104
val_loss: 0.0865705803088

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.4425815350268474
val_loss: 0.1337474747637227
best loss: 0.1337474747637227
*********************************
epoch 1
Train_loss: 2.412661140705347
val_loss: 0.13309071691615118
best loss: 0.13309071691615118
*********************************
epoch 2
Train_loss: 2.3938118063912346
val_loss: 0.13303858191359544
best loss: 0.13303858191359544
*********************************
epoch 3
Train_loss: 2.3789017552932155
val_loss: 0.13320340261940583
best loss: 0.13303858191359544
*********************************
epoch 4
Train_loss: 2.366728631648255
val_loss: 0.1329847626453665
best loss: 0.1329847626453665
*********************************
epoch 5
Train_loss: 2.3565340044407956
val_loss: 0.13296205952058798
best loss: 0.13296205952058798
*********************************
epoch 6
Train_loss: 2.34670436435907
val_loss: 0.13276949178836783
best loss: 0.13276949178836783
*********************************
epoch 7
Train_loss: 2.339588289320473
val_loss: 0.13261223478521567
be

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.359633783689153
val_loss: 0.14304549512545994
best loss: 0.14304549512545994
*********************************
epoch 1
Train_loss: 2.3466238492789637
val_loss: 0.14233491550728156
best loss: 0.14233491550728156
*********************************
epoch 2
Train_loss: 2.334655339598733
val_loss: 0.14215917687722618
best loss: 0.14215917687722618
*********************************
epoch 3
Train_loss: 2.323437822568578
val_loss: 0.14214771857215727
best loss: 0.14214771857215727
*********************************
epoch 4
Train_loss: 2.316895154955645
val_loss: 0.14169310331231239
best loss: 0.14169310331231239
*********************************
epoch 5
Train_loss: 2.3133929640271687
val_loss: 0.14162051340277823
best loss: 0.14162051340277823
*********************************
epoch 6
Train_loss: 2.302938204261066
val_loss: 0.1425622212620076
best loss: 0.14162051340277823
*********************************
epoch 7
Train_loss: 2.30001761012467
val_loss: 0.14171410290288117
b

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.3322182063363766
val_loss: 0.13794329356829357
best loss: 0.13794329356829357
*********************************
epoch 1
Train_loss: 2.308118819798442
val_loss: 0.1374316714202153
best loss: 0.1374316714202153
*********************************
epoch 2
Train_loss: 2.29718847995026
val_loss: 0.13781785346334613
best loss: 0.1374316714202153
*********************************
epoch 3
Train_loss: 2.2879589873378627
val_loss: 0.13797147866299656
best loss: 0.1374316714202153
*********************************
epoch 4
Train_loss: 2.27669860348091
val_loss: 0.13763885912100793
best loss: 0.1374316714202153
*********************************
epoch 5
Train_loss: 2.268691504675491
val_loss: 0.13735556674474417
best loss: 0.13735556674474417
*********************************
epoch 6
Train_loss: 2.261854059250981
val_loss: 0.136867897009362
best loss: 0.136867897009362
*********************************
epoch 7
Train_loss: 2.2537864133134473
val_loss: 0.1369510536247143
best loss:

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.5394796615387802
val_loss: 0.1320091488111852
best loss: 0.1320091488111852
*********************************
epoch 1
Train_loss: 2.522145614117154
val_loss: 0.13179560508690644
best loss: 0.13179560508690644
*********************************
epoch 2
Train_loss: 2.5070713287653494
val_loss: 0.1316267898008527
best loss: 0.1316267898008527
*********************************
epoch 3
Train_loss: 2.4946091725815376
val_loss: 0.13108571161490856
best loss: 0.13108571161490856
*********************************
epoch 4
Train_loss: 2.4846233453254527
val_loss: 0.13151424196923017
best loss: 0.13108571161490856
*********************************
epoch 5
Train_loss: 2.4765119073669415
val_loss: 0.1313714016497737
best loss: 0.13108571161490856
*********************************
epoch 6
Train_loss: 2.4666475146957696
val_loss: 0.13144498387948456
best loss: 0.13108571161490856
*********************************
epoch 7
Train_loss: 2.4600143899995586
val_loss: 0.131084268964205
b

epoch 61
Train_loss: 2.3790211854564443
val_loss: 0.13102655113232703
best loss: 0.13082311853280787
*********************************
epoch 62
Train_loss: 2.379105977731397
val_loss: 0.13102511805893166
best loss: 0.13082311853280787
*********************************
epoch 63
Train_loss: 2.3793664940774932
val_loss: 0.1310078010694576
best loss: 0.13082311853280787
*********************************
epoch 64
Train_loss: 2.3791584645742354
val_loss: 0.1310164582651904
best loss: 0.13082311853280787
*********************************
epoch 65
Train_loss: 2.3786689722223717
val_loss: 0.13101789793988214
best loss: 0.13082311853280787
*********************************
epoch 66
Train_loss: 2.3783512097323976
val_loss: 0.13104098631447833
best loss: 0.13082311853280787
*********************************
epoch 67
Train_loss: 2.3790035650074586
val_loss: 0.13099336899512998
best loss: 0.13082311853280787
*********************************
epoch 68
Train_loss: 2.37838215847917
val_loss: 0.13097028

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.420536829673506
val_loss: 0.12285628137745225
best loss: 0.12285628137745225
*********************************
epoch 1
Train_loss: 2.408471677985212
val_loss: 0.12279012676191708
best loss: 0.12279012676191708
*********************************
epoch 2
Train_loss: 2.3946849566676662
val_loss: 0.12244235361266607
best loss: 0.12244235361266607
*********************************
epoch 3
Train_loss: 2.3820513452095398
val_loss: 0.12285817348820048
best loss: 0.12244235361266607
*********************************
epoch 4
Train_loss: 2.3716686590829505
val_loss: 0.12269751035425192
best loss: 0.12244235361266607
*********************************
epoch 5
Train_loss: 2.3604817362189947
val_loss: 0.12292999980418108
best loss: 0.12244235361266607
*********************************
epoch 6
Train_loss: 2.355750789612596
val_loss: 0.12290354435771428
best loss: 0.12244235361266607
*********************************
epoch 7
Train_loss: 2.3474701710417327
val_loss: 0.12277123349691

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.8265859252409764
val_loss: 0.2015210063623036
best loss: 0.2015210063623036
*********************************
epoch 1
Train_loss: 2.801085857793164
val_loss: 0.20160062935266526
best loss: 0.2015210063623036
*********************************
epoch 2
Train_loss: 2.787339268658367
val_loss: 0.20099936725670206
best loss: 0.20099936725670206
*********************************
epoch 3
Train_loss: 2.7716377188163803
val_loss: 0.20085797394453028
best loss: 0.20085797394453028
*********************************
epoch 4
Train_loss: 2.7609371533108624
val_loss: 0.20050792552549243
best loss: 0.20050792552549243
*********************************
epoch 5
Train_loss: 2.7477997810329655
val_loss: 0.1998009737438806
best loss: 0.1998009737438806
*********************************
epoch 6
Train_loss: 2.740829755374888
val_loss: 0.20024573127939624
best loss: 0.1998009737438806
*********************************
epoch 7
Train_loss: 2.7314914355692275
val_loss: 0.19965544815229955
be

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.794888310278494
val_loss: 0.2104968049513618
best loss: 0.2104968049513618
*********************************
epoch 1
Train_loss: 2.7681642836739577
val_loss: 0.20927905187396317
best loss: 0.20927905187396317
*********************************
epoch 2
Train_loss: 2.749134115754169
val_loss: 0.20880139266522815
best loss: 0.20880139266522815
*********************************
epoch 3
Train_loss: 2.7335681393212696
val_loss: 0.20844175157229372
best loss: 0.20844175157229372
*********************************
epoch 4
Train_loss: 2.7202498678516607
val_loss: 0.20834039452958666
best loss: 0.20834039452958666
*********************************
epoch 5
Train_loss: 2.7089557278813974
val_loss: 0.20815294125341224
best loss: 0.20815294125341224
*********************************
epoch 6
Train_loss: 2.6989904745651816
val_loss: 0.20877500302043328
best loss: 0.20815294125341224
*********************************
epoch 7
Train_loss: 2.6895535004211437
val_loss: 0.208497297479516

epoch 61
Train_loss: 2.5476695442106596
val_loss: 0.20790161948364855
best loss: 0.2078335779220743
*********************************
epoch 62
Train_loss: 2.5484329709958504
val_loss: 0.20790023143533282
best loss: 0.2078335779220743
*********************************
epoch 63
Train_loss: 2.5482580176495047
val_loss: 0.20789467685664803
best loss: 0.2078335779220743
*********************************
epoch 64
Train_loss: 2.5482614165955857
val_loss: 0.20794604745909273
best loss: 0.2078335779220743
*********************************
epoch 65
Train_loss: 2.5477290059056235
val_loss: 0.20796132892482064
best loss: 0.2078335779220743
*********************************
epoch 66
Train_loss: 2.5478385181768908
val_loss: 0.20790161577586996
best loss: 0.2078335779220743
*********************************
RC 4
initial loss: 0.08557974416772075


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.261501331608823
val_loss: 0.08492390300275376
best loss: 0.08492390300275376
*********************************
epoch 1
Train_loss: 2.2496910797163236
val_loss: 0.08494538907251918
best loss: 0.08492390300275376
*********************************
epoch 2
Train_loss: 2.2378362028690595
val_loss: 0.08486805750755759
best loss: 0.08486805750755759
*********************************
epoch 3
Train_loss: 2.2280516481737327
val_loss: 0.08495541094036983
best loss: 0.08486805750755759
*********************************
epoch 4
Train_loss: 2.2204414254363805
val_loss: 0.08476352738710682
best loss: 0.08476352738710682
*********************************
epoch 5
Train_loss: 2.213045303268084
val_loss: 0.08484658023780882
best loss: 0.08476352738710682
*********************************
epoch 6
Train_loss: 2.207316108550834
val_loss: 0.08472486469328969
best loss: 0.08472486469328969
*********************************
epoch 7
Train_loss: 2.2012551993868716
val_loss: 0.08476495908017

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.196835739481576
val_loss: 0.08528472872137989
best loss: 0.08528472872137989
*********************************
epoch 1
Train_loss: 2.1775297915509206
val_loss: 0.08519265025827895
best loss: 0.08519265025827895
*********************************
epoch 2
Train_loss: 2.167676356922888
val_loss: 0.08534107945401372
best loss: 0.08519265025827895
*********************************
epoch 3
Train_loss: 2.1562380207126193
val_loss: 0.08564891925163719
best loss: 0.08519265025827895
*********************************
epoch 4
Train_loss: 2.1482131840898178
val_loss: 0.08550873984063945
best loss: 0.08519265025827895
*********************************
epoch 5
Train_loss: 2.1398066202896495
val_loss: 0.08555821306619718
best loss: 0.08519265025827895
*********************************
epoch 6
Train_loss: 2.1317645070890197
val_loss: 0.08543315572876604
best loss: 0.08519265025827895
*********************************
epoch 7
Train_loss: 2.125578755972483
val_loss: 0.08555271944136

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.3845708047184053
val_loss: 0.1368383355689548
best loss: 0.1368383355689548
*********************************
epoch 1
Train_loss: 2.356395253566268
val_loss: 0.13623548175588415
best loss: 0.13623548175588415
*********************************
epoch 2
Train_loss: 2.3370805267027013
val_loss: 0.13592270043767946
best loss: 0.13592270043767946
*********************************
epoch 3
Train_loss: 2.3226240306083095
val_loss: 0.1355645138472142
best loss: 0.1355645138472142
*********************************
epoch 4
Train_loss: 2.311177237448061
val_loss: 0.13505919176138434
best loss: 0.13505919176138434
*********************************
epoch 5
Train_loss: 2.2996738967365093
val_loss: 0.1350415383016473
best loss: 0.1350415383016473
*********************************
epoch 6
Train_loss: 2.2918597597632426
val_loss: 0.13457909067404455
best loss: 0.13457909067404455
*********************************
epoch 7
Train_loss: 2.2834914900774095
val_loss: 0.1344100891460775
be

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.30962078846785
val_loss: 0.12813536771524187
best loss: 0.12813536771524187
*********************************
epoch 1
Train_loss: 2.2968799428376414
val_loss: 0.1278413581373224
best loss: 0.1278413581373224
*********************************
epoch 2
Train_loss: 2.290402797376584
val_loss: 0.1276027068519626
best loss: 0.1276027068519626
*********************************
epoch 3
Train_loss: 2.2806376258529992
val_loss: 0.12813727885158532
best loss: 0.1276027068519626
*********************************
epoch 4
Train_loss: 2.272537002625354
val_loss: 0.12779553052920842
best loss: 0.1276027068519626
*********************************
epoch 5
Train_loss: 2.26520524706515
val_loss: 0.12762561852826504
best loss: 0.1276027068519626
*********************************
epoch 6
Train_loss: 2.2610957950720834
val_loss: 0.12717504589706502
best loss: 0.12717504589706502
*********************************
epoch 7
Train_loss: 2.2538022906740505
val_loss: 0.12758361118284237
best l

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.2815968085924614
val_loss: 0.1294799003888879
best loss: 0.1294799003888879
*********************************
epoch 1
Train_loss: 2.266427677510385
val_loss: 0.12947708376193987
best loss: 0.12947708376193987
*********************************
epoch 2
Train_loss: 2.251670820174495
val_loss: 0.12936001645459305
best loss: 0.12936001645459305
*********************************
epoch 3
Train_loss: 2.2444795534901227
val_loss: 0.12935436929986255
best loss: 0.12935436929986255
*********************************
epoch 4
Train_loss: 2.2351179192765107
val_loss: 0.12937552882767822
best loss: 0.12935436929986255
*********************************
epoch 5
Train_loss: 2.227281435468532
val_loss: 0.12933744643992465
best loss: 0.12933744643992465
*********************************
epoch 6
Train_loss: 2.221981066405862
val_loss: 0.12961248360798316
best loss: 0.12933744643992465
*********************************
epoch 7
Train_loss: 2.2126051102320994
val_loss: 0.1296054332655563


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.5082280605190825
val_loss: 0.12448077820536187
best loss: 0.12448077820536187
*********************************
epoch 1
Train_loss: 2.485050114705277
val_loss: 0.12407129053810814
best loss: 0.12407129053810814
*********************************
epoch 2
Train_loss: 2.468172437585161
val_loss: 0.12418415649418224
best loss: 0.12407129053810814
*********************************
epoch 3
Train_loss: 2.454662268444039
val_loss: 0.1240553716201827
best loss: 0.1240553716201827
*********************************
epoch 4
Train_loss: 2.444348415819392
val_loss: 0.12410890400956845
best loss: 0.1240553716201827
*********************************
epoch 5
Train_loss: 2.4345232406984865
val_loss: 0.12414363219868488
best loss: 0.1240553716201827
*********************************
epoch 6
Train_loss: 2.4245177365389723
val_loss: 0.12414219386828705
best loss: 0.1240553716201827
*********************************
epoch 7
Train_loss: 2.4174015599929795
val_loss: 0.12387160762413942
be

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.3734731678713406
val_loss: 0.1382061008601785
best loss: 0.1382061008601785
*********************************
epoch 1
Train_loss: 2.3521061377260044
val_loss: 0.13742687588384772
best loss: 0.13742687588384772
*********************************
epoch 2
Train_loss: 2.3323281307113968
val_loss: 0.13717343814773064
best loss: 0.13717343814773064
*********************************
epoch 3
Train_loss: 2.3236716481399604
val_loss: 0.13703916036669925
best loss: 0.13703916036669925
*********************************
epoch 4
Train_loss: 2.309003938675123
val_loss: 0.13699565549153803
best loss: 0.13699565549153803
*********************************
epoch 5
Train_loss: 2.301147620714393
val_loss: 0.13708076130472807
best loss: 0.13699565549153803
*********************************
epoch 6
Train_loss: 2.2894567709977265
val_loss: 0.13701835150302863
best loss: 0.13699565549153803
*********************************
epoch 7
Train_loss: 2.281075415979924
val_loss: 0.1365076972027898

epoch 61
Train_loss: 2.035018128025377
val_loss: 0.1353937113359789
best loss: 0.13530860196737204
*********************************
epoch 62
Train_loss: 2.0361328695191543
val_loss: 0.13541640909130342
best loss: 0.13530860196737204
*********************************
epoch 63
Train_loss: 2.03793386542631
val_loss: 0.1354655839414818
best loss: 0.13530860196737204
*********************************
epoch 64
Train_loss: 2.0352987756326986
val_loss: 0.13542018705741318
best loss: 0.13530860196737204
*********************************
epoch 65
Train_loss: 2.033357880177234
val_loss: 0.1353539946858824
best loss: 0.13530860196737204
*********************************
epoch 66
Train_loss: 2.035856978742087
val_loss: 0.13541451949369743
best loss: 0.13530860196737204
*********************************
epoch 67
Train_loss: 2.036449203796205
val_loss: 0.13544099569859464
best loss: 0.13530860196737204
*********************************
Epoch    69: reducing learning rate of group 0 to 1.0000e-06.
ep

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.7811933145295353
val_loss: 0.21585192406676204
best loss: 0.21585192406676204
*********************************
epoch 1
Train_loss: 2.7545981898402077
val_loss: 0.21530272912109805
best loss: 0.21530272912109805
*********************************
epoch 2
Train_loss: 2.737073413677963
val_loss: 0.21474254590947128
best loss: 0.21474254590947128
*********************************
epoch 3
Train_loss: 2.7204485027057546
val_loss: 0.21474804135638795
best loss: 0.21474254590947128
*********************************
epoch 4
Train_loss: 2.7074020395936165
val_loss: 0.21383226230599695
best loss: 0.21383226230599695
*********************************
epoch 5
Train_loss: 2.6947038993317123
val_loss: 0.2134862638833597
best loss: 0.2134862638833597
*********************************
epoch 6
Train_loss: 2.6851680416784696
val_loss: 0.2134670493062814
best loss: 0.2134670493062814
*********************************
epoch 7
Train_loss: 2.6752126183693368
val_loss: 0.2137457640964848

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.7573633753183504
val_loss: 0.21160754505454413
best loss: 0.21160754505454413
*********************************
epoch 1
Train_loss: 2.7260616954004226
val_loss: 0.21001096351986703
best loss: 0.21001096351986703
*********************************
epoch 2
Train_loss: 2.704867466724693
val_loss: 0.20895306163165583
best loss: 0.20895306163165583
*********************************
epoch 3
Train_loss: 2.688124850785187
val_loss: 0.20951117447985826
best loss: 0.20895306163165583
*********************************
epoch 4
Train_loss: 2.6738944591122444
val_loss: 0.21019144517686386
best loss: 0.20895306163165583
*********************************
epoch 5
Train_loss: 2.6606468913398813
val_loss: 0.21165057680751997
best loss: 0.20895306163165583
*********************************
epoch 6
Train_loss: 2.6500714536857473
val_loss: 0.2122100794145604
best loss: 0.20895306163165583
*********************************
epoch 7
Train_loss: 2.6408343862259103
val_loss: 0.21255715991685

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.1812981190582037
val_loss: 0.08674427036775395
best loss: 0.08674427036775395
*********************************
epoch 1
Train_loss: 2.1629575166025288
val_loss: 0.0869502824125293
best loss: 0.08674427036775395
*********************************
epoch 2
Train_loss: 2.1488796524088767
val_loss: 0.08654398418127095
best loss: 0.08654398418127095
*********************************
epoch 3
Train_loss: 2.1399397807570772
val_loss: 0.086227818720713
best loss: 0.086227818720713
*********************************
epoch 4
Train_loss: 2.1295558121618114
val_loss: 0.08636801988699093
best loss: 0.086227818720713
*********************************
epoch 5
Train_loss: 2.122622822207881
val_loss: 0.08614341220356883
best loss: 0.08614341220356883
*********************************
epoch 6
Train_loss: 2.1175221218590785
val_loss: 0.08640521684807931
best loss: 0.08614341220356883
*********************************
epoch 7
Train_loss: 2.1085750854188774
val_loss: 0.08634512950617258
b

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.147595991270811
val_loss: 0.08085985949909141
best loss: 0.08085985949909141
*********************************
epoch 1
Train_loss: 2.1252462050312433
val_loss: 0.08043045244159974
best loss: 0.08043045244159974
*********************************
epoch 2
Train_loss: 2.1097998432656513
val_loss: 0.08026804308273545
best loss: 0.08026804308273545
*********************************
epoch 3
Train_loss: 2.097374092343036
val_loss: 0.08020610955931078
best loss: 0.08020610955931078
*********************************
epoch 4
Train_loss: 2.086787109620371
val_loss: 0.07991570683681905
best loss: 0.07991570683681905
*********************************
epoch 5
Train_loss: 2.078637403936497
val_loss: 0.07984964716333542
best loss: 0.07984964716333542
*********************************
epoch 6
Train_loss: 2.0714633227805686
val_loss: 0.07973540539684805
best loss: 0.07973540539684805
*********************************
epoch 7
Train_loss: 2.063874063048926
val_loss: 0.0797175157705822

epoch 61
Train_loss: 1.8392538444165987
val_loss: 0.07915322739126258
best loss: 0.07915322739126258
*********************************
epoch 62
Train_loss: 1.8379989195788244
val_loss: 0.07920277132116661
best loss: 0.07915322739126258
*********************************
epoch 63
Train_loss: 1.838104404366549
val_loss: 0.07927572056724046
best loss: 0.07915322739126258
*********************************
epoch 64
Train_loss: 1.8368728865435804
val_loss: 0.07974091341067033
best loss: 0.07915322739126258
*********************************
epoch 65
Train_loss: 1.8371920847764096
val_loss: 0.07922479499323978
best loss: 0.07915322739126258
*********************************
epoch 66
Train_loss: 1.836147548013685
val_loss: 0.0792619560672351
best loss: 0.07915322739126258
*********************************
epoch 67
Train_loss: 1.836193676674927
val_loss: 0.07926195238701585
best loss: 0.07915322739126258
*********************************
epoch 68
Train_loss: 1.8358255197708282
val_loss: 0.0791945

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.358088693200608
val_loss: 0.12623035398839844
best loss: 0.12623035398839844
*********************************
epoch 1
Train_loss: 2.3209175500680206
val_loss: 0.12598157187837944
best loss: 0.12598157187837944
*********************************
epoch 2
Train_loss: 2.3005691665224086
val_loss: 0.12576220808874164
best loss: 0.12576220808874164
*********************************
epoch 3
Train_loss: 2.2845006476715515
val_loss: 0.12600174875814935
best loss: 0.12576220808874164
*********************************
epoch 4
Train_loss: 2.271457568366418
val_loss: 0.12586138638693872
best loss: 0.12576220808874164
*********************************
epoch 5
Train_loss: 2.2615163244929626
val_loss: 0.1258479402326121
best loss: 0.12576220808874164
*********************************
epoch 6
Train_loss: 2.2514487405945376
val_loss: 0.1257193457932251
best loss: 0.1257193457932251
*********************************
epoch 7
Train_loss: 2.2424534346072784
val_loss: 0.1256857264890952

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.250109491549032
val_loss: 0.1334579991530112
best loss: 0.1334579991530112
*********************************
epoch 1
Train_loss: 2.234819996668264
val_loss: 0.13331861844984702
best loss: 0.13331861844984702
*********************************
epoch 2
Train_loss: 2.22040857518802
val_loss: 0.13343126771260458
best loss: 0.13331861844984702
*********************************
epoch 3
Train_loss: 2.21673153737909
val_loss: 0.13337017677675395
best loss: 0.13331861844984702
*********************************
epoch 4
Train_loss: 2.204148754701693
val_loss: 0.13307041960638044
best loss: 0.13307041960638044
*********************************
epoch 5
Train_loss: 2.199157607700951
val_loss: 0.13337017181091426
best loss: 0.13307041960638044
*********************************
epoch 6
Train_loss: 2.189171820502426
val_loss: 0.13312578530663755
best loss: 0.13307041960638044
*********************************
epoch 7
Train_loss: 2.1840952683350885
val_loss: 0.13348854797088525
best

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.2359419846410686
val_loss: 0.1343689009758195
best loss: 0.1343689009758195
*********************************
epoch 1
Train_loss: 2.21508984500339
val_loss: 0.1343026424719216
best loss: 0.1343026424719216
*********************************
epoch 2
Train_loss: 2.2027489140112873
val_loss: 0.13447745439533798
best loss: 0.1343026424719216
*********************************
epoch 3
Train_loss: 2.192578002300423
val_loss: 0.13442106829440573
best loss: 0.1343026424719216
*********************************
epoch 4
Train_loss: 2.1823349305525093
val_loss: 0.1346113829597624
best loss: 0.1343026424719216
*********************************
epoch 5
Train_loss: 2.1725057073381038
val_loss: 0.1341630818022443
best loss: 0.1341630818022443
*********************************
epoch 6
Train_loss: 2.1677463059660074
val_loss: 0.1350385446812301
best loss: 0.1341630818022443
*********************************
epoch 7
Train_loss: 2.1589262280056736
val_loss: 0.13442247354101952
best los

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.5319979988063674
val_loss: 0.13140300134466795
best loss: 0.13140300134466795
*********************************
epoch 1
Train_loss: 2.49161121744298
val_loss: 0.13080661106451807
best loss: 0.13080661106451807
*********************************
epoch 2
Train_loss: 2.4740242169697884
val_loss: 0.1305654692006476
best loss: 0.1305654692006476
*********************************
epoch 3
Train_loss: 2.4605974228389
val_loss: 0.13041672652459702
best loss: 0.13041672652459702
*********************************
epoch 4
Train_loss: 2.448742604601125
val_loss: 0.13052069735031813
best loss: 0.13041672652459702
*********************************
epoch 5
Train_loss: 2.439064066170397
val_loss: 0.13059434313814877
best loss: 0.13041672652459702
*********************************
epoch 6
Train_loss: 2.4300581351768713
val_loss: 0.13077628855142331
best loss: 0.13041672652459702
*********************************
epoch 7
Train_loss: 2.4212781912114405
val_loss: 0.13046293587603583
be

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.4076128638083825
val_loss: 0.13765155064623766
best loss: 0.13765155064623766
*********************************
epoch 1
Train_loss: 2.3757203912609577
val_loss: 0.13726566760596653
best loss: 0.13726566760596653
*********************************
epoch 2
Train_loss: 2.358008182236368
val_loss: 0.13730349605357256
best loss: 0.13726566760596653
*********************************
epoch 3
Train_loss: 2.34817840446869
val_loss: 0.1372845828443363
best loss: 0.13726566760596653
*********************************
epoch 4
Train_loss: 2.335458022975501
val_loss: 0.13720702838210982
best loss: 0.13720702838210982
*********************************
epoch 5
Train_loss: 2.327963282384586
val_loss: 0.1371332561634415
best loss: 0.1371332561634415
*********************************
epoch 6
Train_loss: 2.3163436072533736
val_loss: 0.13714082690755397
best loss: 0.1371332561634415
*********************************
epoch 7
Train_loss: 2.30847474889305
val_loss: 0.1374207746484292
best 

epoch 62
Train_loss: 2.0273366318142103
val_loss: 0.13654688172880725
best loss: 0.13619126201995843
*********************************
epoch 63
Train_loss: 2.027167390173227
val_loss: 0.13652606757146926
best loss: 0.13619126201995843
*********************************
epoch 64
Train_loss: 2.0204872309670012
val_loss: 0.136041824708154
best loss: 0.136041824708154
*********************************
epoch 65
Train_loss: 2.017887701017333
val_loss: 0.1369913939013937
best loss: 0.136041824708154
*********************************
epoch 66
Train_loss: 2.021271436548239
val_loss: 0.13647689278129418
best loss: 0.136041824708154
*********************************
epoch 67
Train_loss: 2.013286276417766
val_loss: 0.136446616641192
best loss: 0.136041824708154
*********************************
epoch 68
Train_loss: 2.00883226898676
val_loss: 0.13645797941769786
best loss: 0.136041824708154
*********************************
epoch 69
Train_loss: 2.005314535052886
val_loss: 0.13645040419834562
best lo

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.793462978482492
val_loss: 0.20495663584209797
best loss: 0.20495663584209797
*********************************
epoch 1
Train_loss: 2.7645801954284073
val_loss: 0.20561557851704135
best loss: 0.20495663584209797
*********************************
epoch 2
Train_loss: 2.7464707306205622
val_loss: 0.20473149288063797
best loss: 0.20473149288063797
*********************************
epoch 3
Train_loss: 2.7283612451927852
val_loss: 0.20411235785238727
best loss: 0.20411235785238727
*********************************
epoch 4
Train_loss: 2.7184046854254533
val_loss: 0.20417276282162433
best loss: 0.20411235785238727
*********************************
epoch 5
Train_loss: 2.7064901556417156
val_loss: 0.20386661959885155
best loss: 0.20386661959885155
*********************************
epoch 6
Train_loss: 2.69599008126178
val_loss: 0.20420571085421327
best loss: 0.20386661959885155
*********************************
epoch 7
Train_loss: 2.6840239603328744
val_loss: 0.20402173954448

epoch 61
Train_loss: 2.4133230509327226
val_loss: 0.20304980325794295
best loss: 0.2028397475673896
*********************************
Epoch    63: reducing learning rate of group 0 to 1.0000e-06.
epoch 62
Train_loss: 2.4113355851162437
val_loss: 0.20298251677562754
best loss: 0.2028397475673896
*********************************
epoch 63
Train_loss: 2.411233169866518
val_loss: 0.20296056999100578
best loss: 0.2028397475673896
*********************************
epoch 64
Train_loss: 2.4114771486934727
val_loss: 0.20299762371426283
best loss: 0.2028397475673896
*********************************
epoch 65
Train_loss: 2.4124605220576534
val_loss: 0.20301135565952452
best loss: 0.2028397475673896
*********************************
epoch 66
Train_loss: 2.41187621606489
val_loss: 0.20296879624004036
best loss: 0.2028397475673896
*********************************
RC 3
initial loss: 0.22119709825112285


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.7721487663456728
val_loss: 0.2200435173315424
best loss: 0.2200435173315424
*********************************
epoch 1
Train_loss: 2.733301702567087
val_loss: 0.22028227393599578
best loss: 0.2200435173315424
*********************************
epoch 2
Train_loss: 2.711408803617722
val_loss: 0.21982000809054503
best loss: 0.21982000809054503
*********************************
epoch 3
Train_loss: 2.6945425209787346
val_loss: 0.21983666438802457
best loss: 0.21982000809054503
*********************************
epoch 4
Train_loss: 2.6802998885258096
val_loss: 0.2192189256455537
best loss: 0.2192189256455537
*********************************
epoch 5
Train_loss: 2.668544168494793
val_loss: 0.21899958942792047
best loss: 0.21899958942792047
*********************************
epoch 6
Train_loss: 2.6565611266671483
val_loss: 0.21894822540510686
best loss: 0.21894822540510686
*********************************
epoch 7
Train_loss: 2.647377064794143
val_loss: 0.21875666589056234
be

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.185035124644126
val_loss: 0.08908345465147902
best loss: 0.08908345465147902
*********************************
epoch 1
Train_loss: 2.167429111379961
val_loss: 0.08928613652657139
best loss: 0.08908345465147902
*********************************
epoch 2
Train_loss: 2.1573285795242807
val_loss: 0.08912341809165908
best loss: 0.08908345465147902
*********************************
epoch 3
Train_loss: 2.145569190067835
val_loss: 0.08896212847942513
best loss: 0.08896212847942513
*********************************
epoch 4
Train_loss: 2.1376062957652553
val_loss: 0.08878941230293318
best loss: 0.08878941230293318
*********************************
epoch 5
Train_loss: 2.131308058658219
val_loss: 0.08862383553392451
best loss: 0.08862383553392451
*********************************
epoch 6
Train_loss: 2.1239096071122874
val_loss: 0.08874373448375368
best loss: 0.08862383553392451
*********************************
epoch 7
Train_loss: 2.116888895580329
val_loss: 0.0886837861473092

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.1470270089717354
val_loss: 0.08312921316418558
best loss: 0.08312921316418558
*********************************
epoch 1
Train_loss: 2.125877301904744
val_loss: 0.08309485448928114
best loss: 0.08309485448928114
*********************************
epoch 2
Train_loss: 2.111738151774986
val_loss: 0.0830769847564616
best loss: 0.0830769847564616
*********************************
epoch 3
Train_loss: 2.1021928376546803
val_loss: 0.08312783973561853
best loss: 0.0830769847564616
*********************************
epoch 4
Train_loss: 2.0929337679196913
val_loss: 0.08302613583323189
best loss: 0.08302613583323189
*********************************
epoch 5
Train_loss: 2.0818933560407795
val_loss: 0.08289419880607607
best loss: 0.08289419880607607
*********************************
epoch 6
Train_loss: 2.0757276995880374
val_loss: 0.08282960146871822
best loss: 0.08282960146871822
*********************************
epoch 7
Train_loss: 2.0677949164582627
val_loss: 0.0829629163110082

epoch 62
Train_loss: 1.8126267981043735
val_loss: 0.08257534423543558
best loss: 0.08233207610208852
*********************************
epoch 63
Train_loss: 1.8105688272188811
val_loss: 0.0827814991652038
best loss: 0.08233207610208852
*********************************
epoch 64
Train_loss: 1.807666114535395
val_loss: 0.08242003409693605
best loss: 0.08233207610208852
*********************************
Epoch    66: reducing learning rate of group 0 to 1.0000e-05.
epoch 65
Train_loss: 1.80208579242905
val_loss: 0.08242415916480528
best loss: 0.08233207610208852
*********************************
epoch 66
Train_loss: 1.7865756057635127
val_loss: 0.08241591269085123
best loss: 0.08233207610208852
*********************************
epoch 67
Train_loss: 1.7864258899640175
val_loss: 0.08243515509453683
best loss: 0.08233207610208852
*********************************
epoch 68
Train_loss: 1.784800476349009
val_loss: 0.08245439684160652
best loss: 0.08233207610208852
********************************

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.4318164737893664
val_loss: 0.12251694002903722
best loss: 0.12251694002903722
*********************************
epoch 1
Train_loss: 2.331686995682461
val_loss: 0.12112858479973738
best loss: 0.12112858479973738
*********************************
epoch 2
Train_loss: 2.3086435905318887
val_loss: 0.12105631159535397
best loss: 0.12105631159535397
*********************************
epoch 3
Train_loss: 2.2917549330087854
val_loss: 0.12088822337718125
best loss: 0.12088822337718125
*********************************
epoch 4
Train_loss: 2.2784845695466296
val_loss: 0.12085040591672125
best loss: 0.12085040591672125
*********************************
epoch 5
Train_loss: 2.26645043917614
val_loss: 0.12073022656597662
best loss: 0.12073022656597662
*********************************
epoch 6
Train_loss: 2.2563465878161257
val_loss: 0.12090251182787534
best loss: 0.12073022656597662
*********************************
epoch 7
Train_loss: 2.2462978626492163
val_loss: 0.12081006582800

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.269825620407983
val_loss: 0.12462696991967075
best loss: 0.12462696991967075
*********************************
epoch 1
Train_loss: 2.2561330905220434
val_loss: 0.12490757388477267
best loss: 0.12462696991967075
*********************************
epoch 2
Train_loss: 2.24236353426147
val_loss: 0.12493621366512585
best loss: 0.12462696991967075
*********************************
epoch 3
Train_loss: 2.2360933684641475
val_loss: 0.12496102476954936
best loss: 0.12462696991967075
*********************************
epoch 4
Train_loss: 2.227587678778853
val_loss: 0.12487130969562119
best loss: 0.12462696991967075
*********************************
epoch 5
Train_loss: 2.217251336289735
val_loss: 0.12461742444049914
best loss: 0.12461742444049914
*********************************
epoch 6
Train_loss: 2.214320889581212
val_loss: 0.12512900181204292
best loss: 0.12461742444049914
*********************************
epoch 7
Train_loss: 2.2035326292472375
val_loss: 0.12497820378963415

epoch 61
Train_loss: 2.137833622458648
val_loss: 0.12446089736708034
best loss: 0.1243730914598731
*********************************
epoch 62
Train_loss: 2.1352061545803367
val_loss: 0.12442463025466487
best loss: 0.1243730914598731
*********************************
epoch 63
Train_loss: 2.139108243850689
val_loss: 0.12446280460161481
best loss: 0.1243730914598731
*********************************
epoch 64
Train_loss: 2.136852962948374
val_loss: 0.12446090496432989
best loss: 0.1243730914598731
*********************************
epoch 65
Train_loss: 2.1363663434074325
val_loss: 0.12447235351809982
best loss: 0.1243730914598731
*********************************
epoch 66
Train_loss: 2.1378215043948443
val_loss: 0.124416997663155
best loss: 0.1243730914598731
*********************************
epoch 67
Train_loss: 2.139220036145196
val_loss: 0.12444372145981653
best loss: 0.1243730914598731
*********************************
epoch 68
Train_loss: 2.134768168395981
val_loss: 0.12445135271916767

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.236037725001917
val_loss: 0.13479460957817962
best loss: 0.13479460957817962
*********************************
epoch 1
Train_loss: 2.21579903479023
val_loss: 0.1340819631219364
best loss: 0.1340819631219364
*********************************
epoch 2
Train_loss: 2.2037986380281307
val_loss: 0.13408899362507906
best loss: 0.1340819631219364
*********************************
epoch 3
Train_loss: 2.1932824489633242
val_loss: 0.1340143486286635
best loss: 0.1340143486286635
*********************************
epoch 4
Train_loss: 2.1809204175573824
val_loss: 0.13423546771011022
best loss: 0.1340143486286635
*********************************
epoch 5
Train_loss: 2.1716083070595253
val_loss: 0.1340594183597172
best loss: 0.1340143486286635
*********************************
epoch 6
Train_loss: 2.1676516214583668
val_loss: 0.1344270166424614
best loss: 0.1340143486286635
*********************************
epoch 7
Train_loss: 2.1593049128844215
val_loss: 0.1342312416891807
best lo

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.4978064472892005
val_loss: 0.13064049165187247
best loss: 0.13064049165187247
*********************************
epoch 1
Train_loss: 2.4781394663299596
val_loss: 0.13038008273006027
best loss: 0.13038008273006027
*********************************
epoch 2
Train_loss: 2.4609909898145164
val_loss: 0.13039309708496047
best loss: 0.13038008273006027
*********************************
epoch 3
Train_loss: 2.447859313563218
val_loss: 0.1303337907602485
best loss: 0.1303337907602485
*********************************
epoch 4
Train_loss: 2.4348583446401872
val_loss: 0.13042348294391032
best loss: 0.1303337907602485
*********************************
epoch 5
Train_loss: 2.426320654390379
val_loss: 0.1302874913142507
best loss: 0.1302874913142507
*********************************
epoch 6
Train_loss: 2.4174471152398604
val_loss: 0.1303106392818178
best loss: 0.1302874913142507
*********************************
epoch 7
Train_loss: 2.4093992521971135
val_loss: 0.13045387795827493
be

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.3963221268042214
val_loss: 0.13347055134772806
best loss: 0.13347055134772806
*********************************
epoch 1
Train_loss: 2.3775544412101057
val_loss: 0.1331203862323844
best loss: 0.1331203862323844
*********************************
epoch 2
Train_loss: 2.3632744315488954
val_loss: 0.1327588738340478
best loss: 0.1327588738340478
*********************************
epoch 3
Train_loss: 2.351234905430586
val_loss: 0.13269641403603996
best loss: 0.13269641403603996
*********************************
epoch 4
Train_loss: 2.3373736125215396
val_loss: 0.13257907132845273
best loss: 0.13257907132845273
*********************************
epoch 5
Train_loss: 2.3306701668342575
val_loss: 0.13289705062525226
best loss: 0.13257907132845273
*********************************
epoch 6
Train_loss: 2.3210298509613887
val_loss: 0.13274184211972642
best loss: 0.13257907132845273
*********************************
epoch 7
Train_loss: 2.313692154320792
val_loss: 0.1327645590288286


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.7721939184820052
val_loss: 0.22265050474726047
best loss: 0.22265050474726047
*********************************
epoch 1
Train_loss: 2.747597185467589
val_loss: 0.22036051353516095
best loss: 0.22036051353516095
*********************************
epoch 2
Train_loss: 2.730431325635751
val_loss: 0.21926363613661157
best loss: 0.21926363613661157
*********************************
epoch 3
Train_loss: 2.713031035710421
val_loss: 0.21876879587040562
best loss: 0.21876879587040562
*********************************
epoch 4
Train_loss: 2.7024341242140175
val_loss: 0.21925538088602656
best loss: 0.21876879587040562
*********************************
epoch 5
Train_loss: 2.690410907067338
val_loss: 0.2188622694969042
best loss: 0.21876879587040562
*********************************
epoch 6
Train_loss: 2.678760326693035
val_loss: 0.21899010368058322
best loss: 0.21876879587040562
*********************************
epoch 7
Train_loss: 2.672421631490196
val_loss: 0.21863684135541644


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.7628561764026283
val_loss: 0.2130864027238
best loss: 0.2130864027238
*********************************
epoch 1
Train_loss: 2.735164800115989
val_loss: 0.2126570868119649
best loss: 0.2126570868119649
*********************************
epoch 2
Train_loss: 2.7140488386071473
val_loss: 0.21262652117900127
best loss: 0.21262652117900127
*********************************
epoch 3
Train_loss: 2.6974812231294414
val_loss: 0.212488959372066
best loss: 0.212488959372066
*********************************
epoch 4
Train_loss: 2.6835089627764748
val_loss: 0.21252509015626936
best loss: 0.212488959372066
*********************************
epoch 5
Train_loss: 2.671735154303827
val_loss: 0.21267098793587846
best loss: 0.212488959372066
*********************************
epoch 6
Train_loss: 2.659986943344705
val_loss: 0.21253204332656989
best loss: 0.212488959372066
*********************************
epoch 7
Train_loss: 2.649609797994462
val_loss: 0.2122930550542155
best loss: 0.21229

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.191663262517894
val_loss: 0.08532822652853037
best loss: 0.08532822652853037
*********************************
epoch 1
Train_loss: 2.175351456765594
val_loss: 0.08536116190245886
best loss: 0.08532822652853037
*********************************
epoch 2
Train_loss: 2.162767233075256
val_loss: 0.08530245704313923
best loss: 0.08530245704313923
*********************************
epoch 3
Train_loss: 2.1489383428574316
val_loss: 0.08533395054792672
best loss: 0.08530245704313923
*********************************
epoch 4
Train_loss: 2.14089159709297
val_loss: 0.08521081223692994
best loss: 0.08521081223692994
*********************************
epoch 5
Train_loss: 2.134786294027696
val_loss: 0.08538407031543056
best loss: 0.08521081223692994
*********************************
epoch 6
Train_loss: 2.1261470880240503
val_loss: 0.08538120871720024
best loss: 0.08521081223692994
*********************************
epoch 7
Train_loss: 2.1188825453475815
val_loss: 0.08533681987519735

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.1510847184341046
val_loss: 0.09130401241826007
best loss: 0.09130401241826007
*********************************
epoch 1
Train_loss: 2.1282990297050666
val_loss: 0.09127372957215377
best loss: 0.09127372957215377
*********************************
epoch 2
Train_loss: 2.11439966821368
val_loss: 0.0912957502025573
best loss: 0.09127372957215377
*********************************
epoch 3
Train_loss: 2.101455495106125
val_loss: 0.09131502097281999
best loss: 0.09127372957215377
*********************************
epoch 4
Train_loss: 2.0930662407854204
val_loss: 0.09129988018651249
best loss: 0.09127372957215377
*********************************
epoch 5
Train_loss: 2.081983921072521
val_loss: 0.09122968038565028
best loss: 0.09122968038565028
*********************************
epoch 6
Train_loss: 2.0767931262218897
val_loss: 0.09158480305591168
best loss: 0.09122968038565028
*********************************
epoch 7
Train_loss: 2.0668839503373326
val_loss: 0.0915228642896
be

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.3483353422756776
val_loss: 0.13085893455654227
best loss: 0.13085893455654227
*********************************
epoch 1
Train_loss: 2.3166884273043427
val_loss: 0.13040904677736467
best loss: 0.13040904677736467
*********************************
epoch 2
Train_loss: 2.2957744383353713
val_loss: 0.13034513318293908
best loss: 0.13034513318293908
*********************************
epoch 3
Train_loss: 2.279309517393562
val_loss: 0.13034933725641232
best loss: 0.13034513318293908
*********************************
epoch 4
Train_loss: 2.2665682704403247
val_loss: 0.13053266057510549
best loss: 0.13034513318293908
*********************************
epoch 5
Train_loss: 2.2556201491102277
val_loss: 0.13060329469732126
best loss: 0.13034513318293908
*********************************
epoch 6
Train_loss: 2.2459974832695813
val_loss: 0.13047127432621955
best loss: 0.13034513318293908
*********************************
epoch 7
Train_loss: 2.2378153072970663
val_loss: 0.130402317972

epoch 62
Train_loss: 1.9666072268917296
val_loss: 0.12907703402773113
best loss: 0.12887437485108716
*********************************
epoch 63
Train_loss: 1.963075442418857
val_loss: 0.12902573711229198
best loss: 0.12887437485108716
*********************************
epoch 64
Train_loss: 1.9611523916209235
val_loss: 0.12936210335217144
best loss: 0.12887437485108716
*********************************
epoch 65
Train_loss: 1.9561748294930705
val_loss: 0.12900051260889053
best loss: 0.12887437485108716
*********************************
Epoch    67: reducing learning rate of group 0 to 1.0000e-05.
epoch 66
Train_loss: 1.9526620239308725
val_loss: 0.12910226410868558
best loss: 0.12887437485108716
*********************************
epoch 67
Train_loss: 1.9413193776792477
val_loss: 0.12922419629063675
best loss: 0.12887437485108716
*********************************
epoch 68
Train_loss: 1.9396076363506458
val_loss: 0.1289887346314115
best loss: 0.12887437485108716
*****************************

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.264809517598096
val_loss: 0.13999152456526454
best loss: 0.13999152456526454
*********************************
epoch 1
Train_loss: 2.2496974734319215
val_loss: 0.14097845025924619
best loss: 0.13999152456526454
*********************************
epoch 2
Train_loss: 2.2405687763538844
val_loss: 0.14077227721760557
best loss: 0.13999152456526454
*********************************
epoch 3
Train_loss: 2.231676367624158
val_loss: 0.14014233465769643
best loss: 0.13999152456526454
*********************************
epoch 4
Train_loss: 2.2220032978421687
val_loss: 0.14052603293043972
best loss: 0.13999152456526454
*********************************
epoch 5
Train_loss: 2.213276079499444
val_loss: 0.14045158418004075
best loss: 0.13999152456526454
*********************************
epoch 6
Train_loss: 2.2063682255714356
val_loss: 0.14010797808315203
best loss: 0.13999152456526454
*********************************
epoch 7
Train_loss: 2.2014271825466913
val_loss: 0.14011751991880

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

epoch 0
Train_loss: 2.24332124418199
val_loss: 0.1320573631272611
best loss: 0.1320573631272611
*********************************
epoch 1
Train_loss: 2.2224780234381503
val_loss: 0.1325689936971083
best loss: 0.1320573631272611
*********************************
epoch 2
Train_loss: 2.211323987185134
val_loss: 0.1324069070082678
best loss: 0.1320573631272611
*********************************
epoch 3
Train_loss: 2.197756012815362
val_loss: 0.13224200278047968
best loss: 0.1320573631272611
*********************************
epoch 4
Train_loss: 2.1883291049109226
val_loss: 0.13203199229919838
best loss: 0.13203199229919838
*********************************
epoch 5
Train_loss: 2.179778395726114
val_loss: 0.13207004864630176
best loss: 0.13203199229919838
*********************************
epoch 6
Train_loss: 2.1752946905041406
val_loss: 0.13203622099385495
best loss: 0.13203199229919838
*********************************
epoch 7
Train_loss: 2.1627942228658563
val_loss: 0.13236744155933547
best 