In [1]:
import os
try:
    os.chdir('src')
except:
    pass
print(os.getcwd())

/home/shamvinc/ssl_time_series/mvts_transformer/src


In [2]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload


<center><img src="../img/img_0.PNG"  width="1000" height="240"/></center>


# SimMTM is a simple self-supervised learning framework for Masked Time-Series Modeling.

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
from tqdm import tqdm
import copy



from datasets.datasplit import split_dataset
from datasets.data import data_factory, Normalizer, TSRegressionArchive, CSVRegressionArchive
from datasets.datasplit import split_dataset
from datasets.dataset import collate_superv
from models.ts_transformer import model_factory
from models.loss import get_loss_module, contrastive_loss
from optimizers import get_optimizer

from options import Options
from running import setup


# Masked Modeling

<img src="../img/img_1.PNG"  width="900" height="240"/>

<img src="../img/img_2.PNG"  width="900" height="240"/>

### Random Masking is not a good choice to learn a good representation. 

<img src="../img/img_3.PNG" width="600"/>

### Therefore, we randomly mask a sequence of points in a serie.

In [4]:
def geom_noise_mask_single(L, lm, masking_ratio):
    """
    Randomly create a boolean mask of length `L`, consisting of subsequences of average length lm, masking with 0s a `masking_ratio`
    proportion of the sequence L. The length of masking subsequences and intervals follow a geometric distribution.
    Args:
        L: length of mask and sequence to be masked
        lm: average length of masking subsequences (streaks of 0s)
        masking_ratio: proportion of L to be masked

    Returns:
        (L,) boolean numpy array intended to mask ('drop') with 0s a sequence of length L
    """
    keep_mask = np.ones(L, dtype=bool)
    p_m = 1 / lm  # probability of each masking sequence stopping. parameter of geometric distribution.
    p_u = p_m * masking_ratio / (1 - masking_ratio)  # probability of each unmasked sequence stopping. parameter of geometric distribution.
    p = [p_m, p_u]

    # Start in state 0 with masking_ratio probability
    state = int(np.random.rand() > masking_ratio)  # state 0 means masking, 1 means not masking
    for i in range(L):
        keep_mask[i] = state  # here it happens that state and masking value corresponding to state are identical
        if np.random.rand() < p[state]:
            state = 1 - state

    return keep_mask

# SimMTM ultilizes both contrastive learning and mask modeling to learn the data representation.


# 1 - Contrastive learning

<img src="../img/img_5.png"/>

### The contrastive loss is the following: (Eq. 8 in the paper)

<center><img src="../img/img_6.PNG"/><center/>

In [5]:
def demo_contrastive_loss(z, batch_size):
    s = s.squeeze(-1) 

    B = s.shape[0]
    v = s.reshape(B, -1)

    norm_v = torch.norm(v, p=2, dim=-1).unsqueeze(-1)
    v = v/norm_v
    u = torch.transpose(v, 0, 1)

    R = torch.matmul(v,u)

 
    R = torch.exp(R/tau) # (batch + mask size) x (batch + mask size)
    
    # number of masks
    M = B//batch_size
    mask = torch.eye(batch_size, device=R.device).repeat_interleave(M,dim=0).repeat_interleave(M,dim=1)

    denom = R * (torch.ones_like(R) - torch.eye(R.shape[0], device=R.device))

    denom = R.sum(-1).unsqueeze(-1)

    loss = torch.log(R/denom)
    

    loss = (loss * (mask - torch.eye(R.shape[0], device=R.device))).sum(1)/(M-1) # except no masked unit
    loss = loss.mean(0)
    
    return -loss


# 2 - Masked Modeling

### SimMTM proposes to recover a time serie by the weighted sum of multiple masked points, which eases the reconstruction task by assembling ruined but complementary temporal variations.

<img src="../img/img_4.png"/>

In [6]:


class DemoSimMTMTransformerEncoder(nn.Module):

    def forward(self, X):
        """
        Args:
            X: (batch_size, seq_length, feat_dim) torch tensor of masked features (input)
            padding_masks: (batch_size, seq_length) boolean tensor, 1 means keep vector at this position, 0 means padding
        Returns:
            output: (batch_size, seq_length, feat_dim)
        """

        # permute because pytorch convention for transformers is [seq_length, batch_size, feat_dim]. padding_masks [batch_size, feat_dim]
        _x = X
     
        for i in range(self.temporal_unit):
            mask = geom_noise_mask_single(X.shape[0] * X.shape[1] * X.shape[2], 5, 0.3)
            mask = mask.reshape(X.shape[0], X.shape[1], X.shape[2])
            mask = torch.from_numpy(mask).to(X.device)
            x_masked = mask * X
            _x = torch.cat([_x, x_masked], axis=-1) # [batch_size, seq_length, feat_dim * temporal_unit]
    
        
        _x = _x.reshape(X.shape[0] * (self.temporal_unit + 1), X.shape[1], X.shape[2])
  

        inp = _x.permute(1, 0, 2)
        inp = self.project_inp(inp) * math.sqrt(self.d_model)  # [seq_length, batch_size, d_model] project input vectors to d_model dimensional space
        inp = self.pos_enc(inp)  # add positional encoding

        
        output = self.transformer_encoder(inp)  # (seq_length, batch_size, d_model)
        output = self.act(output)  # the output transformer encoder/decoder embeddings don't include non-linearity
        output = output.permute(1, 0, 2)  # (batch_size, seq_length, d_model)
        output = self.dropout1(output)

        z_hat = self.project(output)
        # Most probably defining a Linear(d_model,feat_dim) vectorizes the operation over (seq_length, batch_size).
        output = self.output_layer(z_hat)  # (batch_size, seq_length, feat_dim)

        return output

    
    def project(self, z, tau=0.02):
        _z = z.transpose(1, 2) # [batch_size, d_model, seq_length]
        s = self.projector_layer(_z) # [batch_size, d_model, 1]
        s = s.squeeze(-1) 

        B = s.shape[0]
        v = s.reshape(B, -1)

        norm_v = torch.norm(v, p=2, dim=-1).unsqueeze(-1)
        v = v/norm_v
        u = torch.transpose(v, 0, 1)
        
        R = torch.matmul(v,u)
     
  
        R = torch.exp(R/tau) # (batch + mask size) x (batch + mask size)
        R = R * (torch.ones_like(R) - torch.eye(R.shape[0], device=R.device)) # zero out the weight of no masked component
        R = R/R.sum(-1).unsqueeze(-1)
        M = self.temporal_unit + 1
        R = R[::M] # extract every no mask unit # (batch size) x (batch + mask size)

        z_hat = (R.unsqueeze(-1).unsqueeze(-1) * z.unsqueeze(0)).sum(1) 
        return z_hat




# Data Loading and Preparation

In [7]:
args = Options().parse()  # `argsparse` object
args.data_dir = '../datasets/BeijingPM25Quality'
args.task = 'regression'
args.output_dir = '../experiments'
config = setup(args)
# config = setup(args)  # configuration dictionary

2023-08-23 03:25:16,988 | INFO : Stored configuration file in '../experiments/_2023-08-23_03-25-16_GlX'


In [8]:
data = TSRegressionArchive(config['data_dir'], pattern='TRAIN', config=config)
test_data = TSRegressionArchive(config['data_dir'], pattern='TEST', config=config)

11942it [00:48, 245.72it/s]
5072it [00:20, 252.53it/s]


In [36]:
data.feature_df.loc[11917]

Unnamed: 0,dim_0,dim_1,dim_2,dim_3,dim_4,dim_5,dim_6,dim_7,dim_8
11917,7.0,59.0,800.0,14.0,-1.4,1030.2,-11.3,0.0,0.9
11917,8.0,59.0,900.0,11.0,-1.6,1030.2,-10.2,0.0,0.8
11917,7.0,57.0,900.0,11.0,-1.6,1030.4,-10.7,0.0,1.6
11917,6.0,59.0,900.0,9.0,-3.0,1030.5,-10.3,0.0,1.1
11917,5.0,53.0,800.0,15.0,-2.4,1030.4,-10.5,0.0,1.2
11917,5.0,39.0,600.0,29.0,-3.7,1030.1,-10.8,0.0,0.7
11917,6.0,47.0,600.0,18.0,-5.8,1030.0,-10.6,0.0,1.0
11917,4.0,52.0,600.0,15.0,-5.5,1030.2,-11.0,0.0,0.7
11917,4.0,61.0,800.0,8.0,-5.1,1030.2,-10.9,0.0,1.0
11917,4.0,58.0,800.0,12.0,-3.0,1030.7,-10.8,0.0,1.5


In [10]:
data.labels_df

Unnamed: 0,0
0,24.0
1,93.0
2,117.0
3,58.0
4,226.0
...,...
11913,89.0
11914,281.0
11915,543.0
11916,505.0


In [10]:
train_indices, val_indices, _ = split_dataset(data_indices=data.all_IDs,
                                                         validation_method='ShuffleSplit',
                                                         n_splits=1,
                                                         validation_ratio=0.2,
                                                         test_set_ratio=0,  # used only if test_indices not explicitly specified
                                                         test_indices=None,
                                                         random_seed=1337,
                                                         labels=None)
train_indices = train_indices[0]
val_indices = val_indices[0]
test_indices = np.array(test_data.all_IDs)

In [11]:
normalizer = Normalizer(config['normalization'])
data.feature_df = normalizer.normalize(data.feature_df)
test_data.feature_df = normalizer.normalize(test_data.feature_df)

In [37]:
config['task'] = 'simmtm'
config['normalization_layer'] = 'BatchNorm'
config['out_len'] = 1
config['out_dim'] = 1
config['d_model'] = 8
config['dim_feedforward'] = 32
config['num_heads'] = 4
config['num_layers'] = 2
# SimMTMTransformerEncoder
from models.ts_transformer import model_factory
model = model_factory(config, data)
device = "cuda"
model.to(device)
model.tau = 0.05
min_tau = 0.05
model.mask_length = data.feature_df.loc[0].shape[0]//2
model.mask_ratio = .5

In [38]:
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)

In [39]:
from torch.utils.data import DataLoader
batch_size = 64
max_len = 24
train_dataloader = DataLoader(train_indices, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_indices, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_indices, batch_size=batch_size, shuffle=True)



# Pre-Training Loop

In [40]:

i = 0
max_epoch = 0
best_loss = 1e10
best_model = copy.deepcopy(model)
best_epoch = 0
loss_fn = nn.MSELoss()


while i < max_epoch:
    train_loss = { "loss": [], "loss_mse": [], "loss_con": []}
    progress_bar = tqdm(train_dataloader)
    
    for IDs in progress_bar:
        model.train()
        X = torch.tensor(data.feature_df.loc[IDs].to_numpy()).to(device)
        X = X.float()
        X = X.reshape(-1, max_len, X.shape[-1])
        
        pred, R, w1, w2, norm = model(X)  # (batch_size, padded_length, feat_dim)
        
        loss_mse = loss_fn(pred, X) 

        loss_con = contrastive_loss(R, X.shape[0])
        
        loss_norm =  norm.mean()

        loss = 1/(w1.pow(2)) * loss_mse + 1/(w2.pow(2)) * loss_con + torch.log(w1) + torch.log(w2)
  


        optimizer.zero_grad()
        loss.backward()

        nn.utils.clip_grad_norm_(model.parameters(), max_norm=4.0)
        optimizer.step()

        progress_bar.set_description("Epoch {0} - Training loss: {1:.2f} - MSE loss: {2:.2f} - Contrastive loss: {3:.2f} - norm loss: {3:.2f}".format(i, 
                loss.cpu().detach().numpy().item(), loss_mse.cpu().detach().numpy().item(), 
                loss_con.cpu().detach().numpy().item(), loss_norm.cpu().detach().numpy().item())) 
        train_loss["loss"].append(loss)
        train_loss["loss_mse"].append(loss_mse)
        train_loss["loss_con"].append(loss_con)
    model.tau = max(model.tau * 0.98, min_tau)
        
    
    val_loss = { "loss": [], "loss_mse": [], "loss_con": []}
    for IDs in val_dataloader:
        model.eval()
        X = torch.tensor(data.feature_df.loc[IDs].to_numpy()).to(device)
        X = X.float()
        X = X.reshape(-1, max_len, X.shape[-1])
        
                
        pred, R, w1, w2, norm = model(X.float())  # (batch_size, padded_length, feat_dim)
        
        loss_mse = loss_fn(pred, X) 

        loss_con = contrastive_loss(R, X.shape[0])

        loss = 1/(w1.pow(2)) * loss_mse + 1/(w2.pow(2)) * loss_con + torch.log(w1) + torch.log(w2)
        
        val_loss["loss"].append(loss)
        val_loss["loss_mse"].append(loss_mse)
        val_loss["loss_con"].append(loss_con)
    
    train_loss["loss"] = torch.tensor(train_loss["loss"]).mean()
    train_loss["loss_mse"] = torch.tensor(train_loss["loss_mse"]).mean()
    train_loss["loss_con"] = torch.tensor(train_loss["loss_con"]).mean()
    val_loss["loss"] = torch.tensor(val_loss["loss"]).mean()
    val_loss["loss_mse"] = torch.tensor(val_loss["loss_mse"]).mean()
    val_loss["loss_con"] = torch.tensor(val_loss["loss_con"]).mean()
    
    if val_loss["loss"] < best_loss and model.tau == min_tau:
        best_loss = val_loss["loss"]
        best_model = copy.deepcopy(model)
        best_epoch = i
    
    progress_bar.write("Epoch {0} - Training loss: {1:.2f} {2:.2f} {3:.2f} - Validation loss: {4:.2f} {5:.2f} {6:.2f}".format(i, 
            train_loss["loss"].cpu().detach().numpy().item(), train_loss["loss_mse"].cpu().detach().numpy().item(), train_loss["loss_con"].cpu().detach().numpy().item(),
            val_loss["loss"].cpu().detach().numpy().item(), val_loss["loss_mse"].cpu().detach().numpy().item(), val_loss["loss_con"].cpu().detach().numpy().item()))
    i += 1
    
    
tqdm.write("Best Epoch {} - Best Validation loss: {}".format(best_epoch, best_loss))

Best Epoch 0 - Best Validation loss: 10000000000.0


# Replace the output layer for downstream task

In [41]:
finetune_model = copy.deepcopy(best_model)
# finetune_model.predict_layer1 = nn.Linear(finetune_model.max_len , 1)
# out_size = data.labels_df.shape[1]
# hid_size = finetune_model.d_model 
# finetune_model.predict_layer2 = nn.Linear(hid_size , out_size)

In [42]:


optimizer = torch.optim.AdamW(finetune_model.parameters(), lr=1e-3)

# Finetune Training Loop

In [43]:
i = 0
max_epoch = 200
best_loss = 1e10
best_finetune_model = copy.deepcopy(best_model)
best_epoch = 0
device = "cuda"
finetune_model.to(device)
while i < max_epoch:
    train_loss = []
    progress_bar = tqdm(train_dataloader)
    
    for IDs in progress_bar:
        finetune_model.train()
        X = torch.tensor(data.feature_df.loc[IDs].to_numpy()).to(device)
        X = X.reshape(-1, max_len, X.shape[-1])
        targets = torch.tensor(data.labels_df.loc[IDs].to_numpy()).to(device)
        pred = finetune_model.predict(X.float()).squeeze(-1)
        loss = loss_fn(pred, targets)


        optimizer.zero_grad()
        loss.backward()

        nn.utils.clip_grad_norm_(finetune_model.parameters(), max_norm=4.0)
        optimizer.step()

        progress_bar.set_description("Epoch {} - Training loss: {}".format(i, loss)) 
        train_loss.append(loss)
    
    val_loss = []
    for IDs in val_dataloader:
        finetune_model.eval()
        X = torch.tensor(data.feature_df.loc[IDs].to_numpy()).to(device)
        X = X.reshape(-1, max_len, X.shape[-1])
        targets = torch.tensor(data.labels_df.loc[IDs].to_numpy()).to(device)
        pred = finetune_model.predict(X.float()).squeeze(-1)
        loss = loss_fn(pred, targets)
        val_loss.append(loss)
    
    train_loss = torch.tensor(train_loss).mean()
    val_loss = torch.tensor(val_loss).mean()
    
    if val_loss < best_loss:
        best_loss = val_loss
        best_finetune_model = copy.deepcopy(finetune_model)
        best_epoch = i
    
    progress_bar.write("Epoch {} - Training loss: {} - Validation loss: {}".format(i, train_loss, val_loss))
    i += 1
    
    
progress_bar.write("Best Epoch {} - Best Validation loss: {}".format(best_epoch, best_loss))

Epoch 0 - Training loss: 29801.083984375: 100%|███████████████████████████████████| 149/149 [00:07<00:00, 20.69it/s]
Epoch 1 - Training loss: 48352.125:   1%|▌                                          | 2/149 [00:00<00:07, 18.76it/s]

Epoch 0 - Training loss: 31492.59765625 - Validation loss: 28988.14453125


Epoch 1 - Training loss: 20128.15625: 100%|███████████████████████████████████████| 149/149 [00:07<00:00, 20.09it/s]
Epoch 2 - Training loss: 28452.390625:   1%|▌                                       | 2/149 [00:00<00:08, 17.46it/s]

Epoch 1 - Training loss: 27886.349609375 - Validation loss: 23294.662109375


Epoch 2 - Training loss: 15004.7412109375: 100%|██████████████████████████████████| 149/149 [00:07<00:00, 20.14it/s]
Epoch 3 - Training loss: 14163.3779296875:   2%|▋                                   | 3/149 [00:00<00:06, 21.59it/s]

Epoch 2 - Training loss: 20706.595703125 - Validation loss: 15795.7939453125


Epoch 3 - Training loss: 9088.0849609375: 100%|███████████████████████████████████| 149/149 [00:07<00:00, 20.77it/s]
Epoch 4 - Training loss: 12152.2197265625:   1%|▍                                   | 2/149 [00:00<00:07, 18.60it/s]

Epoch 3 - Training loss: 12592.333984375 - Validation loss: 8903.3544921875


Epoch 4 - Training loss: 4531.68701171875: 100%|██████████████████████████████████| 149/149 [00:07<00:00, 20.90it/s]
Epoch 5 - Training loss: 4442.42578125:   1%|▌                                      | 2/149 [00:00<00:07, 19.62it/s]

Epoch 4 - Training loss: 7185.53271484375 - Validation loss: 5026.70166015625


Epoch 5 - Training loss: 3476.274658203125: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.84it/s]
Epoch 6 - Training loss: 5708.59716796875:   2%|▋                                   | 3/149 [00:00<00:06, 21.16it/s]

Epoch 5 - Training loss: 5289.13232421875 - Validation loss: 3796.1923828125


Epoch 6 - Training loss: 3602.5380859375: 100%|███████████████████████████████████| 149/149 [00:07<00:00, 19.74it/s]
Epoch 7 - Training loss: 3887.375:   1%|▌                                           | 2/149 [00:00<00:07, 19.64it/s]

Epoch 6 - Training loss: 5160.1533203125 - Validation loss: 3547.500732421875


Epoch 7 - Training loss: 3569.257568359375: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.50it/s]
Epoch 8 - Training loss: 3384.351806640625:   1%|▍                                  | 2/149 [00:00<00:07, 19.45it/s]

Epoch 7 - Training loss: 5073.64111328125 - Validation loss: 3128.290283203125


Epoch 8 - Training loss: 4686.814453125: 100%|████████████████████████████████████| 149/149 [00:07<00:00, 19.16it/s]
Epoch 9 - Training loss: 4606.12109375:   1%|▌                                      | 2/149 [00:00<00:14, 10.41it/s]

Epoch 8 - Training loss: 4710.4599609375 - Validation loss: 3174.173583984375


Epoch 9 - Training loss: 3752.217041015625: 100%|█████████████████████████████████| 149/149 [00:12<00:00, 11.63it/s]
Epoch 10 - Training loss: 4351.890625:   2%|▊                                       | 3/149 [00:00<00:06, 20.96it/s]

Epoch 9 - Training loss: 4761.193359375 - Validation loss: 2933.801513671875


Epoch 10 - Training loss: 15640.333984375: 100%|██████████████████████████████████| 149/149 [00:07<00:00, 20.52it/s]
Epoch 11 - Training loss: 5944.3251953125:   2%|▋                                   | 3/149 [00:00<00:06, 21.01it/s]

Epoch 10 - Training loss: 4765.68701171875 - Validation loss: 3285.215576171875


Epoch 11 - Training loss: 4405.81396484375: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.04it/s]
Epoch 12 - Training loss: 2389.09130859375:   2%|▋                                  | 3/149 [00:00<00:06, 21.00it/s]

Epoch 11 - Training loss: 4661.48876953125 - Validation loss: 3272.507080078125


Epoch 12 - Training loss: 3002.27197265625: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.52it/s]
Epoch 13 - Training loss: 3325.21826171875:   2%|▋                                  | 3/149 [00:00<00:06, 21.19it/s]

Epoch 12 - Training loss: 4790.48583984375 - Validation loss: 2942.647705078125


Epoch 13 - Training loss: 8323.0361328125: 100%|██████████████████████████████████| 149/149 [00:07<00:00, 21.05it/s]
Epoch 14 - Training loss: 6824.62744140625:   1%|▍                                  | 2/149 [00:00<00:07, 19.73it/s]

Epoch 13 - Training loss: 4640.79248046875 - Validation loss: 3000.5869140625


Epoch 14 - Training loss: 7530.66064453125: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.94it/s]
Epoch 15 - Training loss: 4018.089111328125:   2%|▋                                 | 3/149 [00:00<00:06, 21.43it/s]

Epoch 14 - Training loss: 4558.037109375 - Validation loss: 3135.088623046875


Epoch 15 - Training loss: 5252.07763671875: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 21.16it/s]
Epoch 16 - Training loss: 10368.453125:   2%|▊                                      | 3/149 [00:00<00:06, 21.15it/s]

Epoch 15 - Training loss: 4637.14892578125 - Validation loss: 3305.073486328125


Epoch 16 - Training loss: 3650.383544921875: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.79it/s]
Epoch 17 - Training loss: 3642.310546875:   2%|▋                                    | 3/149 [00:00<00:06, 21.53it/s]

Epoch 16 - Training loss: 4612.236328125 - Validation loss: 2898.341064453125


Epoch 17 - Training loss: 9102.373046875: 100%|███████████████████████████████████| 149/149 [00:07<00:00, 21.28it/s]
Epoch 18 - Training loss: 4307.6201171875:   2%|▋                                   | 3/149 [00:00<00:07, 20.37it/s]

Epoch 17 - Training loss: 4620.53564453125 - Validation loss: 3066.79931640625


Epoch 18 - Training loss: 4004.91357421875: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.92it/s]
Epoch 19 - Training loss: 6703.09423828125:   2%|▋                                  | 3/149 [00:00<00:06, 21.59it/s]

Epoch 18 - Training loss: 4608.63330078125 - Validation loss: 2889.80712890625


Epoch 19 - Training loss: 4567.47216796875: 100%|█████████████████████████████████| 149/149 [00:06<00:00, 21.44it/s]
Epoch 20 - Training loss: 5572.45703125:   2%|▊                                     | 3/149 [00:00<00:06, 21.54it/s]

Epoch 19 - Training loss: 4472.93603515625 - Validation loss: 2834.609130859375


Epoch 20 - Training loss: 3965.09912109375: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.31it/s]
Epoch 21 - Training loss: 5784.92431640625:   1%|▍                                  | 2/149 [00:00<00:07, 19.51it/s]

Epoch 20 - Training loss: 4477.6357421875 - Validation loss: 3002.124267578125


Epoch 21 - Training loss: 6086.22265625: 100%|████████████████████████████████████| 149/149 [00:07<00:00, 19.80it/s]
Epoch 22 - Training loss: 3618.22314453125:   2%|▋                                  | 3/149 [00:00<00:07, 20.71it/s]

Epoch 21 - Training loss: 4583.1318359375 - Validation loss: 2842.6044921875


Epoch 22 - Training loss: 2735.734130859375: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.76it/s]
Epoch 23 - Training loss: 3959.106201171875:   2%|▋                                 | 3/149 [00:00<00:06, 21.09it/s]

Epoch 22 - Training loss: 4566.92626953125 - Validation loss: 2944.19140625


Epoch 23 - Training loss: 6316.63623046875: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 21.12it/s]
Epoch 24 - Training loss: 5379.9482421875:   1%|▍                                   | 2/149 [00:00<00:07, 18.98it/s]

Epoch 23 - Training loss: 4522.0791015625 - Validation loss: 2656.213134765625


Epoch 24 - Training loss: 5159.04443359375: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 19.06it/s]
Epoch 25 - Training loss: 4317.5341796875:   1%|▍                                   | 2/149 [00:00<00:08, 18.19it/s]

Epoch 24 - Training loss: 4377.287109375 - Validation loss: 2796.447265625


Epoch 25 - Training loss: 4176.29833984375: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 19.22it/s]
Epoch 26 - Training loss: 4129.7783203125:   1%|▍                                   | 2/149 [00:00<00:07, 18.73it/s]

Epoch 25 - Training loss: 4478.30078125 - Validation loss: 2873.267822265625


Epoch 26 - Training loss: 3361.11767578125: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.92it/s]
Epoch 27 - Training loss: 3351.790283203125:   2%|▋                                 | 3/149 [00:00<00:06, 20.90it/s]

Epoch 26 - Training loss: 4520.9345703125 - Validation loss: 2837.48681640625


Epoch 27 - Training loss: 3458.169921875: 100%|███████████████████████████████████| 149/149 [00:07<00:00, 21.19it/s]
Epoch 28 - Training loss: 3159.5:   1%|▌                                            | 2/149 [00:00<00:07, 19.26it/s]

Epoch 27 - Training loss: 4452.18408203125 - Validation loss: 3121.861572265625


Epoch 28 - Training loss: 3958.5947265625: 100%|██████████████████████████████████| 149/149 [00:07<00:00, 19.51it/s]
Epoch 29 - Training loss: 2942.08544921875:   1%|▍                                  | 2/149 [00:00<00:08, 17.77it/s]

Epoch 28 - Training loss: 4511.24072265625 - Validation loss: 2888.46240234375


Epoch 29 - Training loss: 3176.832763671875: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.65it/s]
Epoch 30 - Training loss: 4231.29296875:   2%|▊                                     | 3/149 [00:00<00:06, 20.93it/s]

Epoch 29 - Training loss: 4483.4130859375 - Validation loss: 2891.071044921875


Epoch 30 - Training loss: 3587.47314453125: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 19.88it/s]
Epoch 31 - Training loss: 4347.765625:   2%|▊                                       | 3/149 [00:00<00:06, 21.34it/s]

Epoch 30 - Training loss: 4562.826171875 - Validation loss: 2784.565673828125


Epoch 31 - Training loss: 3226.00341796875: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 19.78it/s]
Epoch 32 - Training loss: 3192.5458984375:   2%|▋                                   | 3/149 [00:00<00:06, 21.23it/s]

Epoch 31 - Training loss: 4401.9130859375 - Validation loss: 3033.1064453125


Epoch 32 - Training loss: 5916.2099609375: 100%|██████████████████████████████████| 149/149 [00:07<00:00, 20.32it/s]
Epoch 33 - Training loss: 3705.96484375:   1%|▌                                     | 2/149 [00:00<00:07, 19.99it/s]

Epoch 32 - Training loss: 4381.00341796875 - Validation loss: 3018.94287109375


Epoch 33 - Training loss: 3118.888427734375: 100%|████████████████████████████████| 149/149 [00:07<00:00, 19.47it/s]
Epoch 34 - Training loss: 3710.44775390625:   1%|▍                                  | 2/149 [00:00<00:07, 19.42it/s]

Epoch 33 - Training loss: 4323.60546875 - Validation loss: 3047.81005859375


Epoch 34 - Training loss: 7333.35791015625: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.04it/s]
Epoch 35 - Training loss: 8222.7529296875:   2%|▋                                   | 3/149 [00:00<00:06, 21.81it/s]

Epoch 34 - Training loss: 4466.49853515625 - Validation loss: 2922.7939453125


Epoch 35 - Training loss: 7346.54296875: 100%|████████████████████████████████████| 149/149 [00:07<00:00, 19.63it/s]
Epoch 36 - Training loss: 3059.25146484375:   1%|▍                                  | 2/149 [00:00<00:08, 18.16it/s]

Epoch 35 - Training loss: 4369.94921875 - Validation loss: 3085.18310546875


Epoch 36 - Training loss: 5968.5126953125: 100%|██████████████████████████████████| 149/149 [00:07<00:00, 20.72it/s]
Epoch 37 - Training loss: 2985.105712890625:   1%|▍                                 | 2/149 [00:00<00:07, 19.03it/s]

Epoch 36 - Training loss: 4344.18115234375 - Validation loss: 3030.063232421875


Epoch 37 - Training loss: 5361.41845703125: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.94it/s]
Epoch 38 - Training loss: 3476.56591796875:   1%|▍                                  | 2/149 [00:00<00:07, 19.82it/s]

Epoch 37 - Training loss: 4403.72216796875 - Validation loss: 3287.73095703125


Epoch 38 - Training loss: 5694.443359375: 100%|███████████████████████████████████| 149/149 [00:07<00:00, 20.06it/s]
Epoch 39 - Training loss: 1648.411865234375:   2%|▋                                 | 3/149 [00:00<00:06, 21.10it/s]

Epoch 38 - Training loss: 4403.5703125 - Validation loss: 3172.2861328125


Epoch 39 - Training loss: 4444.8125: 100%|████████████████████████████████████████| 149/149 [00:07<00:00, 21.11it/s]
Epoch 40 - Training loss: 4937.6044921875:   2%|▋                                   | 3/149 [00:00<00:06, 21.36it/s]

Epoch 39 - Training loss: 4238.3037109375 - Validation loss: 2965.353759765625


Epoch 40 - Training loss: 3681.260498046875: 100%|████████████████████████████████| 149/149 [00:07<00:00, 19.72it/s]
Epoch 41 - Training loss: 3140.19140625:   2%|▊                                     | 3/149 [00:00<00:06, 21.10it/s]

Epoch 40 - Training loss: 4449.06689453125 - Validation loss: 2665.3134765625


Epoch 41 - Training loss: 4569.3896484375: 100%|██████████████████████████████████| 149/149 [00:07<00:00, 20.66it/s]
Epoch 42 - Training loss: 2590.968994140625:   2%|▋                                 | 3/149 [00:00<00:06, 20.90it/s]

Epoch 41 - Training loss: 4402.33203125 - Validation loss: 2802.16162109375


Epoch 42 - Training loss: 3819.4404296875: 100%|██████████████████████████████████| 149/149 [00:07<00:00, 20.62it/s]
Epoch 43 - Training loss: 10524.4111328125:   1%|▍                                  | 2/149 [00:00<00:07, 19.23it/s]

Epoch 42 - Training loss: 4284.96923828125 - Validation loss: 2910.350830078125


Epoch 43 - Training loss: 5114.73974609375: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.70it/s]
Epoch 44 - Training loss: 2877.75390625:   2%|▊                                     | 3/149 [00:00<00:06, 21.34it/s]

Epoch 43 - Training loss: 4300.92431640625 - Validation loss: 2911.7802734375


Epoch 44 - Training loss: 3733.966552734375: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.13it/s]
Epoch 45 - Training loss: 2848.32275390625:   1%|▍                                  | 2/149 [00:00<00:07, 18.52it/s]

Epoch 44 - Training loss: 4334.7880859375 - Validation loss: 2741.633544921875


Epoch 45 - Training loss: 4082.7724609375: 100%|██████████████████████████████████| 149/149 [00:06<00:00, 21.36it/s]
Epoch 46 - Training loss: 5518.39111328125:   2%|▋                                  | 3/149 [00:00<00:07, 20.41it/s]

Epoch 45 - Training loss: 4376.189453125 - Validation loss: 2627.56640625


Epoch 46 - Training loss: 3687.510498046875: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.65it/s]
Epoch 47 - Training loss: 6544.0810546875:   1%|▍                                   | 2/149 [00:00<00:07, 18.44it/s]

Epoch 46 - Training loss: 4169.75732421875 - Validation loss: 2759.097412109375


Epoch 47 - Training loss: 3261.67529296875: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.53it/s]
Epoch 48 - Training loss: 2910.69873046875:   1%|▍                                  | 2/149 [00:00<00:07, 19.98it/s]

Epoch 47 - Training loss: 4253.5859375 - Validation loss: 2613.200927734375


Epoch 48 - Training loss: 5389.2705078125: 100%|██████████████████████████████████| 149/149 [00:07<00:00, 20.19it/s]
Epoch 49 - Training loss: 4987.2841796875:   1%|▍                                   | 2/149 [00:00<00:07, 19.06it/s]

Epoch 48 - Training loss: 4216.46875 - Validation loss: 2789.24755859375


Epoch 49 - Training loss: 2165.817138671875: 100%|████████████████████████████████| 149/149 [00:07<00:00, 21.04it/s]
Epoch 50 - Training loss: 2557.8359375:   2%|▊                                      | 3/149 [00:00<00:07, 20.48it/s]

Epoch 49 - Training loss: 4175.04150390625 - Validation loss: 2686.764404296875


Epoch 50 - Training loss: 3319.689453125: 100%|███████████████████████████████████| 149/149 [00:07<00:00, 20.51it/s]
Epoch 51 - Training loss: 1934.184814453125:   2%|▋                                 | 3/149 [00:00<00:06, 21.14it/s]

Epoch 50 - Training loss: 4200.18359375 - Validation loss: 2820.598388671875


Epoch 51 - Training loss: 2589.13720703125: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.47it/s]
Epoch 52 - Training loss: 6289.1064453125:   1%|▍                                   | 2/149 [00:00<00:07, 19.39it/s]

Epoch 51 - Training loss: 4254.96630859375 - Validation loss: 2738.4609375


Epoch 52 - Training loss: 10113.78515625: 100%|███████████████████████████████████| 149/149 [00:07<00:00, 20.16it/s]
Epoch 53 - Training loss: 4062.399658203125:   2%|▋                                 | 3/149 [00:00<00:06, 21.45it/s]

Epoch 52 - Training loss: 4252.9345703125 - Validation loss: 3013.67431640625


Epoch 53 - Training loss: 2607.68798828125: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 21.20it/s]
Epoch 54 - Training loss: 4022.08154296875:   1%|▍                                  | 2/149 [00:00<00:07, 18.90it/s]

Epoch 53 - Training loss: 4226.75 - Validation loss: 2632.510009765625


Epoch 54 - Training loss: 4506.20751953125: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.62it/s]
Epoch 55 - Training loss: 3053.76708984375:   2%|▋                                  | 3/149 [00:00<00:06, 21.68it/s]

Epoch 54 - Training loss: 4286.2333984375 - Validation loss: 2854.106201171875


Epoch 55 - Training loss: 3795.148193359375: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.24it/s]
Epoch 56 - Training loss: 4066.3017578125:   2%|▋                                   | 3/149 [00:00<00:06, 21.54it/s]

Epoch 55 - Training loss: 4078.078369140625 - Validation loss: 3019.132080078125


Epoch 56 - Training loss: 2881.521484375: 100%|███████████████████████████████████| 149/149 [00:07<00:00, 21.02it/s]
Epoch 57 - Training loss: 3910.169921875:   2%|▋                                    | 3/149 [00:00<00:07, 20.30it/s]

Epoch 56 - Training loss: 4302.4091796875 - Validation loss: 2584.889892578125


Epoch 57 - Training loss: 7022.19677734375: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.85it/s]
Epoch 58 - Training loss: 4865.07275390625:   2%|▋                                  | 3/149 [00:00<00:07, 20.49it/s]

Epoch 57 - Training loss: 4133.283203125 - Validation loss: 2823.134765625


Epoch 58 - Training loss: 2041.2724609375: 100%|██████████████████████████████████| 149/149 [00:07<00:00, 19.90it/s]
Epoch 59 - Training loss: 4032.194580078125:   1%|▍                                 | 2/149 [00:00<00:07, 19.20it/s]

Epoch 58 - Training loss: 4248.23046875 - Validation loss: 2908.78466796875


Epoch 59 - Training loss: 1989.8212890625: 100%|██████████████████████████████████| 149/149 [00:07<00:00, 20.31it/s]
Epoch 60 - Training loss: 3492.134765625:   2%|▋                                    | 3/149 [00:00<00:06, 21.57it/s]

Epoch 59 - Training loss: 4196.06103515625 - Validation loss: 2662.803955078125


Epoch 60 - Training loss: 3659.900634765625: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.99it/s]
Epoch 61 - Training loss: 5242.984375:   2%|▊                                       | 3/149 [00:00<00:06, 21.33it/s]

Epoch 60 - Training loss: 4298.9140625 - Validation loss: 2846.490966796875


Epoch 61 - Training loss: 4487.23291015625: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.59it/s]
Epoch 62 - Training loss: 2976.342041015625:   2%|▋                                 | 3/149 [00:00<00:07, 20.59it/s]

Epoch 61 - Training loss: 4169.99169921875 - Validation loss: 2550.365234375


Epoch 62 - Training loss: 5875.09033203125: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.53it/s]
Epoch 63 - Training loss: 5684.6708984375:   2%|▋                                   | 3/149 [00:00<00:07, 20.20it/s]

Epoch 62 - Training loss: 4195.47314453125 - Validation loss: 3049.6416015625


Epoch 63 - Training loss: 5710.8134765625: 100%|██████████████████████████████████| 149/149 [00:07<00:00, 20.45it/s]
Epoch 64 - Training loss: 7365.86181640625:   1%|▍                                  | 2/149 [00:00<00:07, 18.79it/s]

Epoch 63 - Training loss: 4242.48583984375 - Validation loss: 2940.68603515625


Epoch 64 - Training loss: 4186.16748046875: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.54it/s]
Epoch 65 - Training loss: 2288.447265625:   1%|▍                                    | 2/149 [00:00<00:07, 19.71it/s]

Epoch 64 - Training loss: 4186.69482421875 - Validation loss: 2569.074951171875


Epoch 65 - Training loss: 5173.0009765625: 100%|██████████████████████████████████| 149/149 [00:12<00:00, 12.31it/s]
Epoch 66 - Training loss: 5951.9404296875:   1%|▍                                   | 2/149 [00:00<00:12, 11.82it/s]

Epoch 65 - Training loss: 4342.8095703125 - Validation loss: 2844.49462890625


Epoch 66 - Training loss: 5669.60498046875: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 19.15it/s]
Epoch 67 - Training loss: 3852.230224609375:   1%|▍                                 | 2/149 [00:00<00:07, 18.80it/s]

Epoch 66 - Training loss: 4167.1708984375 - Validation loss: 2550.026611328125


Epoch 67 - Training loss: 6560.4453125: 100%|█████████████████████████████████████| 149/149 [00:07<00:00, 19.70it/s]
Epoch 68 - Training loss: 2913.9677734375:   2%|▋                                   | 3/149 [00:00<00:06, 21.57it/s]

Epoch 67 - Training loss: 4168.35546875 - Validation loss: 2952.233154296875


Epoch 68 - Training loss: 3788.644287109375: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.60it/s]
Epoch 69 - Training loss: 5810.69580078125:   2%|▋                                  | 3/149 [00:00<00:06, 21.52it/s]

Epoch 68 - Training loss: 4104.8466796875 - Validation loss: 2675.093994140625


Epoch 69 - Training loss: 1904.633056640625: 100%|████████████████████████████████| 149/149 [00:07<00:00, 21.03it/s]
Epoch 70 - Training loss: 3933.6259765625:   1%|▍                                   | 2/149 [00:00<00:07, 19.14it/s]

Epoch 69 - Training loss: 4048.93408203125 - Validation loss: 2988.785888671875


Epoch 70 - Training loss: 5850.0654296875: 100%|██████████████████████████████████| 149/149 [00:07<00:00, 20.93it/s]
Epoch 71 - Training loss: 2546.85400390625:   2%|▋                                  | 3/149 [00:00<00:06, 21.52it/s]

Epoch 70 - Training loss: 4079.79541015625 - Validation loss: 2757.021240234375


Epoch 71 - Training loss: 3597.79931640625: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 19.81it/s]
Epoch 72 - Training loss: 4165.22265625:   2%|▊                                     | 3/149 [00:00<00:06, 21.65it/s]

Epoch 71 - Training loss: 3991.909912109375 - Validation loss: 2607.131591796875


Epoch 72 - Training loss: 5515.03466796875: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.99it/s]
Epoch 73 - Training loss: 3976.09423828125:   2%|▋                                  | 3/149 [00:00<00:06, 21.06it/s]

Epoch 72 - Training loss: 4129.46044921875 - Validation loss: 2613.276611328125


Epoch 73 - Training loss: 2302.966552734375: 100%|████████████████████████████████| 149/149 [00:07<00:00, 21.03it/s]
Epoch 74 - Training loss: 3017.561767578125:   1%|▍                                 | 2/149 [00:00<00:07, 18.94it/s]

Epoch 73 - Training loss: 4079.493408203125 - Validation loss: 2633.86376953125


Epoch 74 - Training loss: 3917.93994140625: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.37it/s]
Epoch 75 - Training loss: 2850.268798828125:   1%|▍                                 | 2/149 [00:00<00:07, 19.17it/s]

Epoch 74 - Training loss: 4148.8046875 - Validation loss: 2391.324951171875


Epoch 75 - Training loss: 4561.033203125: 100%|███████████████████████████████████| 149/149 [00:07<00:00, 19.67it/s]
Epoch 76 - Training loss: 3348.40185546875:   2%|▋                                  | 3/149 [00:00<00:06, 21.35it/s]

Epoch 75 - Training loss: 4131.150390625 - Validation loss: 2714.111083984375


Epoch 76 - Training loss: 5784.8271484375: 100%|██████████████████████████████████| 149/149 [00:07<00:00, 20.75it/s]
Epoch 77 - Training loss: 6808.61572265625:   2%|▋                                  | 3/149 [00:00<00:07, 20.22it/s]

Epoch 76 - Training loss: 4135.6513671875 - Validation loss: 2756.076904296875


Epoch 77 - Training loss: 3995.02099609375: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.26it/s]
Epoch 78 - Training loss: 4157.6552734375:   2%|▋                                   | 3/149 [00:00<00:06, 21.13it/s]

Epoch 77 - Training loss: 4214.6904296875 - Validation loss: 2701.6943359375


Epoch 78 - Training loss: 3142.458740234375: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.47it/s]
Epoch 79 - Training loss: 3505.639892578125:   2%|▋                                 | 3/149 [00:00<00:07, 19.41it/s]

Epoch 78 - Training loss: 4099.69189453125 - Validation loss: 2536.504638671875


Epoch 79 - Training loss: 3859.42236328125: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.03it/s]
Epoch 80 - Training loss: 2436.548828125:   1%|▍                                    | 2/149 [00:00<00:08, 17.95it/s]

Epoch 79 - Training loss: 4018.057861328125 - Validation loss: 2815.54736328125


Epoch 80 - Training loss: 3615.760986328125: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.47it/s]
Epoch 81 - Training loss: 3693.94287109375:   1%|▍                                  | 2/149 [00:00<00:08, 17.80it/s]

Epoch 80 - Training loss: 4050.444091796875 - Validation loss: 2503.0380859375


Epoch 81 - Training loss: 2275.032470703125: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.58it/s]
Epoch 82 - Training loss: 3158.4384765625:   2%|▋                                   | 3/149 [00:00<00:07, 20.02it/s]

Epoch 81 - Training loss: 4048.192138671875 - Validation loss: 2625.299072265625


Epoch 82 - Training loss: 4871.3798828125: 100%|██████████████████████████████████| 149/149 [00:07<00:00, 19.99it/s]
Epoch 83 - Training loss: 3488.75390625:   1%|▌                                     | 2/149 [00:00<00:08, 18.10it/s]

Epoch 82 - Training loss: 4116.76953125 - Validation loss: 2887.9189453125


Epoch 83 - Training loss: 3421.5703125: 100%|█████████████████████████████████████| 149/149 [00:07<00:00, 20.22it/s]
Epoch 84 - Training loss: 3330.830078125:   1%|▍                                    | 2/149 [00:00<00:07, 18.91it/s]

Epoch 83 - Training loss: 4086.08251953125 - Validation loss: 2794.8154296875


Epoch 84 - Training loss: 2609.115234375: 100%|███████████████████████████████████| 149/149 [00:07<00:00, 20.85it/s]
Epoch 85 - Training loss: 5674.0126953125:   1%|▍                                   | 2/149 [00:00<00:08, 17.63it/s]

Epoch 84 - Training loss: 3952.26416015625 - Validation loss: 2418.1806640625


Epoch 85 - Training loss: 1318.887451171875: 100%|████████████████████████████████| 149/149 [00:10<00:00, 14.37it/s]
Epoch 86 - Training loss: 5561.73193359375:   1%|▍                                  | 2/149 [00:00<00:08, 16.67it/s]

Epoch 85 - Training loss: 4139.13525390625 - Validation loss: 2606.25634765625


Epoch 86 - Training loss: 4322.53173828125: 100%|█████████████████████████████████| 149/149 [00:10<00:00, 14.78it/s]
Epoch 87 - Training loss: 2115.97119140625:   1%|▍                                  | 2/149 [00:00<00:07, 19.81it/s]

Epoch 86 - Training loss: 3923.076416015625 - Validation loss: 2802.3955078125


Epoch 87 - Training loss: 5993.65234375: 100%|████████████████████████████████████| 149/149 [00:07<00:00, 20.13it/s]
Epoch 88 - Training loss: 3629.89501953125:   2%|▋                                  | 3/149 [00:00<00:06, 21.52it/s]

Epoch 87 - Training loss: 4039.929931640625 - Validation loss: 2648.404296875


Epoch 88 - Training loss: 5167.83154296875: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.61it/s]
Epoch 89 - Training loss: 3005.264892578125:   2%|▋                                 | 3/149 [00:00<00:07, 19.70it/s]

Epoch 88 - Training loss: 4069.083984375 - Validation loss: 2565.440185546875


Epoch 89 - Training loss: 5455.2646484375: 100%|██████████████████████████████████| 149/149 [00:07<00:00, 19.92it/s]
Epoch 90 - Training loss: 2850.431640625:   1%|▍                                    | 2/149 [00:00<00:09, 15.14it/s]

Epoch 89 - Training loss: 3858.3095703125 - Validation loss: 2767.17626953125


Epoch 90 - Training loss: 4291.60986328125: 100%|█████████████████████████████████| 149/149 [00:11<00:00, 13.12it/s]
Epoch 91 - Training loss: 3132.05224609375:   2%|▋                                  | 3/149 [00:00<00:07, 20.21it/s]

Epoch 90 - Training loss: 3989.448486328125 - Validation loss: 2552.98828125


Epoch 91 - Training loss: 4266.94677734375: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.41it/s]
Epoch 92 - Training loss: 3828.0927734375:   1%|▍                                   | 2/149 [00:00<00:08, 18.24it/s]

Epoch 91 - Training loss: 3976.379150390625 - Validation loss: 2571.394775390625


Epoch 92 - Training loss: 2719.73388671875: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.97it/s]
Epoch 93 - Training loss: 3839.73193359375:   1%|▍                                  | 2/149 [00:00<00:07, 18.96it/s]

Epoch 92 - Training loss: 4049.16650390625 - Validation loss: 2530.085205078125


Epoch 93 - Training loss: 4809.03466796875: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 19.75it/s]
Epoch 94 - Training loss: 1984.3037109375:   2%|▋                                   | 3/149 [00:00<00:06, 20.96it/s]

Epoch 93 - Training loss: 4253.693359375 - Validation loss: 2507.2021484375


Epoch 94 - Training loss: 3800.67041015625: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.44it/s]
Epoch 95 - Training loss: 3027.16748046875:   1%|▍                                  | 2/149 [00:00<00:07, 19.80it/s]

Epoch 94 - Training loss: 4101.74853515625 - Validation loss: 2504.597900390625


Epoch 95 - Training loss: 3841.00439453125: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.36it/s]
Epoch 96 - Training loss: 2797.42578125:   2%|▊                                     | 3/149 [00:00<00:06, 20.86it/s]

Epoch 95 - Training loss: 3955.374267578125 - Validation loss: 2456.906494140625


Epoch 96 - Training loss: 1771.6392822265625: 100%|███████████████████████████████| 149/149 [00:07<00:00, 20.36it/s]
Epoch 97 - Training loss: 4348.2294921875:   2%|▋                                   | 3/149 [00:00<00:06, 21.24it/s]

Epoch 96 - Training loss: 3940.13134765625 - Validation loss: 2694.668701171875


Epoch 97 - Training loss: 3680.1875: 100%|████████████████████████████████████████| 149/149 [00:07<00:00, 20.26it/s]
Epoch 98 - Training loss: 9411.0185546875:   2%|▋                                   | 3/149 [00:00<00:07, 20.70it/s]

Epoch 97 - Training loss: 3996.1064453125 - Validation loss: 2574.004638671875


Epoch 98 - Training loss: 3530.026123046875: 100%|████████████████████████████████| 149/149 [00:12<00:00, 12.02it/s]
Epoch 99 - Training loss: 3428.04833984375:   1%|▍                                  | 2/149 [00:00<00:11, 12.96it/s]

Epoch 98 - Training loss: 4020.063720703125 - Validation loss: 2620.65673828125


Epoch 99 - Training loss: 3719.20068359375: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 19.72it/s]
Epoch 100 - Training loss: 3665.735107421875:   1%|▍                                | 2/149 [00:00<00:07, 19.74it/s]

Epoch 99 - Training loss: 4056.80029296875 - Validation loss: 2705.283935546875


Epoch 100 - Training loss: 3280.882080078125: 100%|███████████████████████████████| 149/149 [00:07<00:00, 20.09it/s]
Epoch 101 - Training loss: 2754.182861328125:   1%|▍                                | 2/149 [00:00<00:07, 19.16it/s]

Epoch 100 - Training loss: 3998.942138671875 - Validation loss: 2710.0234375


Epoch 101 - Training loss: 2248.86669921875: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.93it/s]
Epoch 102 - Training loss: 6041.54296875:   2%|▋                                    | 3/149 [00:00<00:06, 21.42it/s]

Epoch 101 - Training loss: 3973.221435546875 - Validation loss: 2406.678955078125


Epoch 102 - Training loss: 4159.53759765625: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.45it/s]
Epoch 103 - Training loss: 4153.8046875:   1%|▌                                     | 2/149 [00:00<00:08, 18.14it/s]

Epoch 102 - Training loss: 3889.220703125 - Validation loss: 2606.109619140625


Epoch 103 - Training loss: 2815.295166015625: 100%|███████████████████████████████| 149/149 [00:07<00:00, 20.73it/s]
Epoch 104 - Training loss: 12277.5458984375:   2%|▋                                 | 3/149 [00:00<00:06, 21.39it/s]

Epoch 103 - Training loss: 4007.25830078125 - Validation loss: 2849.724609375


Epoch 104 - Training loss: 3218.809326171875: 100%|███████████████████████████████| 149/149 [00:07<00:00, 20.80it/s]
Epoch 105 - Training loss: 3705.564453125:   1%|▍                                   | 2/149 [00:00<00:07, 18.63it/s]

Epoch 104 - Training loss: 4010.602783203125 - Validation loss: 2593.06494140625


Epoch 105 - Training loss: 3396.0673828125: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 19.14it/s]
Epoch 106 - Training loss: 4344.984375:   1%|▌                                      | 2/149 [00:00<00:07, 19.57it/s]

Epoch 105 - Training loss: 3967.6669921875 - Validation loss: 2604.376953125


Epoch 106 - Training loss: 2890.41162109375: 100%|████████████████████████████████| 149/149 [00:07<00:00, 19.52it/s]
Epoch 107 - Training loss: 3229.25537109375:   1%|▍                                 | 2/149 [00:00<00:07, 19.96it/s]

Epoch 106 - Training loss: 4036.478515625 - Validation loss: 2443.694580078125


Epoch 107 - Training loss: 3539.7734375: 100%|████████████████████████████████████| 149/149 [00:07<00:00, 20.89it/s]
Epoch 108 - Training loss: 2524.701416015625:   1%|▍                                | 2/149 [00:00<00:08, 17.96it/s]

Epoch 107 - Training loss: 4066.188720703125 - Validation loss: 2404.15673828125


Epoch 108 - Training loss: 3228.561767578125: 100%|███████████████████████████████| 149/149 [00:07<00:00, 19.23it/s]
Epoch 109 - Training loss: 3134.171142578125:   1%|▍                                | 2/149 [00:00<00:07, 18.70it/s]

Epoch 108 - Training loss: 3924.40771484375 - Validation loss: 2501.990234375


Epoch 109 - Training loss: 2628.513916015625: 100%|███████████████████████████████| 149/149 [00:07<00:00, 19.78it/s]
Epoch 110 - Training loss: 3334.57177734375:   2%|▋                                 | 3/149 [00:00<00:07, 20.59it/s]

Epoch 109 - Training loss: 4028.427001953125 - Validation loss: 2624.701904296875


Epoch 110 - Training loss: 2873.1689453125: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.66it/s]
Epoch 111 - Training loss: 8927.31640625:   2%|▋                                    | 3/149 [00:00<00:06, 20.88it/s]

Epoch 110 - Training loss: 4058.75537109375 - Validation loss: 2545.518798828125


Epoch 111 - Training loss: 5440.72705078125: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.66it/s]
Epoch 112 - Training loss: 7930.333984375:   2%|▋                                   | 3/149 [00:00<00:07, 20.23it/s]

Epoch 111 - Training loss: 4033.91357421875 - Validation loss: 2591.3125


Epoch 112 - Training loss: 3344.867431640625: 100%|███████████████████████████████| 149/149 [00:07<00:00, 20.65it/s]
Epoch 113 - Training loss: 2750.95849609375:   1%|▍                                 | 2/149 [00:00<00:08, 18.34it/s]

Epoch 112 - Training loss: 3899.861572265625 - Validation loss: 2711.07861328125


Epoch 113 - Training loss: 1587.2862548828125: 100%|██████████████████████████████| 149/149 [00:07<00:00, 19.22it/s]
Epoch 114 - Training loss: 4364.0673828125:   2%|▋                                  | 3/149 [00:00<00:07, 20.49it/s]

Epoch 113 - Training loss: 3938.0595703125 - Validation loss: 2908.598388671875


Epoch 114 - Training loss: 2392.416748046875: 100%|███████████████████████████████| 149/149 [00:07<00:00, 20.71it/s]
Epoch 115 - Training loss: 3254.9970703125:   2%|▋                                  | 3/149 [00:00<00:07, 19.82it/s]

Epoch 114 - Training loss: 3881.521728515625 - Validation loss: 2602.444580078125


Epoch 115 - Training loss: 6925.55224609375: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.13it/s]
Epoch 116 - Training loss: 3446.88916015625:   1%|▍                                 | 2/149 [00:00<00:08, 18.21it/s]

Epoch 115 - Training loss: 4107.08203125 - Validation loss: 2573.594482421875


Epoch 116 - Training loss: 2912.0458984375: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.91it/s]
Epoch 117 - Training loss: 2406.238037109375:   1%|▍                                | 2/149 [00:00<00:07, 19.83it/s]

Epoch 116 - Training loss: 4047.585205078125 - Validation loss: 2685.170166015625


Epoch 117 - Training loss: 1972.4923095703125: 100%|██████████████████████████████| 149/149 [00:07<00:00, 20.25it/s]
Epoch 118 - Training loss: 3001.734130859375:   2%|▋                                | 3/149 [00:00<00:07, 20.81it/s]

Epoch 117 - Training loss: 3866.560302734375 - Validation loss: 2416.8427734375


Epoch 118 - Training loss: 4427.69580078125: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.57it/s]
Epoch 119 - Training loss: 2882.4033203125:   2%|▋                                  | 3/149 [00:00<00:06, 21.56it/s]

Epoch 118 - Training loss: 3941.921875 - Validation loss: 2741.931396484375


Epoch 119 - Training loss: 3545.6572265625: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 19.87it/s]
Epoch 120 - Training loss: 4201.67041015625:   1%|▍                                 | 2/149 [00:00<00:07, 19.65it/s]

Epoch 119 - Training loss: 3920.555908203125 - Validation loss: 2702.4892578125


Epoch 120 - Training loss: 3068.023193359375: 100%|███████████████████████████████| 149/149 [00:07<00:00, 19.89it/s]
Epoch 121 - Training loss: 1626.0543212890625:   1%|▍                               | 2/149 [00:00<00:07, 19.15it/s]

Epoch 120 - Training loss: 3854.025634765625 - Validation loss: 2583.4853515625


Epoch 121 - Training loss: 3631.805419921875: 100%|███████████████████████████████| 149/149 [00:07<00:00, 21.20it/s]
Epoch 122 - Training loss: 2828.518310546875:   1%|▍                                | 2/149 [00:00<00:08, 18.31it/s]

Epoch 121 - Training loss: 3868.505126953125 - Validation loss: 2732.153076171875


Epoch 122 - Training loss: 5561.8193359375: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 21.14it/s]
Epoch 123 - Training loss: 3784.0224609375:   2%|▋                                  | 3/149 [00:00<00:07, 20.82it/s]

Epoch 122 - Training loss: 3931.540283203125 - Validation loss: 2750.379638671875


Epoch 123 - Training loss: 3352.2099609375: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.68it/s]
Epoch 124 - Training loss: 3002.825439453125:   1%|▍                                | 2/149 [00:00<00:08, 17.94it/s]

Epoch 123 - Training loss: 3994.46142578125 - Validation loss: 2344.870361328125


Epoch 124 - Training loss: 3799.999267578125: 100%|███████████████████████████████| 149/149 [00:07<00:00, 20.36it/s]
Epoch 125 - Training loss: 4337.1953125:   1%|▌                                     | 2/149 [00:00<00:07, 19.82it/s]

Epoch 124 - Training loss: 3845.200439453125 - Validation loss: 2497.486328125


Epoch 125 - Training loss: 3635.508056640625: 100%|███████████████████████████████| 149/149 [00:07<00:00, 19.89it/s]
Epoch 126 - Training loss: 3484.687255859375:   1%|▍                                | 2/149 [00:00<00:08, 17.70it/s]

Epoch 125 - Training loss: 3865.0478515625 - Validation loss: 2482.953857421875


Epoch 126 - Training loss: 2421.728271484375: 100%|███████████████████████████████| 149/149 [00:07<00:00, 21.07it/s]
Epoch 127 - Training loss: 2756.78955078125:   2%|▋                                 | 3/149 [00:00<00:06, 21.37it/s]

Epoch 126 - Training loss: 3941.744140625 - Validation loss: 2564.259033203125


Epoch 127 - Training loss: 4673.0380859375: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 21.20it/s]
Epoch 128 - Training loss: 5115.4921875:   2%|▊                                     | 3/149 [00:00<00:06, 21.52it/s]

Epoch 127 - Training loss: 3887.006591796875 - Validation loss: 2415.524169921875


Epoch 128 - Training loss: 3301.29150390625: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.44it/s]
Epoch 129 - Training loss: 5372.6162109375:   1%|▍                                  | 2/149 [00:00<00:07, 18.76it/s]

Epoch 128 - Training loss: 3858.4345703125 - Validation loss: 2454.767578125


Epoch 129 - Training loss: 3743.30029296875: 100%|████████████████████████████████| 149/149 [00:07<00:00, 19.70it/s]
Epoch 130 - Training loss: 4276.9296875:   2%|▊                                     | 3/149 [00:00<00:06, 21.49it/s]

Epoch 129 - Training loss: 3903.2197265625 - Validation loss: 2538.36865234375


Epoch 130 - Training loss: 3001.988525390625: 100%|███████████████████████████████| 149/149 [00:07<00:00, 19.78it/s]
Epoch 131 - Training loss: 3378.740234375:   1%|▍                                   | 2/149 [00:00<00:07, 19.96it/s]

Epoch 130 - Training loss: 3869.87158203125 - Validation loss: 2696.549072265625


Epoch 131 - Training loss: 4091.85986328125: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.45it/s]
Epoch 132 - Training loss: 3731.82666015625:   2%|▋                                 | 3/149 [00:00<00:06, 21.16it/s]

Epoch 131 - Training loss: 3836.843505859375 - Validation loss: 2615.37890625


Epoch 132 - Training loss: 2599.7216796875: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.23it/s]
Epoch 133 - Training loss: 2303.6943359375:   1%|▍                                  | 2/149 [00:00<00:07, 19.73it/s]

Epoch 132 - Training loss: 3806.108154296875 - Validation loss: 2450.48291015625


Epoch 133 - Training loss: 3950.344482421875: 100%|███████████████████████████████| 149/149 [00:07<00:00, 20.55it/s]
Epoch 134 - Training loss: 3110.0302734375:   2%|▋                                  | 3/149 [00:00<00:06, 21.09it/s]

Epoch 133 - Training loss: 3732.843994140625 - Validation loss: 2517.466552734375


Epoch 134 - Training loss: 2936.767578125: 100%|██████████████████████████████████| 149/149 [00:07<00:00, 19.46it/s]
Epoch 135 - Training loss: 2407.175048828125:   1%|▍                                | 2/149 [00:00<00:07, 18.41it/s]

Epoch 134 - Training loss: 3882.082275390625 - Validation loss: 2602.257080078125


Epoch 135 - Training loss: 3785.051513671875: 100%|███████████████████████████████| 149/149 [00:07<00:00, 19.42it/s]
Epoch 136 - Training loss: 5252.2421875:   1%|▌                                     | 2/149 [00:00<00:07, 18.47it/s]

Epoch 135 - Training loss: 3793.283935546875 - Validation loss: 2765.166015625


Epoch 136 - Training loss: 3334.806884765625: 100%|███████████████████████████████| 149/149 [00:07<00:00, 19.96it/s]
Epoch 137 - Training loss: 5712.1435546875:   2%|▋                                  | 3/149 [00:00<00:06, 21.38it/s]

Epoch 136 - Training loss: 3875.313232421875 - Validation loss: 2431.3251953125


Epoch 137 - Training loss: 3475.751220703125: 100%|███████████████████████████████| 149/149 [00:07<00:00, 20.43it/s]
Epoch 138 - Training loss: 3784.672119140625:   2%|▋                                | 3/149 [00:00<00:07, 19.94it/s]

Epoch 137 - Training loss: 3916.768798828125 - Validation loss: 2414.69287109375


Epoch 138 - Training loss: 4963.189453125: 100%|██████████████████████████████████| 149/149 [00:07<00:00, 19.71it/s]
Epoch 139 - Training loss: 4113.00146484375:   1%|▍                                 | 2/149 [00:00<00:07, 19.80it/s]

Epoch 138 - Training loss: 3847.320556640625 - Validation loss: 2546.031982421875


Epoch 139 - Training loss: 2831.25390625: 100%|███████████████████████████████████| 149/149 [00:08<00:00, 18.62it/s]
Epoch 140 - Training loss: 4262.1943359375:   1%|▍                                  | 2/149 [00:00<00:14, 10.26it/s]

Epoch 139 - Training loss: 3839.83935546875 - Validation loss: 2410.378173828125


Epoch 140 - Training loss: 2948.43212890625: 100%|████████████████████████████████| 149/149 [00:11<00:00, 13.14it/s]
Epoch 141 - Training loss: 2523.182373046875:   1%|▍                                | 2/149 [00:00<00:07, 18.98it/s]

Epoch 140 - Training loss: 3897.26513671875 - Validation loss: 2427.251708984375


Epoch 141 - Training loss: 2763.340576171875: 100%|███████████████████████████████| 149/149 [00:07<00:00, 20.11it/s]
Epoch 142 - Training loss: 2993.427490234375:   2%|▋                                | 3/149 [00:00<00:07, 20.08it/s]

Epoch 141 - Training loss: 3863.090087890625 - Validation loss: 2559.14794921875


Epoch 142 - Training loss: 6888.74072265625: 100%|████████████████████████████████| 149/149 [00:07<00:00, 19.72it/s]
Epoch 143 - Training loss: 4201.81494140625:   1%|▍                                 | 2/149 [00:00<00:08, 18.08it/s]

Epoch 142 - Training loss: 3844.953125 - Validation loss: 2480.023681640625


Epoch 143 - Training loss: 2110.96435546875: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.12it/s]
Epoch 144 - Training loss: 4933.18359375:   2%|▋                                    | 3/149 [00:00<00:07, 20.85it/s]

Epoch 143 - Training loss: 3823.119140625 - Validation loss: 2518.622802734375


Epoch 144 - Training loss: 2841.3955078125: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 19.63it/s]
Epoch 145 - Training loss: 2843.6123046875:   1%|▍                                  | 2/149 [00:00<00:07, 18.80it/s]

Epoch 144 - Training loss: 3942.700927734375 - Validation loss: 2481.56689453125


Epoch 145 - Training loss: 2195.12646484375: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.37it/s]
Epoch 146 - Training loss: 2058.552734375:   2%|▋                                   | 3/149 [00:00<00:07, 20.38it/s]

Epoch 145 - Training loss: 3943.8994140625 - Validation loss: 2663.978759765625


Epoch 146 - Training loss: 3617.244384765625: 100%|███████████████████████████████| 149/149 [00:08<00:00, 17.93it/s]
Epoch 147 - Training loss: 4430.314453125:   1%|▍                                   | 2/149 [00:00<00:07, 19.17it/s]

Epoch 146 - Training loss: 3863.42626953125 - Validation loss: 2547.742919921875


Epoch 147 - Training loss: 4122.572265625: 100%|██████████████████████████████████| 149/149 [00:09<00:00, 15.61it/s]
Epoch 148 - Training loss: 6102.34228515625:   2%|▋                                 | 3/149 [00:00<00:06, 21.09it/s]

Epoch 147 - Training loss: 3898.015625 - Validation loss: 2419.582763671875


Epoch 148 - Training loss: 3022.381103515625: 100%|███████████████████████████████| 149/149 [00:07<00:00, 19.97it/s]
Epoch 149 - Training loss: 6104.69677734375:   1%|▍                                 | 2/149 [00:00<00:07, 19.71it/s]

Epoch 148 - Training loss: 3887.5234375 - Validation loss: 2805.60205078125


Epoch 149 - Training loss: 2988.21630859375: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.58it/s]
Epoch 150 - Training loss: 3258.152587890625:   2%|▋                                | 3/149 [00:00<00:07, 20.62it/s]

Epoch 149 - Training loss: 3724.48193359375 - Validation loss: 2821.503173828125


Epoch 150 - Training loss: 4147.748046875: 100%|██████████████████████████████████| 149/149 [00:07<00:00, 20.43it/s]
Epoch 151 - Training loss: 2541.49365234375:   1%|▍                                 | 2/149 [00:00<00:07, 18.67it/s]

Epoch 150 - Training loss: 3757.86279296875 - Validation loss: 2463.16162109375


Epoch 151 - Training loss: 6582.80859375: 100%|███████████████████████████████████| 149/149 [00:07<00:00, 20.73it/s]
Epoch 152 - Training loss: 2381.966796875:   2%|▋                                   | 3/149 [00:00<00:06, 20.88it/s]

Epoch 151 - Training loss: 3933.69970703125 - Validation loss: 2509.9326171875


Epoch 152 - Training loss: 5497.31005859375: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.24it/s]
Epoch 153 - Training loss: 3861.82763671875:   1%|▍                                 | 2/149 [00:00<00:07, 18.86it/s]

Epoch 152 - Training loss: 3730.756591796875 - Validation loss: 2533.49169921875


Epoch 153 - Training loss: 6907.7724609375: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.90it/s]
Epoch 154 - Training loss: 3197.183837890625:   2%|▋                                | 3/149 [00:00<00:06, 21.17it/s]

Epoch 153 - Training loss: 3782.537841796875 - Validation loss: 2871.961669921875


Epoch 154 - Training loss: 3717.68017578125: 100%|████████████████████████████████| 149/149 [00:06<00:00, 21.29it/s]
Epoch 155 - Training loss: 2233.21044921875:   2%|▋                                 | 3/149 [00:00<00:06, 21.22it/s]

Epoch 154 - Training loss: 3732.75341796875 - Validation loss: 2869.037353515625


Epoch 155 - Training loss: 1947.5760498046875: 100%|██████████████████████████████| 149/149 [00:06<00:00, 21.34it/s]
Epoch 156 - Training loss: 3974.459716796875:   2%|▋                                | 3/149 [00:00<00:06, 21.02it/s]

Epoch 155 - Training loss: 3865.83984375 - Validation loss: 2862.90625


Epoch 156 - Training loss: 3058.39599609375: 100%|████████████████████████████████| 149/149 [00:07<00:00, 21.27it/s]
Epoch 157 - Training loss: 2066.81591796875:   2%|▋                                 | 3/149 [00:00<00:06, 21.39it/s]

Epoch 156 - Training loss: 3765.401123046875 - Validation loss: 2413.754638671875


Epoch 157 - Training loss: 8691.3251953125: 100%|█████████████████████████████████| 149/149 [00:06<00:00, 21.47it/s]
Epoch 158 - Training loss: 6541.232421875:   2%|▋                                   | 3/149 [00:00<00:06, 21.05it/s]

Epoch 157 - Training loss: 3894.590576171875 - Validation loss: 2506.34765625


Epoch 158 - Training loss: 2403.14111328125: 100%|████████████████████████████████| 149/149 [00:06<00:00, 21.32it/s]
Epoch 159 - Training loss: 2273.14501953125:   2%|▋                                 | 3/149 [00:00<00:06, 21.17it/s]

Epoch 158 - Training loss: 3831.224365234375 - Validation loss: 2452.474365234375


Epoch 159 - Training loss: 3899.447509765625: 100%|███████████████████████████████| 149/149 [00:06<00:00, 21.33it/s]
Epoch 160 - Training loss: 2771.115478515625:   2%|▋                                | 3/149 [00:00<00:06, 21.24it/s]

Epoch 159 - Training loss: 3731.91455078125 - Validation loss: 2439.163330078125


Epoch 160 - Training loss: 3529.701171875: 100%|██████████████████████████████████| 149/149 [00:06<00:00, 21.50it/s]
Epoch 161 - Training loss: 3842.52978515625:   2%|▋                                 | 3/149 [00:00<00:06, 21.22it/s]

Epoch 160 - Training loss: 3900.734130859375 - Validation loss: 2510.52587890625


Epoch 161 - Training loss: 3510.965087890625: 100%|███████████████████████████████| 149/149 [00:06<00:00, 21.44it/s]
Epoch 162 - Training loss: 3099.54638671875:   2%|▋                                 | 3/149 [00:00<00:06, 21.41it/s]

Epoch 161 - Training loss: 3781.7177734375 - Validation loss: 2311.41943359375


Epoch 162 - Training loss: 2842.5771484375: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.59it/s]
Epoch 163 - Training loss: 2319.39501953125:   2%|▋                                 | 3/149 [00:00<00:06, 21.10it/s]

Epoch 162 - Training loss: 3845.04541015625 - Validation loss: 2608.021240234375


Epoch 163 - Training loss: 2861.920166015625: 100%|███████████████████████████████| 149/149 [00:07<00:00, 19.55it/s]
Epoch 164 - Training loss: 3020.407958984375:   1%|▍                                | 2/149 [00:00<00:07, 19.66it/s]

Epoch 163 - Training loss: 3714.0712890625 - Validation loss: 2776.57763671875


Epoch 164 - Training loss: 5802.1552734375: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 19.62it/s]
Epoch 165 - Training loss: 4419.50439453125:   2%|▋                                 | 3/149 [00:00<00:06, 21.17it/s]

Epoch 164 - Training loss: 3786.376220703125 - Validation loss: 2817.797607421875


Epoch 165 - Training loss: 7463.6572265625: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 21.04it/s]
Epoch 166 - Training loss: 3263.146484375:   2%|▋                                   | 3/149 [00:00<00:07, 20.81it/s]

Epoch 165 - Training loss: 3781.0419921875 - Validation loss: 2765.967041015625


Epoch 166 - Training loss: 2594.52392578125: 100%|████████████████████████████████| 149/149 [00:06<00:00, 21.38it/s]
Epoch 167 - Training loss: 3107.42138671875:   1%|▍                                 | 2/149 [00:00<00:07, 19.91it/s]

Epoch 166 - Training loss: 3732.0986328125 - Validation loss: 2400.643310546875


Epoch 167 - Training loss: 1431.5426025390625: 100%|██████████████████████████████| 149/149 [00:07<00:00, 19.01it/s]
Epoch 168 - Training loss: 2607.62158203125:   2%|▋                                 | 3/149 [00:00<00:06, 21.21it/s]

Epoch 167 - Training loss: 3756.56884765625 - Validation loss: 2703.05224609375


Epoch 168 - Training loss: 3602.5634765625: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 20.37it/s]
Epoch 169 - Training loss: 4913.0673828125:   2%|▋                                  | 3/149 [00:00<00:06, 21.19it/s]

Epoch 168 - Training loss: 3821.505859375 - Validation loss: 2330.481201171875


Epoch 169 - Training loss: 2200.987060546875: 100%|███████████████████████████████| 149/149 [00:06<00:00, 21.37it/s]
Epoch 170 - Training loss: 3461.303955078125:   2%|▋                                | 3/149 [00:00<00:06, 21.06it/s]

Epoch 169 - Training loss: 3778.974365234375 - Validation loss: 2467.178955078125


Epoch 170 - Training loss: 7357.4775390625: 100%|█████████████████████████████████| 149/149 [00:06<00:00, 21.55it/s]
Epoch 171 - Training loss: 3050.74169921875:   2%|▋                                 | 3/149 [00:00<00:06, 21.61it/s]

Epoch 170 - Training loss: 3819.472412109375 - Validation loss: 2658.8642578125


Epoch 171 - Training loss: 4231.3876953125: 100%|█████████████████████████████████| 149/149 [00:06<00:00, 21.41it/s]
Epoch 172 - Training loss: 4821.93017578125:   2%|▋                                 | 3/149 [00:00<00:06, 21.30it/s]

Epoch 171 - Training loss: 3892.025146484375 - Validation loss: 2585.85791015625


Epoch 172 - Training loss: 2152.559326171875: 100%|███████████████████████████████| 149/149 [00:06<00:00, 21.43it/s]
Epoch 173 - Training loss: 3935.39892578125:   2%|▋                                 | 3/149 [00:00<00:06, 20.88it/s]

Epoch 172 - Training loss: 3755.45166015625 - Validation loss: 2580.23291015625


Epoch 173 - Training loss: 3122.45703125: 100%|███████████████████████████████████| 149/149 [00:06<00:00, 21.54it/s]
Epoch 174 - Training loss: 3564.55322265625:   2%|▋                                 | 3/149 [00:00<00:06, 21.34it/s]

Epoch 173 - Training loss: 3751.070556640625 - Validation loss: 2607.71630859375


Epoch 174 - Training loss: 3821.1044921875: 100%|█████████████████████████████████| 149/149 [00:06<00:00, 21.57it/s]
Epoch 175 - Training loss: 8555.451171875:   2%|▋                                   | 3/149 [00:00<00:06, 21.45it/s]

Epoch 174 - Training loss: 3819.332275390625 - Validation loss: 2527.17138671875


Epoch 175 - Training loss: 3127.7216796875: 100%|█████████████████████████████████| 149/149 [00:06<00:00, 21.57it/s]
Epoch 176 - Training loss: 2983.420654296875:   2%|▋                                | 3/149 [00:00<00:06, 21.59it/s]

Epoch 175 - Training loss: 3757.091552734375 - Validation loss: 2526.52783203125


Epoch 176 - Training loss: 3923.3291015625: 100%|█████████████████████████████████| 149/149 [00:06<00:00, 21.60it/s]
Epoch 177 - Training loss: 4693.03369140625:   2%|▋                                 | 3/149 [00:00<00:06, 21.55it/s]

Epoch 176 - Training loss: 3829.75048828125 - Validation loss: 2566.7578125


Epoch 177 - Training loss: 4444.76025390625: 100%|████████████████████████████████| 149/149 [00:06<00:00, 21.42it/s]
Epoch 178 - Training loss: 3025.2861328125:   2%|▋                                  | 3/149 [00:00<00:06, 21.48it/s]

Epoch 177 - Training loss: 3723.29443359375 - Validation loss: 2504.169921875


Epoch 178 - Training loss: 7190.92822265625: 100%|████████████████████████████████| 149/149 [00:06<00:00, 21.53it/s]
Epoch 179 - Training loss: 7300.14453125:   2%|▋                                    | 3/149 [00:00<00:06, 21.53it/s]

Epoch 178 - Training loss: 3812.950927734375 - Validation loss: 2445.4619140625


Epoch 179 - Training loss: 2345.87890625: 100%|███████████████████████████████████| 149/149 [00:06<00:00, 21.41it/s]
Epoch 180 - Training loss: 3820.648681640625:   2%|▋                                | 3/149 [00:00<00:06, 21.65it/s]

Epoch 179 - Training loss: 3711.489501953125 - Validation loss: 2581.951171875


Epoch 180 - Training loss: 3658.530029296875: 100%|███████████████████████████████| 149/149 [00:06<00:00, 21.46it/s]
Epoch 181 - Training loss: 11881.9599609375:   2%|▋                                 | 3/149 [00:00<00:06, 21.52it/s]

Epoch 180 - Training loss: 3733.245849609375 - Validation loss: 2539.032470703125


Epoch 181 - Training loss: 4072.343994140625: 100%|███████████████████████████████| 149/149 [00:06<00:00, 21.46it/s]
Epoch 182 - Training loss: 2654.975341796875:   2%|▋                                | 3/149 [00:00<00:06, 21.04it/s]

Epoch 181 - Training loss: 3779.524658203125 - Validation loss: 2517.107177734375


Epoch 182 - Training loss: 2310.866455078125: 100%|███████████████████████████████| 149/149 [00:06<00:00, 21.44it/s]
Epoch 183 - Training loss: 3994.583984375:   2%|▋                                   | 3/149 [00:00<00:06, 21.56it/s]

Epoch 182 - Training loss: 3692.05078125 - Validation loss: 2696.409423828125


Epoch 183 - Training loss: 3489.323974609375: 100%|███████████████████████████████| 149/149 [00:06<00:00, 21.45it/s]
Epoch 184 - Training loss: 2719.742431640625:   2%|▋                                | 3/149 [00:00<00:06, 21.50it/s]

Epoch 183 - Training loss: 3850.32958984375 - Validation loss: 2461.087890625


Epoch 184 - Training loss: 4105.02392578125: 100%|████████████████████████████████| 149/149 [00:06<00:00, 21.54it/s]
Epoch 185 - Training loss: 4951.58935546875:   2%|▋                                 | 3/149 [00:00<00:06, 21.36it/s]

Epoch 184 - Training loss: 3847.9765625 - Validation loss: 2534.654296875


Epoch 185 - Training loss: 2906.556884765625: 100%|███████████████████████████████| 149/149 [00:06<00:00, 21.43it/s]
Epoch 186 - Training loss: 3150.39111328125:   2%|▋                                 | 3/149 [00:00<00:07, 20.71it/s]

Epoch 185 - Training loss: 3672.345947265625 - Validation loss: 2689.821044921875


Epoch 186 - Training loss: 5521.279296875: 100%|██████████████████████████████████| 149/149 [00:06<00:00, 21.54it/s]
Epoch 187 - Training loss: 4406.44677734375:   2%|▋                                 | 3/149 [00:00<00:06, 21.37it/s]

Epoch 186 - Training loss: 3751.695068359375 - Validation loss: 2533.66357421875


Epoch 187 - Training loss: 3932.98681640625: 100%|████████████████████████████████| 149/149 [00:06<00:00, 21.58it/s]
Epoch 188 - Training loss: 4686.13134765625:   2%|▋                                 | 3/149 [00:00<00:06, 21.43it/s]

Epoch 187 - Training loss: 3679.37158203125 - Validation loss: 2350.664794921875


Epoch 188 - Training loss: 3042.480224609375: 100%|███████████████████████████████| 149/149 [00:06<00:00, 21.44it/s]
Epoch 189 - Training loss: 1882.167236328125:   2%|▋                                | 3/149 [00:00<00:07, 20.35it/s]

Epoch 188 - Training loss: 3682.177001953125 - Validation loss: 2389.610107421875


Epoch 189 - Training loss: 4144.07568359375: 100%|████████████████████████████████| 149/149 [00:07<00:00, 20.56it/s]
Epoch 190 - Training loss: 5263.91455078125:   1%|▍                                 | 2/149 [00:00<00:08, 17.15it/s]

Epoch 189 - Training loss: 3651.51806640625 - Validation loss: 2338.9306640625


Epoch 190 - Training loss: 3999.5439453125: 100%|█████████████████████████████████| 149/149 [00:07<00:00, 19.87it/s]
Epoch 191 - Training loss: 3977.5029296875:   2%|▋                                  | 3/149 [00:00<00:07, 19.70it/s]

Epoch 190 - Training loss: 3677.7138671875 - Validation loss: 2839.698486328125


Epoch 191 - Training loss: 3218.946533203125: 100%|███████████████████████████████| 149/149 [00:07<00:00, 21.21it/s]
Epoch 192 - Training loss: 4689.984375:   2%|▊                                      | 3/149 [00:00<00:06, 21.46it/s]

Epoch 191 - Training loss: 3613.242919921875 - Validation loss: 2715.035888671875


Epoch 192 - Training loss: 3450.566162109375: 100%|███████████████████████████████| 149/149 [00:06<00:00, 21.44it/s]
Epoch 193 - Training loss: 2741.619384765625:   2%|▋                                | 3/149 [00:00<00:06, 21.61it/s]

Epoch 192 - Training loss: 3661.498291015625 - Validation loss: 2928.6875


Epoch 193 - Training loss: 5993.0712890625: 100%|█████████████████████████████████| 149/149 [00:06<00:00, 21.47it/s]
Epoch 194 - Training loss: 1471.35302734375:   2%|▋                                 | 3/149 [00:00<00:06, 21.12it/s]

Epoch 193 - Training loss: 3733.912353515625 - Validation loss: 2575.457763671875


Epoch 194 - Training loss: 2490.179931640625: 100%|███████████████████████████████| 149/149 [00:06<00:00, 21.57it/s]
Epoch 195 - Training loss: 3097.681884765625:   2%|▋                                | 3/149 [00:00<00:06, 21.52it/s]

Epoch 194 - Training loss: 3550.275634765625 - Validation loss: 2406.0302734375


Epoch 195 - Training loss: 2293.3505859375: 100%|█████████████████████████████████| 149/149 [00:06<00:00, 21.34it/s]
Epoch 196 - Training loss: 1867.82666015625:   2%|▋                                 | 3/149 [00:00<00:06, 21.31it/s]

Epoch 195 - Training loss: 3667.76806640625 - Validation loss: 2671.431640625


Epoch 196 - Training loss: 3879.180908203125: 100%|███████████████████████████████| 149/149 [00:06<00:00, 21.46it/s]
Epoch 197 - Training loss: 9275.3994140625:   2%|▋                                  | 3/149 [00:00<00:06, 21.31it/s]

Epoch 196 - Training loss: 3589.022216796875 - Validation loss: 2591.88427734375


Epoch 197 - Training loss: 5006.5185546875: 100%|█████████████████████████████████| 149/149 [00:06<00:00, 21.38it/s]
Epoch 198 - Training loss: 3721.4482421875:   2%|▋                                  | 3/149 [00:00<00:06, 21.08it/s]

Epoch 197 - Training loss: 3701.179443359375 - Validation loss: 2687.814453125


Epoch 198 - Training loss: 6867.14892578125: 100%|████████████████████████████████| 149/149 [00:07<00:00, 21.27it/s]
Epoch 199 - Training loss: 4150.63525390625:   2%|▋                                 | 3/149 [00:00<00:06, 21.47it/s]

Epoch 198 - Training loss: 3658.193359375 - Validation loss: 2344.675048828125


Epoch 199 - Training loss: 2626.169677734375: 100%|███████████████████████████████| 149/149 [00:06<00:00, 21.58it/s]


Epoch 199 - Training loss: 3792.5830078125 - Validation loss: 2514.562744140625
Best Epoch 161 - Best Validation loss: 2311.41943359375


In [44]:
test_loss = []
for IDs in test_dataloader:
    best_finetune_model.eval()
    X = torch.tensor(test_data.feature_df.loc[IDs].to_numpy()).to(device)
    X = X.reshape(-1, max_len, X.shape[-1])
    targets = torch.tensor(test_data.labels_df.loc[IDs].to_numpy()).to(device)
    pred = best_finetune_model.predict(X.float()).squeeze(-1)
    loss = loss_fn(pred, targets)
    test_loss.append(loss)


test_loss = torch.tensor(test_loss).mean()
print("Test loss: {}".format(test_loss))
print("Test loss: {}".format(np.sqrt(test_loss)))

Test loss: 3786.213623046875
Test loss: 61.5322151184082


# Forecasting Results from the paper

<center><img src="../img/img_8.PNG"/><center/>

Reference:
1. https://arxiv.org/abs/2302.00861
2. https://github.com/gzerveas/mvts_transformer

BeijingPM25Quality

No Pretrain

Test loss: 4083.34765625

Test loss: 4096.765625

Test loss: 3880.41064453125

Test loss: 3818.454833984375

Test loss: 3640.204833984375

Test loss: 3793.255615234375

Test loss: 3386.54638671875

Test loss: 3532.69140625

Test loss: 3568.943115234375

Test loss: 3639.838623046875

Pretrain

Test loss: 3398.438720703125

Test loss: 3389.167724609375

Test loss: 3470.773681640625

Test loss: 3443.25830078125

Test loss: 3331.67919921875

Test loss: 3753.1474609375

Test loss: 3480.992431640625

Test loss: 3532.7275390625

Test loss: 3068.515869140625

Test loss: 3118.0390625

Hyperparameter

config['normalization_layer'] = 'BatchNorm'

config['out_len'] = 1

config['out_dim'] = 1

config['d_model'] = 8

config['dim_feedforward'] = 64

config['num_heads'] = 4

config['num_layers'] = 2

No Pretrain

Test loss: 3492.47705078125

Test loss: 3782.492431640625

Test loss: 3879.20458984375

Test loss: 3876.313232421875

Test loss: 3542.3046875

Test loss: 3592.865966796875

Pretrain L//2

Test loss: 3254.694091796875

Test loss: 3107.428955078125

Test loss: 3612.37939453125

Test loss: 3387.132568359375

Test loss: 3596.321533203125

Test loss: 3533.36083984375

Test loss: 3259.4228515625

Test loss: 3758.171630859375

Test loss: 3398.135986328125

Test loss: 3677.972412109375

Pretrain L//4

Test loss: 3581.83984375

Test loss: 3993.34814453125

Test loss: 3416.56103515625

Test loss: 3612.152587890625

Test loss: 3512.76025390625

Test loss: 3326.46044921875

Test loss: 3355.169677734375

Test loss: 3313.980712890625

Test loss: 3644.6640625

Test loss: 3978.413818359375

Hyperparameter

config['normalization_layer'] = 'BatchNorm'

config['out_len'] = 1

config['out_dim'] = 1

config['d_model'] = 8

config['dim_feedforward'] = 32

config['num_heads'] = 4

config['num_layers'] = 2

No Pretrain

Test loss: 3380.054931640625

Test loss: 3759.367919921875