In [1]:
import json
import pandas as pd
import torch
from torch import nn
from torch.nn.utils.rnn import pack_padded_sequence
import torch.optim as optim
from tqdm import tqdm, trange
from transformers import AutoModel, AutoTokenizer
from torch.optim.lr_scheduler import MultiplicativeLR
import matplotlib.pyplot as plt
import os

## Device Setting & data directory

In [2]:
path = os.getcwd()
data_dir = os.path.dirname(path)
device = torch.device('cuda:0')
batch_size = 64

## Tokenizer

In [3]:
# Load_transformer
print('Loading transformers...')
transformer_tag = "activebus/BERT-DK_rest"
tokenizer = AutoTokenizer.from_pretrained(transformer_tag)
transformer = AutoModel.from_pretrained(transformer_tag, add_pooling_layer=False)
transformer.to(device)


Loading transformers...


Some weights of the model checkpoint at activebus/BERT-DK_rest were not used when initializing BertModel: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0): BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          

## Load Data 

In [4]:
def load_data(split_name='train'):
    print('Loading {} data...'.format(split_name))
    df = pd.read_csv(data_dir + f'/{split_name}.csv')
    return df
df = load_data('Combined_News_DJIA')
stock_df = load_data('upload_DJIA_table')
df['price'] = stock_df.Close

train_df = df[:int(len(df) * 0.9)]
valid_df = df[int(len(df) * 0.9):]
num_train_batches = (len(train_df) + batch_size - 1) // batch_size
num_valid_batches = (len(valid_df) + batch_size - 1) // batch_size
# news = pd.read_csv("../Combined_News_DJIA.csv")

print("Loaded")
# train_df

Loading Combined_News_DJIA data...
Loading upload_DJIA_table data...
Loaded


## Model 
Pre_trained_BERT Finished <br>
Need Linear & LSTM


In [5]:
class Model(nn.Module):
    def __init__(self, hidden_size, num_linear=1):
        super(Model, self).__init__()
        self.pooler = nn.Sequential(
            nn.Linear(768, 768),
            nn.Tanh(),
        )
        #self.layers = nn.ModuleList([nn.Linear(768 + 1, hidden_size)])
        self.layers = nn.ModuleList([nn.Linear(256, hidden_size)])
        self.layers.extend([nn.Linear(hidden_size, hidden_size) for i in range(num_linear)])
        self.dropout = nn.Dropout(p=0.5)
        self.output = nn.Linear(hidden_size, 1)
        
#         Add LSTM HERE
        self.LSTM = nn.LSTM(input_size=769,hidden_size=256,
                           num_layers=1,batch_first=True)
#        End LSTM HERE
    def forward(self, text_embeddings, other_inputs):
        pooled_output = self.pooler(text_embeddings)  # (batch_size, 768)
        h = torch.cat([pooled_output, other_inputs], 1)  # (batch_size, 768 + 1)
        print(h)
#         Add LSTM HERE
        #print(h[0])
        #print(h)
        h_size = list(h.shape)
        h = h.reshape(1,h_size[0],h_size[1])
        #print(h)
        l = torch.zeros([1],dtype=torch.float32)
        l = l.add(h_size[0])

        #pack_sequence(h)
        #print(h.shape)
        h = pack_padded_sequence(h,lengths=l,batch_first=True)
        #print(h.data.shape)
        #print(h)

        h, (hn, cn) = self.LSTM(h, None)
        h = h.data
#         End LSTM HERE

        for layer in self.layers:
            h = nn.functional.leaky_relu(layer(h))  # (batch_size, hidden_size)
            h = self.dropout(h)
        o = self.output(h)  # (batch_size, 1)
        print(o)
        return o

## Train hyperparameter

In [6]:
num_epochs = 10
lr = 0.001
criterion = nn.BCEWithLogitsLoss()

hidden_size = 128
num_linear = 1

records = []

In [7]:
def compute_predictions(outputs):
    logits = torch.sigmoid(outputs)
    threshold = 0.5
    predictions = torch.zeros(len(logits),1).to(device)
    for i in range(len(logits)):
        if logits[i] < threshold:
            predictions[i] = 0
        else:
            predictions[i] = 1
    return predictions

# Brute concatenate TopK News
def make_input_batch(i_batch, df, batch_size):
    rows = df[i_batch* batch_size : min((i_batch+1) * batch_size, len(df))]
    text = rows[rows.columns.difference(['price', 'Label', 'Date'])]
    text = text.apply(lambda x :' '.join(x.astype(str)),1).tolist()
    text_inputs = tokenizer(text, padding='max_length', truncation=True, return_tensors="pt")
    text_inputs = {k : v.to(device) for k, v in text_inputs.items()}
    with torch.no_grad():
        text_embeddings = transformer(**text_inputs, return_dict=True).last_hidden_state[:,0,:]
    other_inputs = torch.tensor([rows.price.tolist()], dtype=torch.float32).to(device) # (batch_size, 1)
    other_inputs = torch.transpose(other_inputs, 0, 1)
    train_labels = torch.tensor([rows.Label.tolist()],dtype=torch.float32).to(device)  
    train_labels = torch.transpose(train_labels, 0, 1) # (batch_size, 1)
    return text_embeddings, other_inputs, train_labels

## Training

In [8]:
# save path
config = f'hid-{hidden_size}-numlin-{num_linear}'
print(config)
save_path = data_dir + '/results/' + config

# model design
model = Model(hidden_size=hidden_size, num_linear=num_linear)
# if os.path.isfile(save_path + '/model.pt'):
#     model.load_state_dict(torch.load(save_path + '/model.pt'))
model.to(device)

# Learning Rate
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
lmbda = lambda epoch: 0.95
scheduler = MultiplicativeLR(optimizer, lr_lambda=lmbda)
total_acc = 0
print(total_acc)
losses, acc_train, acc_valid = [], [], []
print("Start Training")
for epoch in range(num_epochs):
    # train
    running_loss = 0.0
    total_acc = 0
    model.train()

    # shuffle train data
    train_df = train_df.sample(frac=1, random_state=epoch).reset_index(drop=True)

    for i_batch in trange(num_train_batches, desc='ep {:2d}'.format(epoch + 1)):
        text_embeddings, other_inputs, train_labels = make_input_batch(i_batch, train_df, batch_size)

        # train step
        optimizer.zero_grad()
        outputs = model(text_embeddings, other_inputs)
        
        loss = criterion(outputs, train_labels)
        loss.backward()
        optimizer.step()

        # record
        losses.append(loss.cpu().detach().item())
        running_loss += losses[-1]
        predictions = compute_predictions(outputs)
        total_acc += (predictions == train_labels).sum().item()

    print(' loss: %.6f,  train acc: %.6f' % (running_loss / len(train_df), total_acc / len(train_df)))
    acc_train.append(total_acc / len(train_df))
    scheduler.step()

    # validate
    if epoch % 1 == 0:
        model.eval()
        total_acc = 0
        ## need to find valid data
        with torch.no_grad():
            for i_batch in trange(num_valid_batches, desc='valid'):
                text_embeddings, other_inputs, valid_labels = make_input_batch(i_batch, valid_df, batch_size)
                outputs = model(text_embeddings, other_inputs)
                predictions = compute_predictions(outputs)
                total_acc += (predictions == valid_labels).sum().item()

        print('valid acc', total_acc / len(valid_df))
        acc_valid.append(total_acc / len(valid_df))

print('Finished Training')

if not os.path.isdir(save_path):
    os.mkdir(save_path)
torch.save(model.state_dict(), save_path + '/model.pt')

record = json.dumps({
    'losses' : losses, 'acc_train' : acc_train, 'acc_valid' : acc_valid, 
    'num_epochs' : num_epochs, 'batch_size' : batch_size, 'lr' : lr, 'hidden_size':hidden_size, 'num_linear':num_linear,
    'transformer_tag' : transformer_tag
}, sort_keys=True, indent=4)
records.append(record)
with open(save_path + f'/record-{num_epochs}.json', 'w') as f:
    f.write(record)

hid-128-numlin-1


ep  1:   0%|                                                                                    | 0/28 [00:00<?, ?it/s]

0
Start Training


ep  1:   4%|██▋                                                                         | 1/28 [00:02<01:14,  2.77s/it]

tensor([[-3.5706e-01, -2.2773e-01, -2.8686e-01,  ...,  1.1870e-01,
         -1.8938e-01,  1.0341e+04],
        [-3.1172e-01, -3.0280e-01, -2.5644e-01,  ...,  1.3254e-01,
         -2.7181e-01,  1.6429e+04],
        [-2.9156e-01, -3.0987e-01, -2.8559e-01,  ...,  1.1949e-01,
         -2.2765e-01,  1.5338e+04],
        ...,
        [-3.9015e-01, -2.3314e-01, -2.7727e-01,  ...,  1.5040e-01,
         -2.2969e-01,  8.8813e+03],
        [-3.8658e-01, -2.7444e-01, -3.1971e-01,  ...,  1.8218e-01,
         -1.8384e-01,  1.6513e+04],
        [-3.8697e-01, -2.5807e-01, -2.4641e-01,  ...,  8.1206e-02,
         -2.0088e-01,  1.0940e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1504],
        [ 0.0576],
        [ 0.1105],
        [ 0.1299],
        [ 0.1229],
        [ 0.1286],
        [ 0.0881],
        [-0.0137],
        [ 0.1603],
        [ 0.0555],
        [ 0.1294],
        [ 0.1956],
        [ 0.1095],
        [ 0.2127],
        [ 0.0643],
        [ 0.0705],
        [ 0.1953],
     

ep  1:   7%|█████▍                                                                      | 2/28 [00:05<01:06,  2.55s/it]

tensor([[-3.4577e-01, -2.6732e-01, -2.6023e-01,  ...,  1.3358e-01,
         -2.7730e-01,  1.3214e+04],
        [-3.2131e-01, -3.0727e-01, -2.2774e-01,  ...,  1.1293e-01,
         -1.7680e-01,  1.7615e+04],
        [-4.3547e-01, -3.0193e-01, -2.2938e-01,  ...,  1.5672e-01,
         -2.4110e-01,  1.8144e+04],
        ...,
        [-3.0759e-01, -2.1716e-01, -2.3023e-01,  ...,  8.5880e-02,
         -2.1141e-01,  1.0992e+04],
        [-3.8888e-01, -2.7762e-01, -3.4547e-01,  ...,  2.1853e-01,
         -2.3983e-01,  1.7928e+04],
        [-4.9018e-01, -1.0667e-01, -3.3239e-01,  ...,  3.0625e-01,
         -2.0594e-01,  1.7490e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0476],
        [ 0.0856],
        [ 0.0711],
        [ 0.1980],
        [ 0.1988],
        [ 0.0197],
        [ 0.1416],
        [ 0.0616],
        [ 0.0782],
        [ 0.2301],
        [ 0.1701],
        [ 0.1274],
        [ 0.1225],
        [ 0.1267],
        [ 0.1835],
        [ 0.0622],
        [ 0.1935],
     

ep  1:  11%|████████▏                                                                   | 3/28 [00:07<01:01,  2.47s/it]

tensor([[-3.4871e-01, -3.0565e-01, -2.4076e-01,  ...,  1.3576e-01,
         -3.0301e-01,  1.3033e+04],
        [-4.1949e-01, -2.7018e-01, -2.7752e-01,  ...,  3.7399e-02,
         -2.6383e-01,  1.2171e+04],
        [-4.2368e-01, -1.3272e-01, -3.4586e-01,  ...,  1.2911e-01,
         -2.2734e-01,  1.2042e+04],
        ...,
        [-3.3026e-01, -3.3012e-01, -2.8531e-01,  ...,  1.7482e-01,
         -2.3092e-01,  1.7408e+04],
        [-3.4527e-01, -3.2150e-01, -1.6973e-01,  ...,  7.9307e-02,
         -2.7676e-01,  1.3861e+04],
        [-2.4636e-01, -3.0872e-01, -2.3774e-01,  ...,  9.7947e-02,
         -2.3313e-01,  1.1755e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1571],
        [ 0.1395],
        [ 0.2513],
        [ 0.1979],
        [-0.0746],
        [ 0.0970],
        [ 0.2239],
        [ 0.1324],
        [ 0.0586],
        [ 0.0837],
        [ 0.2216],
        [ 0.2372],
        [ 0.1177],
        [ 0.1329],
        [ 0.0851],
        [ 0.1797],
        [ 0.0552],
     

ep  1:  14%|██████████▊                                                                 | 4/28 [00:09<00:58,  2.44s/it]

tensor([[-2.6740e-01, -3.0200e-01, -1.9595e-01,  ...,  8.7386e-02,
         -2.8043e-01,  1.4834e+04],
        [-3.5841e-01, -2.6381e-01, -2.2806e-01,  ...,  1.0156e-01,
         -2.5888e-01,  1.2409e+04],
        [-3.3390e-01, -2.6068e-01, -2.1884e-01,  ...,  5.4183e-02,
         -3.0225e-01,  1.1020e+04],
        ...,
        [-3.3012e-01, -2.4522e-01, -1.9279e-01,  ...,  1.9689e-01,
         -2.4737e-01,  1.7983e+04],
        [-3.2570e-01, -3.2317e-01, -3.0422e-01,  ...,  1.6883e-01,
         -2.2779e-01,  1.6263e+04],
        [-3.7688e-01, -2.7927e-01, -1.6602e-01,  ...,  9.2716e-02,
         -2.1995e-01,  1.4910e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.1174],
        [0.0579],
        [0.1795],
        [0.2074],
        [0.0145],
        [0.1248],
        [0.1502],
        [0.2488],
        [0.1120],
        [0.1219],
        [0.1771],
        [0.1593],
        [0.2678],
        [0.0321],
        [0.1510],
        [0.1210],
        [0.0196],
        [0.1279],
    

ep  1:  18%|█████████████▌                                                              | 5/28 [00:12<00:55,  2.43s/it]

tensor([[-3.7246e-01, -2.9936e-01, -1.9576e-01,  ...,  9.8097e-02,
         -2.0507e-01,  1.5191e+04],
        [-4.0043e-01, -2.3126e-01, -3.0154e-01,  ...,  2.3750e-01,
         -2.4533e-01,  1.7448e+04],
        [-3.7928e-01, -2.3811e-01, -3.1528e-01,  ...,  1.9517e-01,
         -2.0265e-01,  1.0060e+04],
        ...,
        [-2.5248e-01, -4.6347e-02, -2.7562e-01,  ...,  2.5839e-01,
         -2.3001e-01,  1.6697e+04],
        [-2.2512e-01, -2.6350e-01, -2.8325e-01,  ...,  1.7944e-01,
         -2.3356e-01,  1.5747e+04],
        [-3.4555e-01, -2.9483e-01, -2.8559e-01,  ...,  1.2836e-01,
         -2.5244e-01,  1.2101e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0941],
        [ 0.1606],
        [ 0.2058],
        [ 0.2714],
        [ 0.1137],
        [ 0.0960],
        [ 0.0722],
        [ 0.0534],
        [ 0.2442],
        [ 0.0272],
        [ 0.0957],
        [ 0.0769],
        [ 0.0708],
        [ 0.1122],
        [ 0.1952],
        [-0.0320],
        [ 0.1946],
     

ep  1:  21%|████████████████▎                                                           | 6/28 [00:14<00:53,  2.42s/it]

tensor([[-3.5655e-01, -2.9510e-01, -3.3995e-01,  ...,  2.6485e-01,
         -1.1859e-01,  1.2724e+04],
        [-3.3796e-01, -1.7084e-01, -2.2772e-01,  ...,  1.2580e-01,
         -2.4941e-01,  1.2807e+04],
        [-3.6240e-01, -2.8157e-01, -2.6692e-01,  ...,  1.6458e-01,
         -3.1216e-01,  1.7231e+04],
        ...,
        [-3.0859e-01, -2.8912e-01, -2.5153e-01,  ...,  6.2658e-02,
         -3.0266e-01,  1.6340e+04],
        [-4.2723e-01, -4.8451e-02, -5.7791e-01,  ...,  4.1929e-01,
         -1.1925e-01,  1.1981e+04],
        [-3.5065e-01, -2.1291e-01, -2.4321e-01,  ...,  1.1960e-01,
         -2.0038e-01,  1.0139e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[-0.0118],
        [ 0.1522],
        [-0.0038],
        [ 0.1488],
        [ 0.0743],
        [ 0.1084],
        [ 0.2019],
        [ 0.1137],
        [ 0.1209],
        [ 0.0437],
        [ 0.1721],
        [ 0.0657],
        [ 0.0775],
        [ 0.1202],
        [ 0.1865],
        [ 0.1059],
        [ 0.0728],
     

ep  1:  25%|███████████████████                                                         | 7/28 [00:17<00:50,  2.42s/it]

tensor([[-3.1585e-01, -3.0914e-01, -1.9098e-01,  ...,  1.7558e-01,
         -2.0669e-01,  1.7776e+04],
        [-3.2756e-01, -2.2391e-01, -2.5302e-01,  ...,  3.2680e-02,
         -2.7580e-01,  1.2878e+04],
        [-3.7662e-01, -2.8977e-01, -2.5903e-01,  ...,  1.2753e-01,
         -2.4237e-01,  1.7568e+04],
        ...,
        [-3.9868e-01, -2.0469e-01, -2.3869e-01,  ...,  1.4822e-01,
         -2.1111e-01,  1.7737e+04],
        [-3.3108e-01, -3.2600e-01, -3.4200e-01,  ...,  6.8564e-02,
         -2.4688e-01,  1.2381e+04],
        [-3.1945e-01, -3.3845e-01, -2.3856e-01,  ...,  3.0208e-02,
         -2.5564e-01,  1.1955e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0319],
        [ 0.2080],
        [ 0.0663],
        [ 0.2519],
        [ 0.1601],
        [ 0.1097],
        [ 0.0154],
        [ 0.0844],
        [ 0.2017],
        [ 0.0305],
        [ 0.2189],
        [ 0.1365],
        [ 0.0691],
        [ 0.2582],
        [ 0.1617],
        [ 0.1792],
        [ 0.0379],
     

ep  1:  29%|█████████████████████▋                                                      | 8/28 [00:19<00:48,  2.41s/it]

tensor([[-3.4409e-01, -3.4521e-01, -1.9579e-01,  ...,  1.5452e-01,
         -2.1524e-01,  1.6528e+04],
        [-3.4615e-01, -3.2760e-01, -2.7711e-01,  ...,  9.3976e-02,
         -2.5970e-01,  1.2938e+04],
        [-4.1245e-01, -2.7285e-01, -2.3538e-01,  ...,  2.1363e-01,
         -2.8357e-01,  1.6615e+04],
        ...,
        [-3.3266e-01, -2.8988e-01, -2.4345e-01,  ...,  2.1375e-01,
         -1.8255e-01,  1.8042e+04],
        [-4.0715e-01, -2.6525e-01, -2.8636e-01,  ...,  1.2351e-01,
         -2.6951e-01,  1.3176e+04],
        [-3.4436e-01, -1.6100e-01, -2.2255e-01,  ...,  7.2684e-02,
         -2.6612e-01,  1.0236e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1319],
        [ 0.2349],
        [ 0.0992],
        [ 0.1064],
        [ 0.1127],
        [ 0.1298],
        [ 0.1336],
        [ 0.0301],
        [ 0.0848],
        [ 0.1793],
        [ 0.2463],
        [ 0.2021],
        [ 0.2168],
        [ 0.0149],
        [ 0.1133],
        [ 0.1074],
        [ 0.1483],
     

ep  1:  32%|████████████████████████▍                                                   | 9/28 [00:21<00:45,  2.40s/it]

tensor([[-3.9940e-01, -2.5686e-01, -2.1902e-01,  ...,  1.8390e-01,
         -1.2522e-01,  1.7403e+04],
        [-3.6402e-01, -1.0659e-01, -3.7077e-01,  ...,  2.3405e-01,
         -2.0375e-01,  1.0198e+04],
        [-3.8895e-01, -2.9314e-01, -1.8942e-01,  ...,  1.4002e-01,
         -2.8575e-01,  1.6991e+04],
        ...,
        [-3.3635e-01, -2.4061e-01, -2.2590e-01,  ...,  1.9762e-01,
         -1.9553e-01,  1.7908e+04],
        [-3.6627e-01, -2.9064e-01, -2.6176e-01,  ...,  1.0631e-01,
         -2.3851e-01,  1.1872e+04],
        [-3.7871e-01, -2.5614e-01, -2.8567e-01,  ...,  2.3823e-01,
         -2.2089e-01,  1.7612e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1349],
        [ 0.1031],
        [ 0.1565],
        [ 0.0598],
        [ 0.1189],
        [ 0.0468],
        [ 0.1862],
        [ 0.0481],
        [ 0.0903],
        [ 0.1373],
        [-0.0360],
        [ 0.0886],
        [ 0.0563],
        [ 0.1859],
        [ 0.3027],
        [-0.0330],
        [ 0.0769],
     

ep  1:  36%|██████████████████████████▊                                                | 10/28 [00:24<00:43,  2.40s/it]

tensor([[-3.4869e-01, -2.8030e-01, -2.2122e-01,  ...,  2.1715e-01,
         -3.0026e-01,  1.6659e+04],
        [-2.9401e-01, -3.1333e-01, -3.1074e-01,  ...,  1.1514e-01,
         -1.8563e-01,  1.2944e+04],
        [-2.9667e-01, -1.4193e-01, -3.4497e-01,  ...,  2.8130e-01,
         -2.3471e-01,  1.5569e+04],
        ...,
        [-3.0986e-01, -2.5017e-01, -2.9442e-01,  ...,  1.3559e-01,
         -1.8278e-01,  1.2427e+04],
        [-3.7243e-01, -2.4053e-01, -2.5935e-01,  ...,  1.0696e-01,
         -2.7419e-01,  1.2213e+04],
        [-2.5446e-01, -3.2927e-01, -2.5040e-01,  ...,  9.5079e-02,
         -1.8664e-01,  1.3954e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1354],
        [ 0.1672],
        [ 0.0848],
        [ 0.0740],
        [ 0.0990],
        [ 0.0641],
        [ 0.2145],
        [ 0.2022],
        [ 0.0725],
        [ 0.1950],
        [ 0.1282],
        [ 0.1575],
        [ 0.0312],
        [ 0.2162],
        [ 0.1748],
        [ 0.1172],
        [ 0.0738],
     

ep  1:  39%|█████████████████████████████▍                                             | 11/28 [00:26<00:40,  2.40s/it]

tensor([[-3.2459e-01, -2.3794e-01, -3.0124e-01,  ...,  1.4222e-01,
         -2.8320e-01,  1.2080e+04],
        [-3.1203e-01, -1.7950e-01, -2.1349e-01,  ...,  1.7090e-01,
         -2.4304e-01,  1.0211e+04],
        [-3.2183e-01, -2.4019e-01, -3.1942e-01,  ...,  2.3279e-01,
         -1.6719e-01,  1.3250e+04],
        ...,
        [-3.3568e-01, -2.3363e-01, -3.0281e-01,  ...,  1.3788e-01,
         -2.0404e-01,  1.8004e+04],
        [-3.7842e-01, -3.0284e-01, -1.9405e-01,  ...,  1.3097e-01,
         -1.9621e-01,  1.5081e+04],
        [-2.8738e-01, -2.5533e-01, -2.9753e-01,  ...,  1.0496e-01,
         -3.2122e-01,  1.5973e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1852],
        [ 0.1104],
        [ 0.1202],
        [ 0.2398],
        [ 0.1206],
        [ 0.2271],
        [ 0.1841],
        [ 0.1375],
        [ 0.1859],
        [ 0.2192],
        [ 0.1393],
        [ 0.1727],
        [ 0.0257],
        [ 0.1801],
        [ 0.2749],
        [ 0.1051],
        [ 0.1749],
     

ep  1:  43%|████████████████████████████████▏                                          | 12/28 [00:29<00:38,  2.40s/it]

tensor([[-3.3743e-01, -2.0880e-01, -2.6536e-01,  ...,  1.4941e-01,
         -2.5504e-01,  9.1085e+03],
        [-3.0196e-01, -2.7591e-01, -1.7241e-01,  ...,  7.4056e-03,
         -1.5029e-01,  1.0099e+04],
        [-3.4823e-01, -3.1872e-01, -2.3464e-01,  ...,  1.5997e-01,
         -2.1481e-01,  1.4512e+04],
        ...,
        [-3.6855e-01, -2.9132e-01, -3.8324e-01,  ...,  1.7815e-01,
         -1.8171e-01,  1.0259e+04],
        [-3.9478e-01, -2.7989e-01, -2.2436e-01,  ...,  8.2913e-02,
         -2.5136e-01,  1.1407e+04],
        [-3.7601e-01, -2.0120e-01, -3.2406e-01,  ...,  1.8357e-01,
         -1.8900e-01,  1.0564e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[-0.0022],
        [ 0.2201],
        [ 0.0642],
        [ 0.0451],
        [ 0.1217],
        [ 0.2975],
        [ 0.1586],
        [ 0.1819],
        [ 0.3488],
        [ 0.1426],
        [ 0.0692],
        [ 0.2713],
        [ 0.1962],
        [ 0.1343],
        [ 0.0234],
        [-0.0298],
        [ 0.1211],
     

ep  1:  46%|██████████████████████████████████▊                                        | 13/28 [00:31<00:36,  2.41s/it]

tensor([[-2.5477e-01, -2.9157e-01, -2.8427e-01,  ...,  1.2659e-02,
         -3.1601e-01,  1.6322e+04],
        [-4.0370e-01, -2.4276e-01, -2.3983e-01,  ...,  8.7447e-02,
         -2.6708e-01,  1.5112e+04],
        [-3.6686e-01, -2.5629e-01, -2.4552e-01,  ...,  5.9271e-02,
         -2.2498e-01,  1.2119e+04],
        ...,
        [-3.5108e-01, -3.5509e-01, -2.7353e-01,  ...,  1.5664e-01,
         -3.1722e-01,  1.6358e+04],
        [-3.3663e-01, -1.9020e-01, -4.3378e-01,  ...,  3.9663e-01,
         -2.4719e-01,  9.8202e+03],
        [-4.0325e-01, -2.6744e-01, -2.7704e-01,  ...,  1.7760e-01,
         -1.8814e-01,  1.5122e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0922],
        [ 0.0421],
        [ 0.0245],
        [ 0.1123],
        [ 0.0770],
        [ 0.1513],
        [ 0.0342],
        [ 0.1441],
        [ 0.0628],
        [-0.0056],
        [ 0.2822],
        [ 0.1991],
        [ 0.0884],
        [ 0.1828],
        [ 0.0727],
        [ 0.0404],
        [ 0.2031],
     

ep  1:  50%|█████████████████████████████████████▌                                     | 14/28 [00:33<00:33,  2.40s/it]

tensor([[-3.5300e-01, -2.8900e-01, -2.5508e-01,  ...,  1.8285e-01,
         -2.4425e-01,  1.1415e+04],
        [-3.8611e-01, -2.5015e-01, -2.3484e-01,  ...,  1.0542e-01,
         -2.8282e-01,  1.7985e+04],
        [-4.0072e-01, -2.9792e-01, -2.4083e-01,  ...,  1.8090e-01,
         -1.8157e-01,  1.8096e+04],
        ...,
        [-2.8291e-01, -2.7840e-01, -2.5467e-01,  ...,  9.7575e-02,
         -2.3178e-01,  8.7709e+03],
        [-4.6685e-01, -2.2966e-01, -3.7665e-01,  ...,  1.1104e-01,
         -1.4542e-01,  8.7118e+03],
        [-3.5740e-01, -2.6865e-01, -2.6201e-01,  ...,  1.1811e-01,
         -1.8690e-01,  1.7666e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.2570],
        [ 0.0581],
        [ 0.1349],
        [-0.0731],
        [ 0.1187],
        [ 0.1046],
        [ 0.1495],
        [ 0.0807],
        [ 0.0233],
        [ 0.2424],
        [ 0.1963],
        [ 0.1772],
        [ 0.1587],
        [ 0.0359],
        [ 0.0918],
        [ 0.1190],
        [ 0.1045],
     

ep  1:  54%|████████████████████████████████████████▏                                  | 15/28 [00:36<00:31,  2.41s/it]

tensor([[-4.5305e-01, -2.7870e-01, -3.0196e-01,  ...,  2.7409e-01,
         -1.1064e-01,  1.5373e+04],
        [-3.3565e-01, -2.8179e-01, -3.2741e-01,  ...,  1.9027e-01,
         -2.7028e-01,  1.0809e+04],
        [-3.5398e-01, -2.6388e-01, -3.9895e-01,  ...,  2.1543e-01,
         -1.6756e-01,  1.7851e+04],
        ...,
        [-3.3572e-01, -2.3196e-01, -3.2597e-01,  ...,  2.2033e-01,
         -2.3701e-01,  1.1983e+04],
        [-3.7077e-01, -2.7277e-01, -2.9568e-01,  ...,  1.4839e-01,
         -1.9758e-01,  1.0406e+04],
        [-3.2535e-01, -2.2106e-01, -2.6107e-01,  ...,  1.0786e-01,
         -2.7994e-01,  1.1707e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 6.7431e-02],
        [ 1.7475e-01],
        [ 1.0584e-01],
        [ 1.4325e-01],
        [ 1.6375e-01],
        [ 2.3880e-02],
        [ 2.1142e-01],
        [ 1.2685e-01],
        [ 8.4818e-02],
        [ 2.4560e-02],
        [ 1.7275e-01],
        [ 1.3797e-01],
        [ 1.5316e-01],
        [ 4.0125e-02],
      

ep  1:  57%|██████████████████████████████████████████▊                                | 16/28 [00:38<00:29,  2.42s/it]

tensor([[-3.7931e-01, -2.2443e-01, -3.5890e-01,  ...,  1.5795e-01,
         -2.6515e-01,  1.3488e+04],
        [-3.0730e-01, -2.5806e-01, -3.0354e-01,  ...,  8.1934e-02,
         -2.2970e-01,  1.5962e+04],
        [-3.6164e-01, -2.1393e-01, -2.8348e-01,  ...,  4.2913e-02,
         -2.9530e-01,  1.2170e+04],
        ...,
        [-3.5320e-01, -2.9637e-01, -2.7375e-01,  ...,  7.3198e-02,
         -2.4979e-01,  1.5568e+04],
        [-3.4444e-01, -1.2365e-01, -3.7383e-01,  ...,  2.9008e-01,
         -1.9203e-01,  1.6995e+04],
        [-3.7464e-01, -2.5599e-01, -3.0195e-01,  ...,  2.4797e-01,
         -1.8899e-01,  1.6285e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.2122],
        [ 0.2553],
        [ 0.1529],
        [ 0.1312],
        [ 0.1929],
        [ 0.1566],
        [ 0.1286],
        [ 0.1989],
        [ 0.1915],
        [ 0.0523],
        [ 0.0302],
        [ 0.0481],
        [ 0.2171],
        [ 0.1299],
        [ 0.0951],
        [ 0.2315],
        [ 0.0372],
     

ep  1:  61%|█████████████████████████████████████████████▌                             | 17/28 [00:41<00:26,  2.44s/it]

tensor([[-2.7711e-01, -3.1349e-01, -2.9681e-01,  ...,  6.9508e-02,
         -2.2446e-01,  1.7052e+04],
        [-4.3018e-01, -3.2038e-01, -2.4274e-01,  ...,  1.1559e-01,
         -2.4719e-01,  9.8822e+03],
        [-2.7715e-01, -1.5343e-02, -5.3145e-01,  ...,  4.3553e-01,
         -8.8943e-02,  1.1096e+04],
        ...,
        [-4.3359e-01, -2.4853e-01, -4.3263e-01,  ...,  2.4637e-01,
         -1.4908e-01,  1.2377e+04],
        [-3.3929e-01, -3.3239e-01, -2.3733e-01,  ...,  8.1320e-02,
         -2.8956e-01,  9.5060e+03],
        [-3.8640e-01, -2.5571e-01, -2.4143e-01,  ...,  1.7047e-01,
         -2.0258e-01,  1.6577e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1293],
        [ 0.1429],
        [ 0.1398],
        [ 0.0536],
        [ 0.0711],
        [ 0.0603],
        [ 0.0193],
        [ 0.1390],
        [ 0.0903],
        [ 0.1046],
        [ 0.1492],
        [ 0.1891],
        [ 0.0822],
        [ 0.1463],
        [ 0.1778],
        [ 0.2689],
        [ 0.1650],
     

ep  1:  64%|████████████████████████████████████████████████▏                          | 18/28 [00:43<00:24,  2.43s/it]

tensor([[-3.7133e-01, -2.7087e-01, -3.5207e-01,  ...,  1.9291e-01,
         -2.6387e-01,  1.5168e+04],
        [-3.7062e-01, -2.3328e-01, -2.2481e-01,  ...,  2.1674e-01,
         -2.3106e-01,  1.7807e+04],
        [-3.8326e-01, -2.6951e-01, -2.3874e-01,  ...,  1.4601e-01,
         -2.5078e-01,  1.3596e+04],
        ...,
        [-3.3225e-01, -2.9320e-01, -3.9016e-01,  ...,  2.0294e-01,
         -1.7648e-01,  9.7869e+03],
        [-4.0602e-01, -2.9946e-01, -3.2356e-01,  ...,  1.6040e-01,
         -1.6973e-01,  1.2641e+04],
        [-3.9911e-01, -3.4591e-01, -2.1510e-01,  ...,  2.4329e-01,
         -1.9050e-01,  1.8042e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1174],
        [ 0.1246],
        [ 0.1632],
        [ 0.0030],
        [ 0.0621],
        [ 0.1083],
        [-0.0759],
        [ 0.0478],
        [ 0.2001],
        [ 0.1188],
        [ 0.1317],
        [ 0.0655],
        [ 0.0189],
        [ 0.2154],
        [ 0.0871],
        [ 0.0734],
        [ 0.1341],
     

ep  1:  68%|██████████████████████████████████████████████████▉                        | 19/28 [00:46<00:21,  2.44s/it]

tensor([[-2.8395e-01, -3.1726e-01, -2.6154e-01,  ...,  1.6826e-01,
         -2.9582e-01,  1.5452e+04],
        [-3.4824e-01, -2.1081e-01, -2.6195e-01,  ...,  1.3544e-01,
         -2.0118e-01,  1.5225e+04],
        [-3.7273e-01, -2.2576e-01, -3.1687e-01,  ...,  2.5766e-01,
         -1.9006e-01,  1.7068e+04],
        ...,
        [-3.3856e-01, -3.5045e-01, -2.0582e-01,  ...,  1.5379e-01,
         -2.1888e-01,  1.6445e+04],
        [-4.0525e-01, -4.3345e-02, -5.5586e-01,  ...,  4.8086e-01,
         -6.1041e-02,  1.0511e+04],
        [-4.0058e-01, -3.1717e-01, -2.7558e-01,  ...,  1.8690e-01,
         -1.6492e-01,  1.7149e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0850],
        [ 0.1470],
        [ 0.1643],
        [ 0.1058],
        [ 0.1586],
        [ 0.0422],
        [ 0.1142],
        [ 0.1576],
        [ 0.1622],
        [ 0.0665],
        [ 0.0834],
        [ 0.3098],
        [ 0.0825],
        [ 0.1186],
        [ 0.1513],
        [ 0.1577],
        [ 0.2173],
     

ep  1:  71%|█████████████████████████████████████████████████████▌                     | 20/28 [00:48<00:19,  2.44s/it]

tensor([[-2.8616e-01, -3.0795e-01, -2.6183e-01,  ...,  8.9562e-02,
         -2.5689e-01,  1.2503e+04],
        [-3.6694e-01, -2.4757e-01, -3.2970e-01,  ...,  9.0982e-02,
         -2.3456e-01,  1.2978e+04],
        [-4.3634e-01, -1.4243e-01, -4.4456e-01,  ...,  2.2481e-01,
         -2.4053e-01,  1.3928e+04],
        ...,
        [-3.6833e-01, -2.3179e-01, -2.7412e-01,  ...,  1.2985e-01,
         -2.6961e-01,  1.2654e+04],
        [-3.7569e-01, -2.4126e-01, -3.4337e-01,  ...,  1.6244e-01,
         -2.4565e-01,  1.4996e+04],
        [-3.6707e-01, -3.1097e-01, -1.8785e-01,  ...,  1.8149e-01,
         -3.1150e-01,  1.7949e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1007],
        [ 0.2261],
        [ 0.2474],
        [ 0.1629],
        [ 0.0916],
        [ 0.1168],
        [ 0.0773],
        [ 0.1925],
        [ 0.2325],
        [ 0.1530],
        [ 0.0802],
        [ 0.0922],
        [ 0.0616],
        [ 0.1141],
        [ 0.0745],
        [ 0.0979],
        [ 0.1393],
     

ep  1:  75%|████████████████████████████████████████████████████████▎                  | 21/28 [00:51<00:17,  2.46s/it]

tensor([[-3.8482e-01, -3.1239e-01, -3.2807e-01,  ...,  1.4298e-01,
         -2.8431e-01,  1.1658e+04],
        [-3.1563e-01, -2.6474e-01, -3.1360e-01,  ...,  1.2401e-01,
         -2.4362e-01,  1.3077e+04],
        [-3.4132e-01, -3.3503e-01, -2.3941e-01,  ...,  1.1473e-01,
         -2.8096e-01,  1.2397e+04],
        ...,
        [-4.1233e-01, -2.9614e-01, -2.5443e-01,  ...,  1.1757e-01,
         -2.5338e-01,  1.5130e+04],
        [-3.5356e-01, -2.3054e-01, -2.2823e-01,  ...,  1.8668e-01,
         -2.1822e-01,  1.7525e+04],
        [-3.3830e-01, -2.5286e-01, -2.4974e-01,  ...,  5.7808e-02,
         -2.2060e-01,  1.2627e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0772],
        [ 0.1847],
        [ 0.2194],
        [ 0.0326],
        [ 0.0553],
        [ 0.0818],
        [ 0.0908],
        [ 0.0730],
        [ 0.1686],
        [ 0.1134],
        [ 0.1369],
        [ 0.0549],
        [ 0.1014],
        [ 0.0675],
        [ 0.1490],
        [ 0.0504],
        [ 0.2009],
     

ep  1:  79%|██████████████████████████████████████████████████████████▉                | 22/28 [00:53<00:14,  2.46s/it]

tensor([[-3.7485e-01, -2.5687e-01, -3.1148e-01,  ...,  2.0848e-01,
         -2.3283e-01,  1.0835e+04],
        [-2.6575e-01, -8.9621e-02, -2.5638e-01,  ...,  2.6353e-01,
         -2.9237e-01,  1.7781e+04],
        [-3.9721e-01, -2.7197e-01, -3.5599e-01,  ...,  2.1331e-01,
         -2.4225e-01,  1.7024e+04],
        ...,
        [-3.8812e-01, -2.8072e-01, -2.4111e-01,  ...,  9.0655e-02,
         -2.8414e-01,  1.2096e+04],
        [-3.1469e-01, -2.4341e-01, -2.8113e-01,  ...,  1.8975e-01,
         -2.0186e-01,  1.7098e+04],
        [-3.4435e-01, -2.3824e-01, -3.2206e-01,  ...,  1.8363e-01,
         -2.9249e-01,  1.0720e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 1.2074e-01],
        [ 1.5917e-01],
        [-5.5998e-02],
        [ 2.1706e-01],
        [ 1.6547e-01],
        [ 1.7824e-01],
        [ 2.5717e-01],
        [ 2.3241e-01],
        [ 1.1633e-01],
        [ 1.0748e-01],
        [ 2.1426e-01],
        [ 1.2568e-01],
        [ 2.1936e-01],
        [ 1.6927e-01],
      

ep  1:  82%|█████████████████████████████████████████████████████████████▌             | 23/28 [00:56<00:12,  2.47s/it]

tensor([[-2.6379e-01, -2.7067e-01, -3.0095e-01,  ...,  1.5841e-01,
         -2.4826e-01,  1.1796e+04],
        [-3.4343e-01, -2.8342e-01, -2.5776e-01,  ...,  1.0191e-01,
         -2.3112e-01,  1.0023e+04],
        [-3.1869e-01, -3.1717e-01, -2.5144e-01,  ...,  1.8114e-01,
         -2.8837e-01,  1.6073e+04],
        ...,
        [-4.1703e-01, -3.5060e-01, -3.6578e-01,  ...,  6.8911e-02,
         -2.2357e-01,  9.8299e+03],
        [-3.7059e-01, -2.4359e-01, -3.0042e-01,  ...,  2.5463e-02,
         -2.2841e-01,  1.2130e+04],
        [-3.7229e-01, -1.2278e-01, -4.0790e-01,  ...,  3.7274e-01,
         -2.0518e-01,  1.5112e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.0902],
        [0.1588],
        [0.1022],
        [0.1492],
        [0.1045],
        [0.1234],
        [0.1561],
        [0.1080],
        [0.1038],
        [0.1526],
        [0.0689],
        [0.0905],
        [0.0298],
        [0.1002],
        [0.1038],
        [0.0516],
        [0.1214],
        [0.1444],
    

ep  1:  86%|████████████████████████████████████████████████████████████████▎          | 24/28 [00:58<00:09,  2.47s/it]

tensor([[-3.1123e-01, -2.1693e-01, -3.3806e-01,  ...,  1.1354e-01,
         -2.7620e-01,  1.3115e+04],
        [-4.3366e-01, -1.6017e-01, -4.0959e-01,  ...,  2.4805e-01,
         -1.7386e-01,  1.6409e+04],
        [-3.1132e-01, -2.7820e-01, -2.7719e-01,  ...,  9.1738e-02,
         -2.2302e-01,  9.7894e+03],
        ...,
        [-3.5408e-01,  3.2669e-02, -6.0792e-01,  ...,  4.2723e-01,
         -7.4309e-02,  1.0725e+04],
        [-4.1321e-01, -2.8514e-01, -3.1058e-01,  ...,  1.0981e-01,
         -1.9352e-01,  1.0607e+04],
        [-4.4648e-01, -2.7116e-01, -2.8365e-01,  ...,  1.9463e-01,
         -2.0865e-01,  1.4573e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1721],
        [ 0.1328],
        [ 0.0939],
        [ 0.1086],
        [ 0.2809],
        [ 0.0558],
        [ 0.1400],
        [ 0.0840],
        [ 0.0957],
        [ 0.0560],
        [-0.0161],
        [ 0.1153],
        [ 0.1987],
        [ 0.2037],
        [ 0.1734],
        [ 0.1675],
        [ 0.0912],
     

ep  1:  89%|██████████████████████████████████████████████████████████████████▉        | 25/28 [01:01<00:07,  2.47s/it]

tensor([[-4.2010e-01, -2.8318e-01, -3.1734e-01,  ...,  6.7488e-02,
         -2.5366e-01,  1.5751e+04],
        [-3.4040e-01, -1.7519e-01, -3.0696e-01,  ...,  9.3888e-02,
         -2.8799e-01,  1.2303e+04],
        [-3.9119e-01, -2.0580e-01, -2.4603e-01,  ...,  1.3229e-01,
         -2.6461e-01,  1.0297e+04],
        ...,
        [-3.4437e-01, -2.4213e-01, -2.5824e-01,  ...,  1.0213e-01,
         -1.8415e-01,  1.2574e+04],
        [-2.6983e-01, -1.5044e-01, -2.2041e-01,  ...,  1.6992e-01,
         -2.0414e-01,  1.7265e+04],
        [-2.8993e-01, -3.0053e-01, -3.3836e-01,  ...,  1.6820e-01,
         -2.7964e-01,  1.5464e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0867],
        [ 0.1433],
        [ 0.0268],
        [ 0.1148],
        [ 0.0354],
        [ 0.0231],
        [ 0.0430],
        [ 0.2764],
        [ 0.0144],
        [ 0.0731],
        [ 0.0494],
        [ 0.0988],
        [ 0.0443],
        [ 0.0261],
        [ 0.1613],
        [ 0.1441],
        [ 0.2256],
     

ep  1:  93%|█████████████████████████████████████████████████████████████████████▋     | 26/28 [01:03<00:04,  2.47s/it]

tensor([[-4.2474e-01, -1.2543e-01, -5.0242e-01,  ...,  3.3865e-01,
         -1.8406e-01,  1.2395e+04],
        [-2.9741e-01, -2.9884e-01, -2.3595e-01,  ...,  1.1788e-01,
         -2.8584e-01,  1.2897e+04],
        [-3.7837e-01, -2.6571e-01, -2.7910e-01,  ...,  1.7422e-01,
         -2.6895e-01,  1.8116e+04],
        ...,
        [-3.1776e-01, -2.5391e-01, -2.9055e-01,  ...,  1.0213e-01,
         -2.5599e-01,  1.7614e+04],
        [-3.7628e-01, -2.1160e-01, -3.3304e-01,  ...,  1.8846e-01,
         -1.8618e-01,  9.9745e+03],
        [-2.8787e-01, -2.3555e-01, -1.4645e-01,  ...,  1.0126e-01,
         -2.0077e-01,  1.8024e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0669],
        [ 0.1605],
        [ 0.0844],
        [ 0.1975],
        [ 0.0639],
        [ 0.0810],
        [ 0.0273],
        [ 0.1980],
        [ 0.1425],
        [ 0.1306],
        [ 0.1499],
        [ 0.1400],
        [ 0.1844],
        [ 0.2097],
        [-0.0088],
        [ 0.1788],
        [-0.0127],
     

ep  1:  96%|████████████████████████████████████████████████████████████████████████▎  | 27/28 [01:06<00:02,  2.48s/it]

tensor([[-3.9705e-01, -2.3532e-01, -3.6924e-01,  ...,  2.5607e-01,
         -1.5058e-01,  1.6600e+04],
        [-3.5163e-01, -2.0402e-01, -5.4794e-01,  ...,  3.1770e-01,
         -1.2157e-01,  1.2811e+04],
        [-3.5085e-01, -2.6410e-01, -2.9014e-01,  ...,  1.9831e-01,
         -3.1048e-01,  1.7321e+04],
        ...,
        [-4.0395e-01, -1.9881e-01, -2.9938e-01,  ...,  1.2429e-01,
         -1.9713e-01,  1.0927e+04],
        [-4.3329e-01, -2.7915e-01, -3.4521e-01,  ...,  1.7081e-01,
         -2.2137e-01,  1.3125e+04],
        [-3.5227e-01, -2.2306e-01, -2.5311e-01,  ...,  1.4296e-01,
         -1.7026e-01,  9.3500e+03]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1386],
        [ 0.2632],
        [ 0.1588],
        [ 0.1023],
        [ 0.0707],
        [ 0.1883],
        [ 0.0997],
        [ 0.0151],
        [ 0.0815],
        [ 0.1066],
        [ 0.2394],
        [ 0.0574],
        [-0.0511],
        [ 0.0966],
        [ 0.2424],
        [ 0.2084],
        [ 0.1585],
     

ep  1: 100%|███████████████████████████████████████████████████████████████████████████| 28/28 [01:08<00:00,  2.44s/it]
valid:   0%|                                                                                     | 0/4 [00:00<?, ?it/s]

tensor([[-4.2497e-01, -2.1426e-01, -1.6749e-01,  ...,  1.6105e-01,
         -1.7643e-01,  1.7721e+04],
        [-4.2516e-01, -2.8570e-01, -2.7787e-01,  ...,  2.5332e-01,
         -1.7212e-01,  1.4713e+04],
        [-3.8417e-01, -2.6580e-01, -2.8672e-01,  ...,  2.6160e-01,
         -2.1652e-01,  1.7493e+04],
        ...,
        [-4.0268e-01, -3.4772e-01, -3.0572e-01,  ...,  2.0169e-01,
         -2.3841e-01,  1.0390e+04],
        [-4.0140e-01, -2.7776e-01, -3.2262e-01,  ...,  9.9439e-02,
         -2.7022e-01,  1.6173e+04],
        [-3.8801e-01, -2.1365e-01, -2.0972e-01,  ...,  1.0942e-01,
         -2.2007e-01,  1.5301e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1246],
        [ 0.2023],
        [ 0.0551],
        [ 0.1023],
        [ 0.1903],
        [ 0.2069],
        [ 0.2031],
        [ 0.0519],
        [ 0.1319],
        [ 0.1224],
        [ 0.1773],
        [ 0.1459],
        [ 0.1880],
        [ 0.1523],
        [ 0.0452],
        [ 0.0851],
        [ 0.1860],
     

valid:  25%|███████████████████▎                                                         | 1/4 [00:02<00:07,  2.50s/it]

tensor([[-3.2233e-01, -2.8638e-01, -2.5438e-01,  ...,  1.1127e-01,
         -2.4928e-01,  8.2773e+03],
        [-3.6531e-01, -1.8541e-01, -3.2476e-01,  ...,  9.8329e-02,
         -2.3989e-01,  8.2921e+03],
        [-3.7270e-01, -3.1051e-01, -2.6006e-01,  ...,  1.0789e-01,
         -1.8490e-01,  8.4220e+03],
        ...,
        [-3.7780e-01, -2.1708e-01, -3.6814e-01,  ...,  2.7568e-01,
         -1.1368e-01,  7.2709e+03],
        [-3.9939e-01, -2.8329e-01, -2.7683e-01,  ...,  1.3335e-01,
         -2.4340e-01,  7.3509e+03],
        [-3.1813e-01, -1.7252e-01, -3.3864e-01,  ...,  1.8896e-01,
         -2.7303e-01,  7.1148e+03]], device='cuda:0')
tensor([[0.1177],
        [0.1269],
        [0.1281],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [

valid:  50%|██████████████████████████████████████▌                                      | 2/4 [00:05<00:05,  2.50s/it]

tensor([[-2.6459e-01, -1.9816e-01, -2.3166e-01,  ...,  4.3367e-02,
         -2.4275e-01,  7.3657e+03],
        [-4.4351e-01, -1.6997e-01, -2.4599e-01,  ...,  5.0830e-02,
         -2.2045e-01,  7.4660e+03],
        [-3.6358e-01, -2.7964e-01, -2.1717e-01,  ...,  1.4754e-01,
         -2.0238e-01,  7.5556e+03],
        ...,
        [-3.0289e-01, -2.8608e-01, -3.0153e-01,  ...,  9.7022e-02,
         -2.0641e-01,  7.5523e+03],
        [-3.2801e-01, -2.4983e-01, -2.3538e-01,  ...,  1.0617e-01,
         -3.3416e-01,  7.9973e+03],
        [-3.5315e-01, -2.7097e-01, -2.6696e-01,  ...,  1.3142e-01,
         -2.4509e-01,  8.4248e+03]], device='cuda:0')
tensor([[0.1177],
        [0.1269],
        [0.1281],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [

valid:  75%|█████████████████████████████████████████████████████████▊                   | 3/4 [00:07<00:02,  2.51s/it]

tensor([[-4.3092e-01, -2.7025e-01, -3.0428e-01,  ...,  1.7351e-01,
         -2.1021e-01,  8.2736e+03],
        [-3.9686e-01, -1.6447e-01, -3.1085e-01,  ...,  2.0987e-01,
         -1.9773e-01,  8.4973e+03],
        [-4.0648e-01, -2.3350e-01, -3.5499e-01,  ...,  1.4228e-01,
         -2.3323e-01,  8.8352e+03],
        ...,
        [-2.5792e-01, -2.7998e-01, -2.9788e-01,  ...,  6.9041e-02,
         -1.5715e-01,  1.1430e+04],
        [-3.6105e-01, -3.1575e-01, -2.2100e-01,  ...,  1.0072e-01,
         -2.3743e-01,  1.1417e+04],
        [-3.4809e-01, -2.3759e-01, -2.0738e-01,  ...,  1.4380e-01,
         -2.2184e-01,  1.1349e+04]], device='cuda:0')
tensor([[0.1177],
        [0.1269],
        [0.1281],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283],
        [

valid: 100%|█████████████████████████████████████████████████████████████████████████████| 4/4 [00:07<00:00,  1.94s/it]
ep  2:   0%|                                                                                    | 0/28 [00:00<?, ?it/s]

tensor([[-3.0299e-01, -2.0548e-01, -3.7163e-01,  ...,  2.2204e-01,
         -2.4157e-01,  1.1479e+04],
        [-5.2351e-01, -1.3666e-01, -4.0322e-01,  ...,  1.6678e-01,
         -2.3608e-01,  1.1660e+04],
        [-2.8602e-01, -2.1112e-01, -2.7816e-01,  ...,  2.0929e-01,
         -2.4698e-01,  1.1616e+04],
        ...,
        [-4.1360e-01, -3.0982e-01, -2.5829e-01,  ...,  8.7612e-02,
         -2.1257e-01,  1.1642e+04],
        [-4.2525e-01, -2.9543e-01, -3.0947e-01,  ...,  7.3900e-02,
         -2.2249e-01,  1.1782e+04],
        [-2.6573e-01, -2.9025e-01, -3.4377e-01,  ...,  1.3080e-01,
         -1.8334e-01,  1.1734e+04]], device='cuda:0')
tensor([[0.1177],
        [0.1269],
        [0.1281],
        [0.1283],
        [0.1283],
        [0.1283],
        [0.1283]], device='cuda:0')
valid acc 0.5376884422110553


ep  2:   4%|██▋                                                                         | 1/28 [00:02<01:08,  2.54s/it]

tensor([[-2.2419e-01, -1.4146e-01, -3.0499e-01,  ...,  5.6024e-02,
         -2.5358e-01,  1.0783e+04],
        [-3.3678e-01, -2.6744e-01, -3.1534e-01,  ...,  1.4317e-01,
         -2.8767e-01,  1.2098e+04],
        [-4.4709e-01, -3.6001e-01, -2.8681e-01,  ...,  1.2866e-01,
         -1.0287e-01,  1.7972e+04],
        ...,
        [-3.7146e-01, -2.0861e-01, -4.1357e-01,  ...,  2.3719e-01,
         -2.2152e-01,  1.5967e+04],
        [-3.7831e-01, -2.0204e-01, -4.7379e-01,  ...,  2.8515e-01,
         -1.4798e-01,  1.1836e+04],
        [-3.6323e-01, -2.5868e-01, -2.8219e-01,  ...,  1.9990e-01,
         -2.8093e-01,  1.5619e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1768],
        [ 0.2052],
        [ 0.2008],
        [ 0.1280],
        [ 0.2177],
        [ 0.0689],
        [ 0.0953],
        [ 0.1087],
        [ 0.0882],
        [ 0.1007],
        [ 0.1866],
        [ 0.1774],
        [ 0.1371],
        [ 0.1233],
        [ 0.3001],
        [ 0.1001],
        [ 0.0063],
     

ep  2:   7%|█████▍                                                                      | 2/28 [00:05<01:06,  2.54s/it]

tensor([[-3.8028e-01, -2.9210e-01, -2.2280e-01,  ...,  1.0941e-01,
         -2.6782e-01,  1.2503e+04],
        [-3.6499e-01, -1.7875e-01, -2.1814e-01,  ...,  5.1647e-02,
         -2.2522e-01,  9.1716e+03],
        [-2.8655e-01, -2.7082e-01, -3.3381e-01,  ...,  1.1852e-01,
         -3.1277e-01,  1.0453e+04],
        ...,
        [-3.9720e-01, -2.9734e-01, -2.8515e-01,  ...,  8.4707e-02,
         -2.0492e-01,  1.4995e+04],
        [-3.8929e-01, -2.3611e-01, -2.1793e-01,  ...,  7.9710e-02,
         -1.9122e-01,  1.2393e+04],
        [-3.2970e-01, -2.4861e-01, -2.0659e-01,  ...,  1.0500e-01,
         -2.7375e-01,  1.7087e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0745],
        [ 0.2025],
        [ 0.2083],
        [ 0.1338],
        [ 0.2086],
        [ 0.1740],
        [ 0.1636],
        [ 0.1225],
        [ 0.1638],
        [ 0.1832],
        [ 0.2733],
        [ 0.0629],
        [ 0.2481],
        [ 0.1260],
        [ 0.1547],
        [ 0.0738],
        [ 0.0492],
     

ep  2:  11%|████████▏                                                                   | 3/28 [00:07<01:03,  2.53s/it]

tensor([[-4.2770e-01, -2.6157e-01, -2.7998e-01,  ...,  2.1762e-01,
         -2.0398e-01,  1.6315e+04],
        [-3.9792e-01, -2.4752e-01, -2.5154e-01,  ...,  2.1127e-01,
         -1.6999e-01,  1.7664e+04],
        [-3.5451e-01, -1.6673e-01, -4.3754e-01,  ...,  3.5644e-01,
         -1.6787e-01,  1.5875e+04],
        ...,
        [-3.2680e-01, -2.9765e-01, -2.5188e-01,  ...,  1.3327e-01,
         -2.6115e-01,  1.7417e+04],
        [-3.9300e-01, -1.7237e-01, -4.4011e-01,  ...,  1.6408e-01,
         -2.2132e-01,  1.0282e+04],
        [-3.2751e-01, -1.8968e-01, -2.9080e-01,  ...,  3.7985e-02,
         -2.4650e-01,  1.0627e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1253],
        [ 0.0101],
        [ 0.0463],
        [ 0.1265],
        [ 0.2632],
        [ 0.0842],
        [ 0.3602],
        [ 0.1387],
        [ 0.0463],
        [ 0.0495],
        [ 0.0987],
        [ 0.0612],
        [ 0.2171],
        [ 0.1539],
        [ 0.0506],
        [ 0.1323],
        [ 0.0720],
     

ep  2:  14%|██████████▊                                                                 | 4/28 [00:10<01:00,  2.54s/it]

tensor([[-3.2804e-01, -2.2651e-01, -2.6661e-01,  ...,  1.8661e-01,
         -2.1290e-01,  1.8050e+04],
        [-3.4665e-01, -2.5800e-01, -1.7945e-01,  ...,  6.8822e-02,
         -2.7608e-01,  1.2927e+04],
        [-2.5010e-01, -3.1880e-01, -1.7650e-01,  ...,  6.0610e-02,
         -2.1590e-01,  1.7760e+04],
        ...,
        [-3.4446e-01, -1.8069e-01, -2.6802e-01,  ...,  1.2977e-01,
         -2.3758e-01,  1.2090e+04],
        [-3.9039e-01, -3.0637e-01, -3.1312e-01,  ...,  1.7153e-01,
         -2.3803e-01,  1.7489e+04],
        [-3.4701e-01, -2.8950e-01, -2.0261e-01,  ...,  1.7073e-01,
         -2.8898e-01,  1.7111e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1440],
        [ 0.0721],
        [ 0.0500],
        [ 0.1625],
        [ 0.2645],
        [ 0.1945],
        [ 0.0457],
        [ 0.2526],
        [-0.0127],
        [ 0.2093],
        [ 0.0486],
        [ 0.0980],
        [ 0.2083],
        [-0.0058],
        [ 0.2934],
        [-0.0041],
        [ 0.1782],
     

ep  2:  18%|█████████████▌                                                              | 5/28 [00:12<00:59,  2.57s/it]

tensor([[-3.4531e-01, -2.3031e-01, -2.6036e-01,  ...,  1.8791e-01,
         -2.0748e-01,  1.7542e+04],
        [-4.1313e-01, -2.3705e-01, -3.7843e-01,  ...,  2.4157e-01,
         -2.1135e-01,  1.8120e+04],
        [-3.1828e-01, -3.1655e-01, -2.6142e-01,  ...,  8.5632e-02,
         -3.0497e-01,  1.1177e+04],
        ...,
        [-3.7666e-01, -3.0000e-01, -3.2456e-01,  ...,  1.9312e-01,
         -1.5685e-01,  1.8030e+04],
        [-3.5731e-01, -3.0880e-01, -2.6493e-01,  ...,  7.5308e-02,
         -2.5260e-01,  1.2633e+04],
        [-3.3810e-01, -1.8181e-01, -2.4541e-01,  ...,  1.8361e-01,
         -1.8616e-01,  1.6964e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1773],
        [-0.0109],
        [ 0.2007],
        [ 0.3927],
        [ 0.2907],
        [ 0.0152],
        [ 0.0735],
        [ 0.0880],
        [-0.0046],
        [ 0.1678],
        [ 0.1649],
        [ 0.0989],
        [ 0.2571],
        [ 0.0655],
        [ 0.1038],
        [ 0.0878],
        [ 0.1787],
     

ep  2:  21%|████████████████▎                                                           | 6/28 [00:15<00:56,  2.56s/it]

tensor([[-3.3780e-01, -3.3942e-01, -2.1059e-01,  ...,  1.3705e-01,
         -2.2122e-01,  1.7849e+04],
        [-3.3109e-01, -4.0199e-01, -2.9982e-01,  ...,  8.8311e-02,
         -2.4402e-01,  1.0785e+04],
        [-3.4372e-01, -2.7101e-01, -3.0463e-01,  ...,  1.6187e-01,
         -1.8392e-01,  1.4537e+04],
        ...,
        [-4.3299e-01, -2.0525e-01, -2.4819e-01,  ...,  1.2748e-01,
         -2.0539e-01,  1.3170e+04],
        [-3.1132e-01, -2.7820e-01, -2.7719e-01,  ...,  9.1738e-02,
         -2.2302e-01,  9.7894e+03],
        [-3.8950e-01, -2.5194e-01, -2.8755e-01,  ...,  5.8617e-02,
         -2.5159e-01,  1.3091e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0042],
        [ 0.2702],
        [ 0.0820],
        [ 0.1545],
        [ 0.2317],
        [ 0.0959],
        [ 0.0921],
        [ 0.0615],
        [ 0.1333],
        [ 0.2040],
        [ 0.0712],
        [ 0.1422],
        [ 0.1917],
        [ 0.1345],
        [ 0.1430],
        [ 0.1875],
        [ 0.0162],
     

ep  2:  25%|███████████████████                                                         | 7/28 [00:17<00:53,  2.57s/it]

tensor([[-2.9563e-01, -3.4143e-01, -2.9219e-01,  ...,  1.2291e-01,
         -2.6646e-01,  1.2240e+04],
        [-4.0154e-01, -2.3943e-01, -2.2154e-01,  ...,  1.3188e-01,
         -2.5278e-01,  1.1977e+04],
        [-3.9941e-01, -2.5686e-01, -2.1903e-01,  ...,  1.8389e-01,
         -1.2521e-01,  1.7403e+04],
        ...,
        [-3.7705e-01, -2.8034e-01, -3.4579e-01,  ...,  2.1953e-01,
         -2.4024e-01,  9.2792e+03],
        [-4.0058e-01, -3.1717e-01, -2.7558e-01,  ...,  1.8690e-01,
         -1.6492e-01,  1.7149e+04],
        [-3.5201e-01, -3.3102e-01, -3.0311e-01,  ...,  1.0313e-01,
         -2.8311e-01,  1.3575e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1838],
        [ 0.1857],
        [ 0.1593],
        [ 0.1071],
        [ 0.1947],
        [ 0.1551],
        [ 0.1895],
        [ 0.1658],
        [ 0.1075],
        [ 0.0634],
        [ 0.0452],
        [ 0.1572],
        [ 0.0385],
        [-0.0351],
        [ 0.0600],
        [ 0.0987],
        [ 0.1415],
     

ep  2:  29%|█████████████████████▋                                                      | 8/28 [00:20<00:51,  2.57s/it]

tensor([[-3.2678e-01, -2.2474e-01, -3.1223e-01,  ...,  3.1335e-01,
         -2.0264e-01,  1.7216e+04],
        [-3.8635e-01, -2.9413e-01, -2.8776e-01,  ...,  6.0888e-02,
         -1.6599e-01,  8.7993e+03],
        [-3.5107e-01, -2.7279e-01, -1.7334e-01,  ...,  1.1278e-01,
         -2.4311e-01,  1.3264e+04],
        ...,
        [-4.2194e-01, -2.1275e-01, -4.3251e-01,  ...,  3.6249e-01,
         -2.0019e-01,  9.1353e+03],
        [-4.0073e-01, -2.9792e-01, -2.4083e-01,  ...,  1.8089e-01,
         -1.8156e-01,  1.8096e+04],
        [-3.3021e-01, -2.9705e-01, -3.1483e-01,  ...,  1.0274e-01,
         -2.6637e-01,  1.1985e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.0839],
        [0.1637],
        [0.1345],
        [0.2014],
        [0.2002],
        [0.0462],
        [0.2337],
        [0.0303],
        [0.1069],
        [0.1057],
        [0.1248],
        [0.1121],
        [0.1874],
        [0.0691],
        [0.1689],
        [0.1454],
        [0.0806],
        [0.0628],
    

ep  2:  32%|████████████████████████▍                                                   | 9/28 [00:23<00:48,  2.57s/it]

tensor([[-1.8185e-01, -3.1410e-01, -2.6966e-01,  ...,  1.1347e-01,
         -2.6688e-01,  9.7894e+03],
        [-3.9588e-01, -2.5914e-01, -3.3163e-01,  ...,  1.6032e-01,
         -2.9270e-01,  1.6419e+04],
        [-4.2034e-01, -2.3058e-01, -3.2197e-01,  ...,  1.2336e-01,
         -2.5352e-01,  9.9722e+03],
        ...,
        [-3.7158e-01, -2.4812e-01, -2.9887e-01,  ...,  2.1151e-01,
         -2.0637e-01,  1.6066e+04],
        [-3.7305e-01, -3.1574e-01, -2.0023e-01,  ...,  3.9638e-02,
         -1.8539e-01,  1.2719e+04],
        [-3.1099e-01, -2.4655e-01, -1.6991e-01,  ...,  9.5611e-02,
         -2.4402e-01,  1.7869e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1981],
        [ 0.0333],
        [ 0.1548],
        [ 0.1351],
        [ 0.1496],
        [ 0.0849],
        [ 0.2138],
        [ 0.2152],
        [ 0.1085],
        [ 0.2623],
        [ 0.2216],
        [ 0.0844],
        [ 0.1543],
        [ 0.0680],
        [ 0.0990],
        [ 0.1471],
        [ 0.0354],
     

ep  2:  36%|██████████████████████████▊                                                | 10/28 [00:25<00:46,  2.58s/it]

tensor([[-3.7116e-01, -2.0675e-01, -2.4161e-01,  ...,  2.0879e-01,
         -2.1851e-01,  1.7741e+04],
        [-3.5357e-01, -2.3054e-01, -2.2823e-01,  ...,  1.8668e-01,
         -2.1822e-01,  1.7525e+04],
        [-3.5278e-01, -3.4140e-01, -2.8865e-01,  ...,  1.1771e-01,
         -1.9806e-01,  1.2454e+04],
        ...,
        [-4.1821e-01, -3.2588e-01, -2.9103e-01,  ...,  1.8377e-01,
         -1.5814e-01,  1.2019e+04],
        [-4.1061e-01, -2.9977e-01, -2.5792e-01,  ...,  2.3501e-01,
         -1.4480e-01,  1.7863e+04],
        [-4.5050e-01, -2.5158e-01, -3.1677e-01,  ...,  1.8283e-01,
         -1.5187e-01,  1.8232e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 2.0202e-01],
        [ 5.9721e-02],
        [ 1.8542e-01],
        [ 1.6993e-01],
        [ 1.6555e-01],
        [ 1.4754e-01],
        [ 1.4126e-01],
        [-5.6814e-03],
        [ 1.5985e-01],
        [ 3.5747e-01],
        [ 2.8365e-01],
        [ 1.4772e-01],
        [ 2.1415e-01],
        [ 4.2142e-02],
      

ep  2:  39%|█████████████████████████████▍                                             | 11/28 [00:28<00:43,  2.58s/it]

tensor([[-4.9322e-01, -2.3207e-01, -3.2665e-01,  ...,  2.2416e-01,
         -1.3380e-01,  1.1990e+04],
        [-3.4965e-01, -3.2979e-01, -2.4615e-01,  ...,  1.3050e-01,
         -2.3201e-01,  1.5522e+04],
        [-3.5349e-01, -2.4693e-01, -3.5887e-01,  ...,  2.0039e-01,
         -2.3204e-01,  1.3900e+04],
        ...,
        [-3.2556e-01, -2.2911e-01, -2.4954e-01,  ...,  1.4875e-01,
         -2.9181e-01,  1.7675e+04],
        [-2.2634e-01, -6.0480e-02, -2.9187e-01,  ...,  2.7287e-01,
         -2.3739e-01,  1.6570e+04],
        [-3.3044e-01, -1.7218e-01, -3.1110e-01,  ...,  1.1008e-01,
         -1.0216e-01,  1.2240e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1303],
        [ 0.1569],
        [ 0.0441],
        [ 0.1247],
        [ 0.0512],
        [ 0.1362],
        [ 0.1501],
        [ 0.1590],
        [ 0.0223],
        [ 0.1377],
        [ 0.0612],
        [ 0.1452],
        [ 0.1302],
        [ 0.1238],
        [ 0.2420],
        [ 0.2628],
        [ 0.1085],
     

ep  2:  43%|████████████████████████████████▏                                          | 12/28 [00:30<00:41,  2.59s/it]

tensor([[-3.9060e-01, -2.9308e-01, -2.5256e-01,  ...,  1.0533e-01,
         -2.7164e-01,  1.3104e+04],
        [-3.0606e-01, -2.8102e-01, -1.6150e-01,  ...,  1.7149e-01,
         -2.4196e-01,  1.6463e+04],
        [-4.2881e-01, -2.5989e-01, -2.0768e-01,  ...,  6.7363e-02,
         -3.6985e-01,  1.5544e+04],
        ...,
        [-3.5085e-01, -2.6410e-01, -2.9014e-01,  ...,  1.9831e-01,
         -3.1048e-01,  1.7321e+04],
        [-3.2403e-01, -2.5641e-01, -3.0052e-01,  ...,  7.1031e-02,
         -2.9490e-01,  9.3214e+03],
        [-3.4821e-01, -2.8013e-01, -2.7043e-01,  ...,  1.3145e-01,
         -1.6341e-01,  1.8037e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0305],
        [ 0.2104],
        [ 0.2384],
        [ 0.1588],
        [ 0.2361],
        [ 0.0840],
        [ 0.2127],
        [ 0.1906],
        [ 0.2314],
        [ 0.1586],
        [ 0.2030],
        [ 0.0570],
        [ 0.0616],
        [-0.0224],
        [ 0.1381],
        [ 0.2161],
        [ 0.1536],
     

ep  2:  46%|██████████████████████████████████▊                                        | 13/28 [00:33<00:38,  2.59s/it]

tensor([[-4.1406e-01, -3.2810e-01, -2.4289e-01,  ...,  1.9869e-01,
         -2.0560e-01,  1.2571e+04],
        [-3.7063e-01, -2.3328e-01, -2.2482e-01,  ...,  2.1673e-01,
         -2.3106e-01,  1.7807e+04],
        [-3.8241e-01, -3.3520e-01, -2.8065e-01,  ...,  5.9659e-02,
         -3.0225e-01,  1.6385e+04],
        ...,
        [-3.5562e-01, -2.6933e-01, -3.0811e-01,  ...,  1.3852e-01,
         -1.6704e-01,  1.5471e+04],
        [-2.4638e-01, -3.0871e-01, -2.3775e-01,  ...,  9.7939e-02,
         -2.3311e-01,  1.1755e+04],
        [-3.8994e-01, -3.3908e-01, -2.4932e-01,  ...,  7.2639e-02,
         -1.9911e-01,  1.8058e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1463],
        [ 0.1623],
        [ 0.0520],
        [-0.0040],
        [ 0.0632],
        [ 0.0960],
        [ 0.1989],
        [ 0.0216],
        [ 0.0253],
        [ 0.0486],
        [ 0.1001],
        [ 0.1706],
        [ 0.0801],
        [ 0.1295],
        [-0.0033],
        [ 0.0053],
        [-0.0276],
     

ep  2:  50%|█████████████████████████████████████▌                                     | 14/28 [00:35<00:36,  2.58s/it]

tensor([[-3.6981e-01, -3.1122e-01, -2.8601e-01,  ...,  1.4610e-01,
         -1.9858e-01,  1.2128e+04],
        [-3.5299e-01, -2.6017e-01, -2.9378e-01,  ...,  8.8319e-02,
         -2.0205e-01,  1.3233e+04],
        [-4.4406e-01, -2.0456e-01, -2.9012e-01,  ...,  1.6277e-01,
         -3.0401e-01,  1.1838e+04],
        ...,
        [-3.3179e-01, -3.4927e-01, -2.3518e-01,  ...,  1.6775e-01,
         -2.4184e-01,  1.6460e+04],
        [-3.8915e-01, -1.4516e-01, -3.7912e-01,  ...,  2.0986e-01,
         -1.9628e-01,  9.0932e+03],
        [-4.3765e-01, -2.5380e-01, -2.6619e-01,  ...,  2.1349e-01,
         -2.1322e-01,  1.5988e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1260],
        [ 0.2252],
        [ 0.0420],
        [ 0.0415],
        [ 0.1576],
        [ 0.1799],
        [ 0.1522],
        [ 0.1740],
        [ 0.1318],
        [ 0.0863],
        [-0.0018],
        [ 0.1066],
        [ 0.2662],
        [ 0.2284],
        [ 0.1825],
        [ 0.1044],
        [ 0.1214],
     

ep  2:  54%|████████████████████████████████████████▏                                  | 15/28 [00:38<00:33,  2.58s/it]

tensor([[-3.8827e-01, -2.1415e-01, -4.2645e-01,  ...,  3.0873e-01,
         -1.6167e-01,  1.6323e+04],
        [-3.4748e-01, -2.5333e-01, -2.8754e-01,  ...,  1.8283e-01,
         -2.5133e-01,  1.3511e+04],
        [-3.3186e-01, -3.3786e-01, -2.4623e-01,  ...,  1.3533e-01,
         -1.6994e-01,  1.2581e+04],
        ...,
        [-5.3850e-01, -2.0739e-01, -3.6352e-01,  ...,  3.3791e-01,
         -1.3638e-01,  1.7132e+04],
        [-3.6365e-01, -3.6664e-01, -2.3616e-01,  ...,  1.5617e-01,
         -2.4494e-01,  1.3155e+04],
        [-3.8375e-01, -3.5576e-01, -2.4437e-01,  ...,  1.4666e-01,
         -2.0514e-01,  1.4579e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.1127],
        [0.0693],
        [0.0750],
        [0.1670],
        [0.2315],
        [0.0160],
        [0.0830],
        [0.1283],
        [0.1078],
        [0.1528],
        [0.1680],
        [0.1443],
        [0.1513],
        [0.0867],
        [0.1812],
        [0.1809],
        [0.1153],
        [0.0732],
    

ep  2:  57%|██████████████████████████████████████████▊                                | 16/28 [00:41<00:31,  2.59s/it]

tensor([[-3.3796e-01, -2.6609e-01, -2.6114e-01,  ...,  1.9801e-01,
         -2.2904e-01,  1.1232e+04],
        [-3.7089e-01, -3.0211e-01, -2.1948e-01,  ...,  2.0392e-01,
         -1.7377e-01,  1.7014e+04],
        [-4.1270e-01, -2.4804e-01, -2.9193e-01,  ...,  1.4030e-01,
         -2.2064e-01,  1.6640e+04],
        ...,
        [-3.7889e-01, -2.9723e-01, -2.7188e-01,  ...,  1.0623e-01,
         -2.4638e-01,  1.7515e+04],
        [-4.9020e-01, -1.0666e-01, -3.3240e-01,  ...,  3.0624e-01,
         -2.0592e-01,  1.7490e+04],
        [-3.4225e-01, -2.5272e-01, -2.8888e-01,  ...,  2.2232e-01,
         -2.7486e-01,  1.7281e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0273],
        [ 0.1393],
        [ 0.0900],
        [ 0.0704],
        [ 0.0273],
        [ 0.2359],
        [ 0.0241],
        [-0.0147],
        [ 0.1963],
        [ 0.0266],
        [ 0.2117],
        [ 0.0745],
        [ 0.1643],
        [ 0.1365],
        [ 0.0904],
        [ 0.0979],
        [ 0.1694],
     

ep  2:  61%|█████████████████████████████████████████████▌                             | 17/28 [00:43<00:28,  2.61s/it]

tensor([[-2.8924e-01, -2.7027e-01, -2.3954e-01,  ...,  1.5833e-01,
         -2.4384e-01,  1.2231e+04],
        [-2.8426e-01, -2.4660e-01, -2.8498e-01,  ...,  1.2266e-01,
         -1.9426e-01,  8.8482e+03],
        [-4.0394e-01, -1.0770e-01, -5.7590e-01,  ...,  4.3615e-01,
         -1.0274e-01,  1.0788e+04],
        ...,
        [-2.9312e-01, -2.3475e-01, -2.0464e-01,  ...,  1.4575e-01,
         -2.0263e-01,  1.7651e+04],
        [-3.2664e-01, -2.3396e-01, -2.6549e-01,  ...,  9.6552e-02,
         -1.8463e-01,  1.0068e+04],
        [-3.9897e-01, -1.2447e-01, -2.0418e-01,  ...,  1.7286e-01,
         -2.9900e-01,  1.0574e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1600],
        [ 0.0710],
        [ 0.1797],
        [ 0.1986],
        [ 0.0828],
        [ 0.0778],
        [ 0.0812],
        [ 0.0879],
        [ 0.0809],
        [ 0.1774],
        [ 0.0961],
        [ 0.0259],
        [ 0.2062],
        [ 0.0561],
        [ 0.1718],
        [ 0.2869],
        [ 0.1541],
     

ep  2:  64%|████████████████████████████████████████████████▏                          | 18/28 [00:46<00:26,  2.60s/it]

tensor([[-3.5298e-01, -1.6399e-01, -2.4613e-01,  ...,  2.3685e-01,
         -1.5084e-01,  1.6346e+04],
        [-2.9131e-01, -3.3647e-01, -3.0612e-01,  ...,  1.0542e-01,
         -2.1931e-01,  1.0573e+04],
        [-3.9960e-01, -2.9212e-01, -3.5465e-01,  ...,  1.0665e-01,
         -2.0778e-01,  1.7056e+04],
        ...,
        [-3.9211e-01, -2.1298e-01, -2.9768e-01,  ...,  1.9585e-01,
         -2.2380e-01,  1.1052e+04],
        [-4.1481e-01, -2.6142e-01, -2.9199e-01,  ...,  1.6227e-01,
         -2.2637e-01,  1.5461e+04],
        [-3.8343e-01, -1.9531e-01, -3.0533e-01,  ...,  1.8378e-01,
         -3.2077e-01,  8.4972e+03]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0505],
        [ 0.1415],
        [ 0.1387],
        [ 0.1189],
        [ 0.0868],
        [ 0.1224],
        [ 0.1085],
        [ 0.2127],
        [ 0.1788],
        [ 0.1639],
        [ 0.1760],
        [ 0.1821],
        [ 0.2192],
        [ 0.0534],
        [ 0.0716],
        [ 0.2300],
        [ 0.1219],
     

ep  2:  68%|██████████████████████████████████████████████████▉                        | 19/28 [00:49<00:23,  2.62s/it]

tensor([[-3.6191e-01, -2.4259e-01, -2.3643e-01,  ...,  7.1835e-02,
         -2.6600e-01,  1.1542e+04],
        [-4.3645e-01, -1.8698e-01, -4.6236e-01,  ...,  2.8848e-01,
         -2.0286e-01,  1.3146e+04],
        [-4.1917e-01, -2.8159e-01, -3.0760e-01,  ...,  1.7593e-01,
         -1.4976e-01,  1.7569e+04],
        ...,
        [-3.7252e-01, -2.9491e-01, -3.1334e-01,  ...,  1.9728e-01,
         -1.9813e-01,  1.6246e+04],
        [-3.5211e-01, -1.3102e-01, -3.8800e-01,  ...,  3.0619e-01,
         -1.5847e-01,  1.7252e+04],
        [-3.0647e-01, -3.1938e-01, -2.3066e-01,  ...,  1.6218e-01,
         -2.5533e-01,  1.6986e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1916],
        [ 0.1542],
        [ 0.0263],
        [ 0.0774],
        [-0.0005],
        [ 0.1137],
        [ 0.1083],
        [ 0.1251],
        [ 0.2144],
        [ 0.2119],
        [ 0.0604],
        [ 0.2366],
        [ 0.1339],
        [ 0.2611],
        [ 0.0817],
        [ 0.0287],
        [ 0.1210],
     

ep  2:  71%|█████████████████████████████████████████████████████▌                     | 20/28 [00:51<00:20,  2.62s/it]

tensor([[-3.2608e-01, -2.4032e-01, -2.3081e-01,  ...,  1.4076e-01,
         -1.8057e-01,  1.2966e+04],
        [-2.9353e-01, -1.8997e-01, -2.4092e-01,  ...,  1.0810e-01,
         -2.4201e-01,  1.2233e+04],
        [-3.7631e-01, -2.5635e-01, -3.0635e-01,  ...,  9.3865e-02,
         -2.5537e-01,  1.0595e+04],
        ...,
        [-4.1261e-01, -2.0914e-01, -2.6652e-01,  ...,  1.6195e-01,
         -2.1004e-01,  1.6371e+04],
        [-3.3173e-01, -2.1272e-01, -2.7858e-01,  ...,  1.1583e-01,
         -2.5102e-01,  1.6396e+04],
        [-3.0635e-01, -2.2245e-01, -1.8073e-01,  ...,  1.5041e-01,
         -2.0460e-01,  1.6925e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.1464],
        [0.2549],
        [0.1241],
        [0.1560],
        [0.1217],
        [0.0241],
        [0.2020],
        [0.1312],
        [0.1676],
        [0.1010],
        [0.1644],
        [0.1424],
        [0.1672],
        [0.0977],
        [0.1440],
        [0.1399],
        [0.0113],
        [0.1573],
    

ep  2:  75%|████████████████████████████████████████████████████████▎                  | 21/28 [00:54<00:18,  2.62s/it]

tensor([[-4.1889e-01, -1.3002e-01, -3.0639e-01,  ...,  2.7102e-01,
         -1.9283e-01,  1.7577e+04],
        [-3.1206e-01, -2.5036e-01, -3.3243e-01,  ...,  1.4896e-01,
         -2.4159e-01,  1.6459e+04],
        [-3.4126e-01, -2.9451e-01, -2.0882e-01,  ...,  8.6014e-02,
         -2.1372e-01,  1.5318e+04],
        ...,
        [-3.7615e-01, -2.3265e-01, -2.5026e-01,  ...,  1.2242e-01,
         -1.6281e-01,  1.7959e+04],
        [-4.1856e-01, -2.7149e-01, -2.2667e-01,  ...,  1.6969e-01,
         -2.7211e-01,  1.7165e+04],
        [-3.3789e-01, -2.7989e-01, -2.9995e-01,  ...,  1.0489e-01,
         -2.6547e-01,  1.2874e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0726],
        [ 0.1776],
        [-0.0067],
        [ 0.1125],
        [ 0.2791],
        [ 0.0788],
        [-0.0035],
        [ 0.0803],
        [ 0.2581],
        [ 0.0107],
        [ 0.0842],
        [ 0.2081],
        [ 0.0223],
        [ 0.0794],
        [ 0.0961],
        [ 0.0546],
        [-0.0964],
     

ep  2:  79%|██████████████████████████████████████████████████████████▉                | 22/28 [00:56<00:15,  2.62s/it]

tensor([[-3.4604e-01, -3.2104e-01, -2.4843e-01,  ...,  9.1493e-02,
         -2.7697e-01,  1.2019e+04],
        [-4.4120e-01, -2.0841e-01, -2.9606e-01,  ...,  2.0397e-01,
         -2.1738e-01,  1.5885e+04],
        [-3.8762e-01, -3.7258e-02, -5.3454e-01,  ...,  4.2211e-01,
         -8.6074e-02,  1.7280e+04],
        ...,
        [-2.5816e-01, -3.8214e-01, -2.7272e-01,  ...,  7.4683e-03,
         -2.1076e-01,  1.6330e+04],
        [-3.2088e-01, -2.2953e-01, -3.1333e-01,  ...,  1.8128e-01,
         -2.4778e-01,  1.6912e+04],
        [-3.1392e-01, -2.7724e-01, -2.3925e-01,  ...,  1.6026e-01,
         -2.3088e-01,  9.7325e+03]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0858],
        [-0.0325],
        [ 0.0991],
        [ 0.0755],
        [ 0.1901],
        [ 0.0790],
        [ 0.2967],
        [ 0.0879],
        [ 0.0074],
        [ 0.1290],
        [ 0.1289],
        [ 0.2141],
        [ 0.1017],
        [ 0.1284],
        [ 0.0165],
        [ 0.3352],
        [ 0.1974],
     

ep  2:  82%|█████████████████████████████████████████████████████████████▌             | 23/28 [00:59<00:13,  2.63s/it]

tensor([[-4.0067e-01, -2.6052e-01, -2.9089e-01,  ...,  1.0600e-01,
         -2.3551e-01,  1.0286e+04],
        [-3.7176e-01, -2.6482e-01, -2.6047e-01,  ...,  2.0887e-01,
         -2.2086e-01,  1.7478e+04],
        [-3.9394e-01, -3.2345e-01, -2.6753e-01,  ...,  1.1090e-01,
         -2.0170e-01,  1.4988e+04],
        ...,
        [-3.7348e-01, -2.3195e-01, -2.7464e-01,  ...,  1.5379e-01,
         -2.4741e-01,  1.5118e+04],
        [-3.4535e-01, -2.8672e-01, -2.1426e-01,  ...,  2.1455e-01,
         -2.5627e-01,  1.5105e+04],
        [-3.3176e-01, -3.4075e-01, -2.7343e-01,  ...,  1.0494e-01,
         -2.3551e-01,  1.6256e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0240],
        [ 0.0419],
        [ 0.2084],
        [ 0.1621],
        [ 0.0992],
        [ 0.1966],
        [ 0.1795],
        [ 0.0154],
        [-0.0436],
        [ 0.0999],
        [ 0.1202],
        [ 0.1799],
        [ 0.1496],
        [ 0.1193],
        [ 0.1494],
        [ 0.1562],
        [ 0.1674],
     

ep  2:  86%|████████████████████████████████████████████████████████████████▎          | 24/28 [01:02<00:10,  2.62s/it]

tensor([[-3.1022e-01, -1.8703e-01, -2.7300e-01,  ...,  2.1941e-01,
         -2.4358e-01,  1.7720e+04],
        [-3.7041e-01, -3.0064e-01, -2.6173e-01,  ...,  1.9359e-01,
         -1.6904e-01,  1.8038e+04],
        [-4.3775e-01, -2.3280e-01, -2.3169e-01,  ...,  1.8710e-01,
         -1.9914e-01,  1.0414e+04],
        ...,
        [-4.0026e-01, -3.3545e-01, -2.6718e-01,  ...,  2.2542e-01,
         -2.0691e-01,  1.6491e+04],
        [-4.0140e-01, -2.7776e-01, -3.2262e-01,  ...,  9.9438e-02,
         -2.7022e-01,  1.6173e+04],
        [-4.0474e-01, -2.0529e-01, -2.9639e-01,  ...,  3.2189e-01,
         -2.3843e-01,  1.5276e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 2.5938e-01],
        [ 7.4847e-02],
        [ 1.5784e-01],
        [ 1.8533e-01],
        [ 1.1527e-01],
        [ 1.7057e-01],
        [ 1.3074e-01],
        [-1.5845e-03],
        [ 9.5946e-02],
        [ 2.1413e-01],
        [ 4.6766e-02],
        [ 1.0452e-01],
        [ 1.4798e-01],
        [ 1.1078e-01],
      

ep  2:  89%|██████████████████████████████████████████████████████████████████▉        | 25/28 [01:04<00:07,  2.62s/it]

tensor([[-3.4761e-01, -3.7318e-01, -2.3399e-01,  ...,  1.4139e-01,
         -2.5983e-01,  1.6915e+04],
        [-2.2585e-01, -2.1238e-01, -3.1960e-01,  ...,  2.0828e-01,
         -2.0870e-01,  1.3391e+04],
        [-3.4727e-01, -2.2425e-01, -4.2008e-01,  ...,  2.6411e-01,
         -1.9298e-01,  1.2288e+04],
        ...,
        [-3.2713e-01, -1.6660e-01, -2.4351e-01,  ...,  2.4159e-01,
         -2.5983e-01,  1.7706e+04],
        [-4.3058e-01, -3.2221e-01, -2.6141e-01,  ...,  1.7008e-01,
         -2.3332e-01,  1.6504e+04],
        [-3.6204e-01, -2.8229e-01, -2.9786e-01,  ...,  1.1430e-01,
         -2.6426e-01,  1.6133e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.2622],
        [0.1205],
        [0.2497],
        [0.0781],
        [0.3596],
        [0.3111],
        [0.1335],
        [0.0366],
        [0.1409],
        [0.1173],
        [0.0339],
        [0.0219],
        [0.1996],
        [0.0918],
        [0.2508],
        [0.0236],
        [0.0964],
        [0.0175],
    

ep  2:  93%|█████████████████████████████████████████████████████████████████████▋     | 26/28 [01:07<00:05,  2.62s/it]

tensor([[-4.1247e-01, -2.7284e-01, -2.3538e-01,  ...,  2.1363e-01,
         -2.8356e-01,  1.6615e+04],
        [-3.3291e-01, -2.9052e-01, -2.2333e-01,  ...,  2.4047e-01,
         -1.8636e-01,  1.7862e+04],
        [-3.4652e-01, -2.9041e-01, -2.6480e-01,  ...,  2.2792e-01,
         -2.5364e-01,  1.7384e+04],
        ...,
        [-3.5907e-01, -1.8485e-01, -2.9579e-01,  ...,  2.1352e-01,
         -2.7630e-01,  1.0452e+04],
        [-3.7957e-01, -3.3877e-01, -2.7814e-01,  ...,  1.7920e-01,
         -2.1444e-01,  1.8100e+04],
        [-3.4980e-01, -2.9343e-01, -3.1180e-01,  ...,  9.1082e-02,
         -1.4368e-01,  1.1247e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1954],
        [ 0.0688],
        [ 0.2140],
        [ 0.1420],
        [ 0.1076],
        [-0.0189],
        [ 0.1604],
        [ 0.1018],
        [ 0.2392],
        [ 0.0810],
        [ 0.1461],
        [ 0.1789],
        [ 0.1528],
        [ 0.0426],
        [ 0.1630],
        [ 0.2161],
        [ 0.2441],
     

ep  2:  96%|████████████████████████████████████████████████████████████████████████▎  | 27/28 [01:10<00:02,  2.64s/it]

tensor([[-4.1412e-01, -2.4917e-01, -2.8674e-01,  ...,  1.3618e-01,
         -2.2998e-01,  1.1167e+04],
        [-2.7454e-01, -2.7811e-01, -2.5261e-01,  ...,  1.5950e-01,
         -2.1417e-01,  1.8060e+04],
        [-4.8608e-01, -2.3142e-01, -3.8985e-01,  ...,  2.5077e-01,
         -1.9815e-01,  1.0463e+04],
        ...,
        [-3.6366e-01, -2.3304e-01, -2.5642e-01,  ...,  5.0858e-02,
         -2.3503e-01,  1.1362e+04],
        [-3.0825e-01, -2.5210e-01, -2.7628e-01,  ...,  1.9201e-01,
         -2.0372e-01,  1.7729e+04],
        [-4.5436e-01, -3.1093e-01, -2.7416e-01,  ...,  1.7868e-01,
         -1.6803e-01,  1.2437e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1089],
        [ 0.1873],
        [ 0.1752],
        [ 0.2164],
        [ 0.0719],
        [ 0.2418],
        [ 0.1585],
        [ 0.0881],
        [ 0.0815],
        [ 0.0828],
        [ 0.0897],
        [ 0.1783],
        [ 0.1983],
        [ 0.2300],
        [ 0.1519],
        [ 0.1199],
        [ 0.1466],
     

ep  2: 100%|███████████████████████████████████████████████████████████████████████████| 28/28 [01:12<00:00,  2.60s/it]
valid:   0%|                                                                                     | 0/4 [00:00<?, ?it/s]

tensor([[-2.8990e-01, -2.8739e-01, -2.4120e-01,  ...,  1.9121e-01,
         -1.9309e-01,  1.7630e+04],
        [-4.0903e-01, -2.6816e-01, -3.0147e-01,  ...,  1.8071e-01,
         -2.1686e-01,  1.0154e+04],
        [-3.0090e-01, -2.5054e-01, -2.6815e-01,  ...,  2.0753e-01,
         -2.7671e-01,  1.2985e+04],
        ...,
        [-3.1734e-01, -2.8358e-01, -2.5236e-01,  ...,  7.4284e-02,
         -2.2192e-01,  1.2068e+04],
        [-3.5656e-01, -2.5345e-01, -1.8035e-01,  ...,  1.9261e-01,
         -2.0344e-01,  1.6621e+04],
        [-2.5624e-01, -1.5757e-01, -2.3494e-01,  ...,  1.8252e-01,
         -2.9421e-01,  1.6399e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0697],
        [ 0.0881],
        [ 0.1894],
        [ 0.1304],
        [ 0.1245],
        [ 0.0547],
        [ 0.0594],
        [ 0.1944],
        [ 0.0953],
        [ 0.2825],
        [ 0.0795],
        [ 0.1833],
        [ 0.0955],
        [ 0.1062],
        [ 0.1430],
        [ 0.0834],
        [ 0.0668],
     

valid:  25%|███████████████████▎                                                         | 1/4 [00:02<00:07,  2.65s/it]

tensor([[-3.2233e-01, -2.8638e-01, -2.5438e-01,  ...,  1.1127e-01,
         -2.4928e-01,  8.2773e+03],
        [-3.6531e-01, -1.8541e-01, -3.2476e-01,  ...,  9.8329e-02,
         -2.3989e-01,  8.2921e+03],
        [-3.7271e-01, -3.1051e-01, -2.6006e-01,  ...,  1.0789e-01,
         -1.8490e-01,  8.4220e+03],
        ...,
        [-3.7780e-01, -2.1708e-01, -3.6814e-01,  ...,  2.7568e-01,
         -1.1368e-01,  7.2709e+03],
        [-3.9939e-01, -2.8329e-01, -2.7683e-01,  ...,  1.3335e-01,
         -2.4340e-01,  7.3509e+03],
        [-3.1813e-01, -1.7252e-01, -3.3864e-01,  ...,  1.8896e-01,
         -2.7303e-01,  7.1148e+03]], device='cuda:0')
tensor([[0.1216],
        [0.1306],
        [0.1318],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [

valid:  50%|██████████████████████████████████████▌                                      | 2/4 [00:05<00:05,  2.65s/it]

tensor([[-2.6459e-01, -1.9816e-01, -2.3166e-01,  ...,  4.3366e-02,
         -2.4275e-01,  7.3657e+03],
        [-4.4351e-01, -1.6997e-01, -2.4599e-01,  ...,  5.0830e-02,
         -2.2045e-01,  7.4660e+03],
        [-3.6358e-01, -2.7964e-01, -2.1717e-01,  ...,  1.4754e-01,
         -2.0238e-01,  7.5556e+03],
        ...,
        [-3.0289e-01, -2.8608e-01, -3.0153e-01,  ...,  9.7022e-02,
         -2.0641e-01,  7.5523e+03],
        [-3.2801e-01, -2.4983e-01, -2.3538e-01,  ...,  1.0617e-01,
         -3.3416e-01,  7.9973e+03],
        [-3.5315e-01, -2.7097e-01, -2.6696e-01,  ...,  1.3142e-01,
         -2.4509e-01,  8.4248e+03]], device='cuda:0')
tensor([[0.1216],
        [0.1306],
        [0.1318],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [

valid:  75%|█████████████████████████████████████████████████████████▊                   | 3/4 [00:07<00:02,  2.65s/it]

tensor([[-4.3092e-01, -2.7025e-01, -3.0428e-01,  ...,  1.7351e-01,
         -2.1021e-01,  8.2736e+03],
        [-3.9687e-01, -1.6447e-01, -3.1085e-01,  ...,  2.0987e-01,
         -1.9773e-01,  8.4973e+03],
        [-4.0648e-01, -2.3350e-01, -3.5499e-01,  ...,  1.4227e-01,
         -2.3323e-01,  8.8352e+03],
        ...,
        [-2.5792e-01, -2.7998e-01, -2.9789e-01,  ...,  6.9041e-02,
         -1.5714e-01,  1.1430e+04],
        [-3.6105e-01, -3.1575e-01, -2.2100e-01,  ...,  1.0072e-01,
         -2.3743e-01,  1.1417e+04],
        [-3.4809e-01, -2.3759e-01, -2.0738e-01,  ...,  1.4380e-01,
         -2.2184e-01,  1.1349e+04]], device='cuda:0')
tensor([[0.1216],
        [0.1306],
        [0.1318],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320],
        [

valid: 100%|█████████████████████████████████████████████████████████████████████████████| 4/4 [00:08<00:00,  2.06s/it]
ep  3:   0%|                                                                                    | 0/28 [00:00<?, ?it/s]

tensor([[-3.0299e-01, -2.0548e-01, -3.7163e-01,  ...,  2.2204e-01,
         -2.4157e-01,  1.1479e+04],
        [-5.2351e-01, -1.3666e-01, -4.0322e-01,  ...,  1.6678e-01,
         -2.3607e-01,  1.1660e+04],
        [-2.8602e-01, -2.1112e-01, -2.7816e-01,  ...,  2.0929e-01,
         -2.4698e-01,  1.1616e+04],
        ...,
        [-4.1360e-01, -3.0982e-01, -2.5829e-01,  ...,  8.7611e-02,
         -2.1257e-01,  1.1642e+04],
        [-4.2525e-01, -2.9543e-01, -3.0947e-01,  ...,  7.3900e-02,
         -2.2249e-01,  1.1782e+04],
        [-2.6573e-01, -2.9025e-01, -3.4377e-01,  ...,  1.3080e-01,
         -1.8334e-01,  1.1734e+04]], device='cuda:0')
tensor([[0.1216],
        [0.1306],
        [0.1318],
        [0.1320],
        [0.1320],
        [0.1320],
        [0.1320]], device='cuda:0')
valid acc 0.5376884422110553


ep  3:   4%|██▋                                                                         | 1/28 [00:02<01:11,  2.66s/it]

tensor([[-3.6536e-01, -2.5236e-01, -2.3157e-01,  ...,  1.1472e-01,
         -2.3437e-01,  1.2716e+04],
        [-3.1692e-01, -3.4895e-01, -1.8473e-01,  ...,  3.9459e-02,
         -2.7700e-01,  1.5915e+04],
        [-4.0726e-01, -2.4532e-01, -4.7755e-01,  ...,  3.8868e-01,
         -1.9176e-01,  1.3112e+04],
        ...,
        [-2.0097e-01, -1.4819e-01, -5.3714e-01,  ...,  4.3473e-01,
         -2.1721e-01,  1.6633e+04],
        [-3.7261e-01, -3.0743e-01, -2.0168e-01,  ...,  1.6302e-01,
         -3.1991e-01,  1.6844e+04],
        [-4.1133e-01, -2.7734e-01, -3.6478e-01,  ...,  1.0248e-01,
         -2.1866e-01,  1.0416e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1130],
        [ 0.1075],
        [ 0.1905],
        [ 0.2722],
        [ 0.0425],
        [ 0.3291],
        [ 0.1429],
        [ 0.0783],
        [ 0.1620],
        [ 0.2185],
        [-0.0744],
        [ 0.0706],
        [ 0.1095],
        [ 0.0697],
        [ 0.0889],
        [ 0.0326],
        [ 0.1241],
     

ep  3:   7%|█████▍                                                                      | 2/28 [00:05<01:08,  2.63s/it]

tensor([[-4.0763e-01, -2.4197e-01, -4.5180e-01,  ...,  3.8159e-01,
         -2.1892e-01,  8.3249e+03],
        [-3.8892e-01, -3.0700e-01, -2.7924e-01,  ...,  1.2427e-01,
         -2.4846e-01,  1.7113e+04],
        [-3.8361e-01, -3.1339e-01, -2.8429e-01,  ...,  1.4958e-01,
         -1.7615e-01,  1.5666e+04],
        ...,
        [-3.2431e-01, -1.7918e-01, -2.9542e-01,  ...,  2.0301e-01,
         -2.6104e-01,  1.3982e+04],
        [-2.2585e-01, -2.1238e-01, -3.1960e-01,  ...,  2.0828e-01,
         -2.0870e-01,  1.3391e+04],
        [-4.1684e-01, -2.1007e-01, -2.1573e-01,  ...,  1.5244e-01,
         -2.2402e-01,  1.7891e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 1.2998e-01],
        [ 1.5822e-01],
        [ 1.0604e-01],
        [ 4.1311e-02],
        [ 8.8513e-02],
        [ 2.2578e-01],
        [ 9.8104e-02],
        [ 7.3135e-02],
        [ 1.1779e-01],
        [ 8.4433e-02],
        [ 1.6863e-01],
        [ 9.3179e-02],
        [ 7.1192e-02],
        [ 2.3128e-01],
      

ep  3:  11%|████████▏                                                                   | 3/28 [00:07<01:05,  2.63s/it]

tensor([[-4.1310e-01, -2.3542e-01, -2.4912e-01,  ...,  1.0974e-01,
         -2.5079e-01,  1.7623e+04],
        [-4.1987e-01, -2.9582e-01, -2.2743e-01,  ...,  1.3903e-01,
         -2.3932e-01,  1.2151e+04],
        [-3.4818e-01, -3.0913e-01, -2.0225e-01,  ...,  2.0458e-01,
         -2.9903e-01,  1.2554e+04],
        ...,
        [-4.1255e-01, -2.2252e-01, -3.3290e-01,  ...,  1.1824e-01,
         -2.5446e-01,  1.1024e+04],
        [-3.5336e-01, -1.7096e-01, -2.4021e-01,  ...,  1.2804e-01,
         -2.2859e-01,  1.7368e+04],
        [-3.3945e-01, -2.8684e-01, -3.2056e-01,  ...,  2.0349e-01,
         -2.0155e-01,  1.3553e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0190],
        [ 0.1463],
        [ 0.1888],
        [ 0.0116],
        [ 0.2305],
        [ 0.1227],
        [ 0.0161],
        [ 0.1797],
        [ 0.1732],
        [ 0.2549],
        [ 0.2386],
        [ 0.2553],
        [ 0.2747],
        [ 0.1992],
        [ 0.1309],
        [ 0.1017],
        [ 0.0666],
     

ep  3:  14%|██████████▊                                                                 | 4/28 [00:10<01:03,  2.63s/it]

tensor([[-3.6767e-01, -2.2675e-01, -2.9387e-01,  ...,  8.5994e-02,
         -2.6699e-01,  9.2810e+03],
        [-4.3018e-01, -3.0888e-01, -2.8688e-01,  ...,  1.3984e-01,
         -2.4008e-01,  1.2512e+04],
        [-4.4514e-01, -2.1580e-01, -3.0095e-01,  ...,  2.0017e-01,
         -2.4058e-01,  1.0442e+04],
        ...,
        [-2.5824e-01, -2.3949e-01, -2.1718e-01,  ...,  1.1234e-01,
         -2.2813e-01,  1.2049e+04],
        [-2.7916e-01, -1.7648e-01, -2.3676e-01,  ...,  2.3446e-01,
         -2.2328e-01,  1.7078e+04],
        [-3.4031e-01, -2.2289e-01, -2.6927e-01,  ...,  1.6278e-01,
         -2.5149e-01,  1.1771e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.2162],
        [ 0.2321],
        [-0.0599],
        [ 0.2071],
        [ 0.2082],
        [ 0.0714],
        [ 0.2016],
        [ 0.1218],
        [ 0.2953],
        [ 0.1243],
        [ 0.1228],
        [ 0.0566],
        [ 0.0712],
        [ 0.1082],
        [ 0.2045],
        [ 0.2492],
        [ 0.0754],
     

ep  3:  18%|█████████████▌                                                              | 5/28 [00:13<01:00,  2.63s/it]

tensor([[-3.8263e-01, -1.5271e-01, -3.8456e-01,  ...,  1.8018e-01,
         -1.7891e-01,  8.5047e+03],
        [-3.6344e-01, -2.8729e-01, -2.9996e-01,  ...,  1.8668e-01,
         -2.5146e-01,  1.5612e+04],
        [-3.5794e-01, -2.8589e-01, -2.9491e-01,  ...,  2.3025e-01,
         -2.0938e-01,  1.1519e+04],
        ...,
        [-3.4225e-01, -2.5272e-01, -2.8888e-01,  ...,  2.2232e-01,
         -2.7486e-01,  1.7281e+04],
        [-3.5949e-01, -2.7073e-01, -2.1092e-01,  ...,  1.1750e-01,
         -2.1872e-01,  1.8054e+04],
        [-3.4529e-01, -3.0674e-01, -3.1387e-01,  ...,  2.1878e-01,
         -1.9480e-01,  1.7604e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1746],
        [ 0.0241],
        [-0.0420],
        [ 0.0759],
        [ 0.1960],
        [ 0.0078],
        [ 0.1330],
        [ 0.2026],
        [ 0.2461],
        [ 0.1782],
        [ 0.2287],
        [ 0.0611],
        [ 0.0542],
        [ 0.0594],
        [ 0.1221],
        [ 0.1782],
        [ 0.1521],
     

ep  3:  21%|████████████████▎                                                           | 6/28 [00:15<00:58,  2.64s/it]

tensor([[-3.2484e-01, -1.8789e-01, -2.0434e-01,  ...,  6.8002e-02,
         -2.7145e-01,  1.5546e+04],
        [-2.7420e-01, -1.8211e-01, -2.8865e-01,  ...,  2.1371e-02,
         -3.1516e-01,  1.2226e+04],
        [-3.9721e-01, -2.7197e-01, -3.5599e-01,  ...,  2.1330e-01,
         -2.4224e-01,  1.7024e+04],
        ...,
        [-4.0474e-01, -2.0529e-01, -2.9639e-01,  ...,  3.2189e-01,
         -2.3843e-01,  1.5276e+04],
        [-3.6057e-01, -2.2378e-01, -2.9901e-01,  ...,  1.8151e-01,
         -1.8897e-01,  1.0472e+04],
        [-4.0603e-01, -2.9946e-01, -3.2356e-01,  ...,  1.6040e-01,
         -1.6972e-01,  1.2641e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1438],
        [ 0.2288],
        [ 0.0926],
        [ 0.1199],
        [ 0.1598],
        [ 0.0843],
        [ 0.1374],
        [ 0.1844],
        [ 0.0884],
        [ 0.1901],
        [ 0.1229],
        [ 0.1375],
        [ 0.0850],
        [ 0.1518],
        [ 0.2902],
        [ 0.1247],
        [ 0.1367],
     

ep  3:  25%|███████████████████                                                         | 7/28 [00:18<00:55,  2.63s/it]

tensor([[-2.8655e-01, -2.7082e-01, -3.3381e-01,  ...,  1.1852e-01,
         -3.1277e-01,  1.0453e+04],
        [-3.7179e-01, -3.2242e-01, -2.9981e-01,  ...,  5.5976e-02,
         -3.6070e-01,  9.6054e+03],
        [-4.1816e-01, -2.1277e-01, -4.8117e-01,  ...,  1.7365e-01,
         -1.7194e-01,  1.1555e+04],
        ...,
        [-3.3167e-01, -2.5996e-01, -2.8100e-01,  ...,  3.4715e-02,
         -2.3055e-01,  1.3239e+04],
        [-2.9869e-01, -3.6268e-01, -2.3666e-01,  ...,  1.3313e-01,
         -2.3222e-01,  1.6425e+04],
        [-4.2820e-01, -2.3291e-01, -4.5784e-01,  ...,  4.0329e-01,
         -1.6441e-01,  1.6179e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1778],
        [ 0.0777],
        [ 0.1680],
        [ 0.0875],
        [ 0.1129],
        [ 0.2156],
        [ 0.1139],
        [ 0.1833],
        [ 0.1250],
        [ 0.2484],
        [ 0.0953],
        [ 0.0675],
        [ 0.0103],
        [ 0.1611],
        [ 0.0936],
        [ 0.0117],
        [ 0.2634],
     

ep  3:  29%|█████████████████████▋                                                      | 8/28 [00:21<00:52,  2.63s/it]

tensor([[-3.9468e-01, -2.6042e-01, -3.2978e-01,  ...,  2.7401e-01,
         -2.2071e-01,  1.5303e+04],
        [-3.3553e-01, -2.0631e-01, -3.5493e-01,  ...,  1.5216e-01,
         -2.7690e-01,  1.0855e+04],
        [-3.2072e-01, -2.7191e-01, -2.4150e-01,  ...,  1.1294e-01,
         -2.6563e-01,  1.5176e+04],
        ...,
        [-3.2184e-01, -2.4019e-01, -3.1942e-01,  ...,  2.3279e-01,
         -1.6719e-01,  1.3250e+04],
        [-3.5147e-01, -3.0185e-01, -3.0369e-01,  ...,  2.1647e-01,
         -1.7794e-01,  1.8039e+04],
        [-2.1095e-01, -3.0289e-01, -1.9653e-01,  ...,  1.5720e-01,
         -2.5741e-01,  1.7793e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1613],
        [ 0.1571],
        [ 0.1104],
        [ 0.1159],
        [ 0.2634],
        [ 0.0854],
        [ 0.0220],
        [ 0.1263],
        [ 0.0318],
        [ 0.2025],
        [ 0.1548],
        [ 0.1145],
        [ 0.1509],
        [ 0.0986],
        [ 0.0061],
        [ 0.0926],
        [ 0.2245],
     

ep  3:  32%|████████████████████████▍                                                   | 9/28 [00:23<00:49,  2.63s/it]

tensor([[-3.1734e-01, -2.8358e-01, -2.5236e-01,  ...,  7.4284e-02,
         -2.2192e-01,  1.2068e+04],
        [-3.4343e-01, -2.7254e-01, -1.8527e-01,  ...,  9.1151e-02,
         -2.5411e-01,  1.3242e+04],
        [-3.2707e-01, -2.7421e-01, -3.2653e-01,  ...,  1.9402e-01,
         -1.8292e-01,  1.7391e+04],
        ...,
        [-3.7303e-01, -2.6492e-01, -3.6812e-01,  ...,  2.5513e-01,
         -2.6033e-01,  1.1866e+04],
        [-4.0678e-01, -1.3138e-01, -3.9912e-01,  ...,  3.9144e-01,
         -1.3150e-01,  1.5783e+04],
        [-3.7901e-01, -2.5520e-01, -3.4872e-01,  ...,  1.3156e-01,
         -2.6338e-01,  1.2781e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1823],
        [ 0.0389],
        [ 0.1693],
        [ 0.0818],
        [ 0.1447],
        [ 0.2175],
        [ 0.2689],
        [ 0.0738],
        [ 0.1919],
        [ 0.1453],
        [ 0.1460],
        [ 0.2876],
        [ 0.0550],
        [ 0.2098],
        [ 0.1003],
        [ 0.1286],
        [ 0.0497],
     

ep  3:  36%|██████████████████████████▊                                                | 10/28 [00:26<00:47,  2.63s/it]

tensor([[-3.5548e-01, -1.1612e-01, -2.8163e-01,  ...,  2.1029e-01,
         -1.9561e-01,  1.7888e+04],
        [-3.2011e-01, -2.7569e-01, -2.6852e-01,  ...,  9.9736e-02,
         -1.6878e-01,  1.6168e+04],
        [-4.2770e-01, -2.6157e-01, -2.7998e-01,  ...,  2.1762e-01,
         -2.0398e-01,  1.6315e+04],
        ...,
        [-2.9050e-01, -2.2564e-01, -2.7755e-01,  ...,  1.6993e-01,
         -2.7911e-01,  1.5995e+04],
        [-3.8471e-01, -2.0178e-01, -2.3233e-01,  ...,  1.8675e-01,
         -1.6712e-01,  1.0520e+04],
        [-2.8922e-01, -2.9200e-01, -2.0554e-01,  ...,  2.0597e-01,
         -2.8366e-01,  1.7603e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0982],
        [ 0.0300],
        [ 0.0829],
        [ 0.1202],
        [ 0.1313],
        [ 0.0743],
        [ 0.1248],
        [ 0.0911],
        [ 0.1606],
        [ 0.1874],
        [-0.0153],
        [ 0.1774],
        [ 0.0916],
        [ 0.1720],
        [ 0.0968],
        [ 0.2294],
        [ 0.1697],
     

ep  3:  39%|█████████████████████████████▍                                             | 11/28 [00:28<00:44,  2.63s/it]

tensor([[-3.6563e-01, -1.7093e-01, -2.7095e-01,  ...,  1.2502e-01,
         -3.3816e-01,  1.1283e+04],
        [-3.6336e-01, -3.0126e-01, -2.9737e-01,  ...,  1.0575e-01,
         -2.4669e-01,  1.5327e+04],
        [-3.8438e-01, -1.8772e-02, -5.4449e-01,  ...,  3.9325e-01,
         -2.2451e-01,  1.1133e+04],
        ...,
        [-4.3128e-01, -3.3070e-01, -3.4736e-01,  ...,  1.4332e-01,
         -2.5336e-01,  1.2535e+04],
        [-3.6208e-01, -2.9434e-01, -2.5774e-01,  ...,  1.5918e-01,
         -2.1579e-01,  1.4254e+04],
        [-3.1310e-01, -2.6754e-01, -3.0379e-01,  ...,  2.1739e-01,
         -2.2750e-01,  1.6058e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.2646],
        [ 0.0686],
        [ 0.1166],
        [ 0.1479],
        [ 0.1176],
        [ 0.1443],
        [ 0.1731],
        [ 0.1684],
        [ 0.1812],
        [ 0.1773],
        [ 0.1749],
        [ 0.0890],
        [ 0.1928],
        [ 0.2174],
        [ 0.2366],
        [ 0.2458],
        [ 0.0866],
     

ep  3:  43%|████████████████████████████████▏                                          | 12/28 [00:31<00:41,  2.62s/it]

tensor([[-3.7470e-01, -1.6805e-01, -2.6818e-01,  ...,  1.3348e-01,
         -1.8499e-01,  1.7733e+04],
        [-3.6735e-01, -2.2063e-01, -1.9290e-01,  ...,  4.4271e-02,
         -2.6190e-01,  1.5041e+04],
        [-3.2783e-01, -3.4014e-01, -2.5058e-01,  ...,  1.4893e-01,
         -2.4239e-01,  1.5495e+04],
        ...,
        [-3.0647e-01, -3.1938e-01, -2.3066e-01,  ...,  1.6218e-01,
         -2.5533e-01,  1.6986e+04],
        [-3.7678e-01, -1.7745e-01, -2.9356e-01,  ...,  2.2438e-01,
         -2.7752e-01,  1.7074e+04],
        [-3.1489e-01, -2.5737e-01, -2.4265e-01,  ...,  2.0741e-01,
         -1.3880e-01,  1.7813e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1032],
        [ 0.1387],
        [-0.0199],
        [ 0.1389],
        [ 0.0707],
        [ 0.1708],
        [ 0.1642],
        [ 0.1033],
        [ 0.1591],
        [ 0.1215],
        [ 0.0146],
        [ 0.1006],
        [ 0.2792],
        [ 0.1035],
        [ 0.1637],
        [ 0.1487],
        [ 0.0501],
     

ep  3:  46%|██████████████████████████████████▊                                        | 13/28 [00:34<00:39,  2.62s/it]

tensor([[-3.8361e-01, -2.5815e-01, -2.3466e-01,  ...,  1.7808e-01,
         -2.0312e-01,  1.7549e+04],
        [-4.1233e-01, -2.9614e-01, -2.5443e-01,  ...,  1.1757e-01,
         -2.5338e-01,  1.5130e+04],
        [-3.5163e-01, -2.0402e-01, -5.4794e-01,  ...,  3.1770e-01,
         -1.2157e-01,  1.2811e+04],
        ...,
        [-3.3214e-01, -1.9921e-01, -2.3708e-01,  ...,  1.2502e-01,
         -2.4068e-01,  9.9959e+03],
        [-3.4923e-01, -2.3005e-01, -2.5359e-01,  ...,  1.2981e-01,
         -2.3391e-01,  1.2079e+04],
        [-3.4445e-01, -1.2365e-01, -3.7383e-01,  ...,  2.9008e-01,
         -1.9202e-01,  1.6995e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1066],
        [ 0.1494],
        [ 0.2081],
        [ 0.0350],
        [ 0.1508],
        [ 0.0130],
        [ 0.2360],
        [-0.0014],
        [ 0.1033],
        [ 0.3041],
        [ 0.0342],
        [ 0.0890],
        [ 0.1993],
        [ 0.2272],
        [ 0.0407],
        [ 0.1072],
        [ 0.2150],
     

ep  3:  50%|█████████████████████████████████████▌                                     | 14/28 [00:36<00:36,  2.62s/it]

tensor([[-3.4904e-01, -3.3305e-01, -1.8327e-01,  ...,  3.8793e-02,
         -2.5094e-01,  1.7824e+04],
        [-2.7569e-01, -2.9130e-01, -2.4420e-01,  ...,  8.2906e-02,
         -3.1943e-01,  1.3384e+04],
        [-3.8785e-01, -2.3561e-01, -2.6039e-01,  ...,  1.8710e-01,
         -2.2448e-01,  1.0044e+04],
        ...,
        [-2.9416e-01, -2.1175e-01, -2.5101e-01,  ...,  7.8270e-02,
         -3.3443e-01,  1.5739e+04],
        [-3.4435e-01, -2.3824e-01, -3.2206e-01,  ...,  1.8362e-01,
         -2.9249e-01,  1.0720e+04],
        [-4.0864e-01, -2.6857e-01, -2.1168e-01,  ...,  1.9768e-01,
         -1.9762e-01,  1.7805e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0316],
        [ 0.1024],
        [ 0.0829],
        [ 0.1203],
        [ 0.2127],
        [ 0.0354],
        [ 0.1665],
        [ 0.1670],
        [ 0.1074],
        [ 0.2139],
        [ 0.1003],
        [ 0.1500],
        [ 0.1246],
        [ 0.1746],
        [-0.0030],
        [ 0.1360],
        [ 0.0587],
     

ep  3:  54%|████████████████████████████████████████▏                                  | 15/28 [00:39<00:34,  2.62s/it]

tensor([[-3.3551e-01, -2.4465e-01, -2.1983e-01,  ...,  2.5731e-01,
         -2.4170e-01,  9.3106e+03],
        [-3.6175e-01, -2.3995e-01, -2.2939e-01,  ...,  1.5392e-01,
         -2.3079e-01,  1.2040e+04],
        [-2.8395e-01, -3.1726e-01, -2.6154e-01,  ...,  1.6826e-01,
         -2.9582e-01,  1.5452e+04],
        ...,
        [-3.8320e-01, -2.4706e-01, -2.9717e-01,  ...,  1.4029e-01,
         -1.6265e-01,  1.0153e+04],
        [-3.8834e-01, -2.6819e-01, -3.7788e-01,  ...,  1.7679e-01,
         -1.2899e-01,  9.2866e+03],
        [-3.4104e-01, -2.6856e-01, -2.7314e-01,  ...,  2.0348e-01,
         -1.6202e-01,  1.4776e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1767],
        [ 0.1314],
        [-0.0110],
        [ 0.1683],
        [ 0.1902],
        [ 0.2494],
        [ 0.1756],
        [ 0.1439],
        [ 0.1210],
        [ 0.1699],
        [ 0.1268],
        [ 0.0612],
        [ 0.1930],
        [ 0.0949],
        [ 0.1850],
        [ 0.1996],
        [ 0.0647],
     

ep  3:  57%|██████████████████████████████████████████▊                                | 16/28 [00:42<00:31,  2.62s/it]

tensor([[-4.2466e-01, -2.2249e-01, -1.8340e-01,  ...,  1.1842e-01,
         -1.6583e-01,  1.7801e+04],
        [-3.8444e-01, -2.4477e-01, -3.5112e-01,  ...,  2.2560e-01,
         -2.5464e-01,  1.4974e+04],
        [-3.3540e-01, -2.9936e-01, -2.3458e-01,  ...,  1.0538e-01,
         -2.0126e-01,  1.0002e+04],
        ...,
        [-3.7666e-01, -3.0000e-01, -3.2456e-01,  ...,  1.9312e-01,
         -1.5685e-01,  1.8030e+04],
        [-3.8351e-01, -2.9822e-01, -2.2279e-01,  ...,  8.8409e-02,
         -3.0001e-01,  1.5372e+04],
        [-3.5550e-01, -3.0887e-01, -3.3853e-01,  ...,  2.3050e-01,
         -2.2120e-01,  1.4719e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1593],
        [ 0.2600],
        [ 0.1689],
        [ 0.0964],
        [ 0.2122],
        [ 0.2210],
        [ 0.0621],
        [ 0.1052],
        [ 0.1408],
        [ 0.1855],
        [ 0.0181],
        [ 0.1634],
        [ 0.1166],
        [ 0.1289],
        [ 0.1747],
        [ 0.1322],
        [ 0.1013],
     

ep  3:  61%|█████████████████████████████████████████████▌                             | 17/28 [00:44<00:28,  2.61s/it]

tensor([[-4.5436e-01, -3.1093e-01, -2.7416e-01,  ...,  1.7868e-01,
         -1.6803e-01,  1.2437e+04],
        [-3.1869e-01, -3.1717e-01, -2.5144e-01,  ...,  1.8114e-01,
         -2.8837e-01,  1.6073e+04],
        [-3.7151e-01, -2.8559e-01, -2.4777e-01,  ...,  1.0498e-01,
         -2.7297e-01,  1.7603e+04],
        ...,
        [-3.5052e-01, -2.7821e-01, -1.9067e-01,  ...,  1.8624e-01,
         -2.1652e-01,  1.6254e+04],
        [-3.9720e-01, -2.9734e-01, -2.8515e-01,  ...,  8.4706e-02,
         -2.0492e-01,  1.4995e+04],
        [-4.1313e-01, -2.3705e-01, -3.7843e-01,  ...,  2.4157e-01,
         -2.1135e-01,  1.8120e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1910],
        [ 0.0812],
        [ 0.2014],
        [ 0.0653],
        [ 0.0511],
        [ 0.1002],
        [ 0.1104],
        [ 0.2293],
        [ 0.3437],
        [ 0.0742],
        [ 0.1313],
        [ 0.1236],
        [ 0.3063],
        [ 0.1356],
        [ 0.1094],
        [ 0.3773],
        [ 0.1004],
     

ep  3:  64%|████████████████████████████████████████████████▏                          | 18/28 [00:47<00:26,  2.61s/it]

tensor([[-3.9641e-01, -2.8995e-01, -2.9212e-01,  ...,  1.8115e-01,
         -2.3447e-01,  1.3046e+04],
        [-3.0116e-01, -2.7040e-01, -2.8907e-01,  ...,  1.7651e-01,
         -2.1443e-01,  1.7751e+04],
        [-4.2619e-01, -3.3081e-01, -2.1961e-01,  ...,  1.5029e-01,
         -2.1870e-01,  1.3126e+04],
        ...,
        [-3.5385e-01, -2.7218e-01, -2.6614e-01,  ...,  1.3347e-01,
         -3.0293e-01,  1.5401e+04],
        [-3.1607e-01, -2.6021e-01, -2.8483e-01,  ...,  1.5557e-01,
         -2.1025e-01,  1.1573e+04],
        [-3.7647e-01, -2.0906e-01, -2.0967e-01,  ...,  1.7943e-01,
         -2.2944e-01,  1.7159e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0648],
        [ 0.1075],
        [ 0.0079],
        [ 0.0761],
        [ 0.2216],
        [ 0.0986],
        [ 0.1535],
        [ 0.2344],
        [ 0.1693],
        [ 0.1974],
        [ 0.1117],
        [ 0.2158],
        [ 0.1962],
        [ 0.0825],
        [ 0.1260],
        [ 0.1076],
        [ 0.1869],
     

ep  3:  68%|██████████████████████████████████████████████████▉                        | 19/28 [00:49<00:23,  2.60s/it]

tensor([[-3.9376e-01, -2.3986e-01, -2.7641e-01,  ...,  1.9607e-01,
         -2.3825e-01,  1.6992e+04],
        [-2.7445e-01, -3.2888e-01, -2.4196e-01,  ...,  8.9530e-02,
         -2.4701e-01,  1.0444e+04],
        [-4.0524e-01, -2.1045e-01, -4.4603e-01,  ...,  2.0014e-01,
         -2.2614e-01,  9.3202e+03],
        ...,
        [-3.2349e-01, -2.3472e-01, -2.6363e-01,  ...,  1.1278e-01,
         -2.9590e-01,  1.2416e+04],
        [-3.4199e-01, -2.1212e-01, -2.3939e-01,  ...,  7.9091e-02,
         -2.7641e-01,  1.3252e+04],
        [-3.3572e-01, -2.3196e-01, -3.2597e-01,  ...,  2.2033e-01,
         -2.3701e-01,  1.1983e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.2609],
        [ 0.0890],
        [ 0.0355],
        [ 0.2238],
        [ 0.1300],
        [ 0.1892],
        [-0.0206],
        [ 0.1840],
        [ 0.1655],
        [ 0.1006],
        [ 0.1414],
        [ 0.0018],
        [ 0.1448],
        [ 0.1694],
        [ 0.2216],
        [ 0.0270],
        [ 0.0667],
     

ep  3:  71%|█████████████████████████████████████████████████████▌                     | 20/28 [00:52<00:20,  2.61s/it]

tensor([[-3.7990e-01, -3.1110e-01, -3.0691e-01,  ...,  1.5624e-01,
         -2.2072e-01,  1.6947e+04],
        [-3.6981e-01, -3.1122e-01, -2.8601e-01,  ...,  1.4610e-01,
         -1.9858e-01,  1.2128e+04],
        [-4.1985e-01, -2.8284e-01, -3.1662e-01,  ...,  6.6587e-02,
         -2.6195e-01,  1.3235e+04],
        ...,
        [-3.7009e-01, -2.9071e-01, -2.6052e-01,  ...,  9.4638e-02,
         -2.7129e-01,  1.1824e+04],
        [-2.5837e-01, -2.1285e-01, -2.4334e-01,  ...,  1.6058e-01,
         -2.3043e-01,  1.0448e+04],
        [-3.1369e-01, -2.8385e-01, -2.6461e-01,  ...,  4.5594e-02,
         -2.5789e-01,  1.2966e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1544],
        [ 0.2473],
        [ 0.0896],
        [ 0.1433],
        [ 0.1723],
        [ 0.2004],
        [ 0.1595],
        [ 0.1132],
        [ 0.0617],
        [ 0.0896],
        [ 0.0979],
        [ 0.2713],
        [ 0.2101],
        [ 0.3070],
        [ 0.2284],
        [ 0.0852],
        [ 0.0690],
     

ep  3:  75%|████████████████████████████████████████████████████████▎                  | 21/28 [00:55<00:18,  2.61s/it]

tensor([[-3.9365e-01, -3.0964e-01, -3.2914e-01,  ...,  1.0702e-01,
         -1.7893e-01,  1.4662e+04],
        [-3.3582e-01, -2.4971e-01, -2.5818e-01,  ...,  1.3553e-01,
         -3.0568e-01,  1.4819e+04],
        [-4.3549e-01, -3.0192e-01, -2.2939e-01,  ...,  1.5671e-01,
         -2.4108e-01,  1.8144e+04],
        ...,
        [-3.0301e-01, -1.8538e-01, -2.1006e-01,  ...,  1.7466e-01,
         -2.6990e-01,  1.7926e+04],
        [-3.5297e-01, -2.9420e-01, -2.1998e-01,  ...,  5.2890e-02,
         -2.7862e-01,  1.6805e+04],
        [-3.8950e-01, -2.5194e-01, -2.8755e-01,  ...,  5.8617e-02,
         -2.5159e-01,  1.3091e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0519],
        [ 0.1082],
        [ 0.1761],
        [ 0.2232],
        [ 0.2137],
        [ 0.0677],
        [-0.0265],
        [ 0.0745],
        [ 0.0497],
        [ 0.1879],
        [-0.0438],
        [ 0.1039],
        [ 0.0585],
        [ 0.0109],
        [ 0.0821],
        [ 0.1563],
        [ 0.1788],
     

ep  3:  79%|██████████████████████████████████████████████████████████▉                | 22/28 [00:57<00:15,  2.62s/it]

tensor([[-3.3926e-01, -1.1478e-01, -4.1778e-01,  ...,  3.7720e-01,
         -1.6239e-01,  1.7685e+04],
        [-3.0048e-01, -2.7144e-01, -2.9141e-01,  ...,  1.8447e-01,
         -3.1589e-01,  1.7217e+04],
        [-3.1799e-01, -2.7334e-01, -2.7373e-01,  ...,  1.3691e-01,
         -3.2885e-01,  1.6551e+04],
        ...,
        [-2.4471e-01, -3.3220e-01, -2.6703e-01,  ...,  1.2799e-01,
         -1.6549e-01,  1.6170e+04],
        [-3.4776e-01, -2.3041e-01, -2.8570e-01,  ...,  2.2647e-01,
         -2.3475e-01,  1.4936e+04],
        [-3.4580e-01, -2.6731e-01, -2.6024e-01,  ...,  1.3357e-01,
         -2.7728e-01,  1.3214e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0851],
        [ 0.2089],
        [ 0.1333],
        [ 0.0854],
        [ 0.0863],
        [ 0.1156],
        [ 0.0916],
        [ 0.0977],
        [ 0.2107],
        [ 0.0872],
        [ 0.1172],
        [-0.0060],
        [ 0.0739],
        [ 0.1730],
        [ 0.1300],
        [ 0.0419],
        [ 0.0769],
     

ep  3:  82%|█████████████████████████████████████████████████████████████▌             | 23/28 [01:00<00:13,  2.61s/it]

tensor([[-3.4040e-01, -1.7519e-01, -3.0696e-01,  ...,  9.3887e-02,
         -2.8799e-01,  1.2303e+04],
        [-3.5866e-01, -2.3821e-01, -2.5212e-01,  ...,  1.0440e-01,
         -2.8745e-01,  1.1370e+04],
        [-3.7273e-01, -2.2576e-01, -3.1687e-01,  ...,  2.5766e-01,
         -1.9006e-01,  1.7068e+04],
        ...,
        [-4.0505e-01, -2.6577e-01, -2.7486e-01,  ...,  1.9824e-01,
         -2.6278e-01,  1.4932e+04],
        [-3.5802e-01, -2.8762e-01, -2.4838e-01,  ...,  1.4790e-01,
         -2.1459e-01,  1.2471e+04],
        [-2.8543e-01, -2.4393e-01, -3.3339e-01,  ...,  2.3348e-01,
         -2.0390e-01,  1.7581e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0682],
        [ 0.1518],
        [ 0.0734],
        [ 0.0800],
        [ 0.1100],
        [ 0.1829],
        [ 0.0875],
        [ 0.0906],
        [ 0.0041],
        [-0.0170],
        [ 0.2476],
        [ 0.1216],
        [ 0.1952],
        [-0.0418],
        [-0.0651],
        [ 0.2524],
        [ 0.2424],
     

ep  3:  86%|████████████████████████████████████████████████████████████████▎          | 24/28 [01:02<00:10,  2.61s/it]

tensor([[-3.3595e-01, -2.8704e-01, -2.4443e-01,  ...,  1.1174e-01,
         -2.9088e-01,  1.6142e+04],
        [-2.9049e-01, -1.5832e-01, -2.3804e-01,  ...,  1.4284e-01,
         -2.1840e-01,  1.6836e+04],
        [-3.5502e-01, -2.5876e-01, -2.1649e-01,  ...,  9.0758e-02,
         -2.0800e-01,  1.2020e+04],
        ...,
        [-3.9111e-01, -2.7042e-01, -3.1246e-01,  ...,  1.3853e-01,
         -2.5672e-01,  1.3026e+04],
        [-3.4904e-01, -1.8638e-01, -2.3458e-01,  ...,  1.9876e-01,
         -2.2575e-01,  1.6154e+04],
        [-3.7394e-01, -3.0783e-01, -2.3767e-01,  ...,  1.1166e-01,
         -1.8382e-01,  1.2759e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.2240],
        [ 0.1994],
        [ 0.1511],
        [ 0.0971],
        [ 0.2392],
        [ 0.1322],
        [ 0.1785],
        [ 0.1212],
        [ 0.1370],
        [ 0.1432],
        [ 0.0623],
        [ 0.2889],
        [ 0.1719],
        [ 0.1742],
        [ 0.0728],
        [ 0.1155],
        [ 0.2581],
     

ep  3:  89%|██████████████████████████████████████████████████████████████████▉        | 25/28 [01:05<00:07,  2.62s/it]

tensor([[-4.2946e-01, -3.0170e-01, -3.3206e-01,  ...,  1.1957e-01,
         -2.3761e-01,  1.1509e+04],
        [-2.6601e-01, -2.0072e-01, -4.0303e-01,  ...,  1.9598e-01,
         -2.4103e-01,  8.7390e+03],
        [-2.9820e-01, -2.4000e-01, -2.5479e-01,  ...,  1.0228e-01,
         -1.8715e-01,  1.0012e+04],
        ...,
        [-4.2517e-01, -2.4234e-01, -3.0206e-01,  ...,  1.3376e-01,
         -2.1869e-01,  1.6516e+04],
        [-3.9821e-01, -2.3205e-01, -2.9142e-01,  ...,  1.5012e-01,
         -2.0808e-01,  1.6744e+04],
        [-3.7930e-01, -2.3811e-01, -3.1529e-01,  ...,  1.9516e-01,
         -2.0264e-01,  1.0060e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.1133],
        [0.1502],
        [0.2007],
        [0.1097],
        [0.1734],
        [0.2382],
        [0.3145],
        [0.0758],
        [0.1448],
        [0.1791],
        [0.1527],
        [0.2234],
        [0.0614],
        [0.0974],
        [0.2837],
        [0.2804],
        [0.0938],
        [0.1443],
    

ep  3:  93%|█████████████████████████████████████████████████████████████████████▋     | 26/28 [01:08<00:05,  2.62s/it]

tensor([[-3.3716e-01, -2.2385e-01, -2.0406e-01,  ...,  1.3599e-01,
         -2.5382e-01,  1.0626e+04],
        [-4.3154e-01, -2.2596e-01, -2.4224e-01,  ...,  1.7318e-01,
         -1.9913e-01,  1.1823e+04],
        [-3.6198e-01, -2.9902e-01, -2.0722e-01,  ...,  1.2753e-01,
         -2.8123e-01,  1.2414e+04],
        ...,
        [-4.0413e-01, -1.7112e-01, -2.8010e-01,  ...,  1.1605e-01,
         -2.4876e-01,  1.2593e+04],
        [-4.3183e-01, -2.7388e-01, -2.7309e-01,  ...,  1.0821e-01,
         -2.5013e-01,  9.0707e+03],
        [-3.8545e-01, -2.1755e-01, -2.5876e-01,  ...,  1.9068e-01,
         -2.4844e-01,  1.1347e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1119],
        [ 0.0973],
        [ 0.3021],
        [ 0.0779],
        [ 0.2426],
        [ 0.2004],
        [ 0.0471],
        [ 0.1485],
        [ 0.1648],
        [ 0.1876],
        [ 0.1254],
        [ 0.1247],
        [ 0.1574],
        [ 0.1185],
        [ 0.0809],
        [-0.0681],
        [ 0.0889],
     

ep  3:  96%|████████████████████████████████████████████████████████████████████████▎  | 27/28 [01:10<00:02,  2.65s/it]

tensor([[-3.3687e-01, -2.7016e-01, -2.4285e-01,  ...,  2.1022e-01,
         -2.3223e-01,  1.4512e+04],
        [-3.1228e-01, -1.8379e-01, -4.0735e-01,  ...,  2.5088e-01,
         -2.0779e-01,  1.2190e+04],
        [-4.1747e-01, -1.7076e-01, -4.3206e-01,  ...,  2.9142e-01,
         -1.5839e-01,  1.1006e+04],
        ...,
        [-3.1131e-01, -3.7291e-01, -2.7728e-01,  ...,  5.6001e-02,
         -2.3126e-01,  1.2227e+04],
        [-4.0903e-01, -2.6816e-01, -3.0147e-01,  ...,  1.8071e-01,
         -2.1686e-01,  1.0154e+04],
        [-2.9526e-01, -2.9588e-01, -2.4599e-01,  ...,  7.9702e-02,
         -2.6027e-01,  1.3971e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1104],
        [ 0.1449],
        [ 0.1043],
        [ 0.1779],
        [ 0.1918],
        [ 0.0835],
        [ 0.1333],
        [ 0.0994],
        [ 0.0685],
        [ 0.2368],
        [ 0.0558],
        [ 0.0807],
        [ 0.1283],
        [ 0.1087],
        [ 0.0720],
        [ 0.1197],
        [ 0.1842],
     

ep  3: 100%|███████████████████████████████████████████████████████████████████████████| 28/28 [01:13<00:00,  2.62s/it]
valid:   0%|                                                                                     | 0/4 [00:00<?, ?it/s]

tensor([[-3.0139e-01, -2.4865e-01, -2.3791e-01,  ...,  8.2510e-02,
         -2.7901e-01,  1.6049e+04],
        [-3.6052e-01, -2.7077e-01, -2.7057e-01,  ...,  1.4180e-01,
         -1.9552e-01,  1.1433e+04],
        [-3.0951e-01, -2.3068e-01, -2.1789e-01,  ...,  1.1528e-01,
         -3.1005e-01,  1.5974e+04],
        ...,
        [-4.1398e-01, -1.9822e-01, -2.6480e-01,  ...,  6.3111e-02,
         -1.9066e-01,  1.0428e+04],
        [-3.0276e-01, -3.1938e-01, -2.4883e-01,  ...,  9.7528e-02,
         -2.4566e-01,  1.3495e+04],
        [-3.2494e-01, -2.6900e-01, -2.2515e-01,  ...,  4.4001e-02,
         -3.1323e-01,  1.3979e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.2991],
        [0.1550],
        [0.1175],
        [0.0644],
        [0.0526],
        [0.1846],
        [0.0198],
        [0.0986],
        [0.1198],
        [0.2403],
        [0.0088],
        [0.1658],
        [0.1576],
        [0.2539],
        [0.1042],
        [0.2365],
        [0.1168],
        [0.3057],
    

valid:  25%|███████████████████▎                                                         | 1/4 [00:02<00:07,  2.61s/it]

tensor([[-3.2233e-01, -2.8638e-01, -2.5438e-01,  ...,  1.1127e-01,
         -2.4928e-01,  8.2773e+03],
        [-3.6531e-01, -1.8541e-01, -3.2476e-01,  ...,  9.8329e-02,
         -2.3989e-01,  8.2921e+03],
        [-3.7271e-01, -3.1051e-01, -2.6006e-01,  ...,  1.0789e-01,
         -1.8490e-01,  8.4220e+03],
        ...,
        [-3.7780e-01, -2.1708e-01, -3.6814e-01,  ...,  2.7568e-01,
         -1.1368e-01,  7.2709e+03],
        [-3.9939e-01, -2.8329e-01, -2.7683e-01,  ...,  1.3335e-01,
         -2.4340e-01,  7.3509e+03],
        [-3.1813e-01, -1.7252e-01, -3.3864e-01,  ...,  1.8896e-01,
         -2.7303e-01,  7.1148e+03]], device='cuda:0')
tensor([[0.1233],
        [0.1321],
        [0.1335],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [

valid:  50%|██████████████████████████████████████▌                                      | 2/4 [00:05<00:05,  2.62s/it]

tensor([[-2.6459e-01, -1.9816e-01, -2.3166e-01,  ...,  4.3366e-02,
         -2.4275e-01,  7.3657e+03],
        [-4.4351e-01, -1.6997e-01, -2.4599e-01,  ...,  5.0830e-02,
         -2.2045e-01,  7.4660e+03],
        [-3.6358e-01, -2.7964e-01, -2.1717e-01,  ...,  1.4754e-01,
         -2.0238e-01,  7.5556e+03],
        ...,
        [-3.0289e-01, -2.8608e-01, -3.0153e-01,  ...,  9.7022e-02,
         -2.0641e-01,  7.5523e+03],
        [-3.2801e-01, -2.4983e-01, -2.3538e-01,  ...,  1.0617e-01,
         -3.3416e-01,  7.9973e+03],
        [-3.5315e-01, -2.7097e-01, -2.6696e-01,  ...,  1.3142e-01,
         -2.4509e-01,  8.4248e+03]], device='cuda:0')
tensor([[0.1233],
        [0.1321],
        [0.1335],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [

valid:  75%|█████████████████████████████████████████████████████████▊                   | 3/4 [00:07<00:02,  2.62s/it]

tensor([[-4.3092e-01, -2.7025e-01, -3.0428e-01,  ...,  1.7351e-01,
         -2.1021e-01,  8.2736e+03],
        [-3.9687e-01, -1.6447e-01, -3.1085e-01,  ...,  2.0987e-01,
         -1.9773e-01,  8.4973e+03],
        [-4.0648e-01, -2.3350e-01, -3.5499e-01,  ...,  1.4228e-01,
         -2.3323e-01,  8.8352e+03],
        ...,
        [-2.5792e-01, -2.7998e-01, -2.9789e-01,  ...,  6.9041e-02,
         -1.5714e-01,  1.1430e+04],
        [-3.6105e-01, -3.1575e-01, -2.2100e-01,  ...,  1.0072e-01,
         -2.3743e-01,  1.1417e+04],
        [-3.4809e-01, -2.3759e-01, -2.0738e-01,  ...,  1.4380e-01,
         -2.2184e-01,  1.1349e+04]], device='cuda:0')
tensor([[0.1233],
        [0.1321],
        [0.1335],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337],
        [

valid: 100%|█████████████████████████████████████████████████████████████████████████████| 4/4 [00:08<00:00,  2.03s/it]
ep  4:   0%|                                                                                    | 0/28 [00:00<?, ?it/s]

tensor([[-3.0299e-01, -2.0548e-01, -3.7163e-01,  ...,  2.2204e-01,
         -2.4157e-01,  1.1479e+04],
        [-5.2351e-01, -1.3666e-01, -4.0322e-01,  ...,  1.6678e-01,
         -2.3607e-01,  1.1660e+04],
        [-2.8602e-01, -2.1112e-01, -2.7816e-01,  ...,  2.0929e-01,
         -2.4698e-01,  1.1616e+04],
        ...,
        [-4.1360e-01, -3.0982e-01, -2.5829e-01,  ...,  8.7611e-02,
         -2.1257e-01,  1.1642e+04],
        [-4.2525e-01, -2.9543e-01, -3.0947e-01,  ...,  7.3900e-02,
         -2.2249e-01,  1.1782e+04],
        [-2.6573e-01, -2.9025e-01, -3.4377e-01,  ...,  1.3080e-01,
         -1.8334e-01,  1.1734e+04]], device='cuda:0')
tensor([[0.1233],
        [0.1321],
        [0.1335],
        [0.1337],
        [0.1337],
        [0.1337],
        [0.1337]], device='cuda:0')
valid acc 0.5376884422110553


ep  4:   4%|██▋                                                                         | 1/28 [00:02<01:09,  2.58s/it]

tensor([[-4.0422e-01, -3.2982e-01, -2.8745e-01,  ...,  2.5830e-02,
         -1.6451e-01,  1.4606e+04],
        [-2.9133e-01, -2.7430e-01, -3.1104e-01,  ...,  2.0555e-01,
         -2.2196e-01,  1.6543e+04],
        [-4.0299e-01, -2.6450e-01, -2.6293e-01,  ...,  1.4164e-01,
         -2.2114e-01,  1.1187e+04],
        ...,
        [-2.7272e-01, -1.8165e-01, -2.5444e-01,  ...,  1.5176e-01,
         -3.0181e-01,  1.0968e+04],
        [-3.7342e-01, -2.7687e-01, -2.6942e-01,  ...,  1.2889e-01,
         -2.3656e-01,  1.4802e+04],
        [-4.4161e-01, -2.9677e-01, -2.6457e-01,  ...,  1.7659e-01,
         -2.4516e-01,  1.5468e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0703],
        [ 0.1531],
        [-0.0313],
        [ 0.1426],
        [ 0.1144],
        [ 0.2028],
        [ 0.1172],
        [ 0.1403],
        [ 0.1948],
        [ 0.0547],
        [ 0.0718],
        [ 0.0579],
        [ 0.1361],
        [ 0.2465],
        [ 0.2023],
        [ 0.2606],
        [ 0.0211],
     

ep  4:   7%|█████▍                                                                      | 2/28 [00:05<01:07,  2.59s/it]

tensor([[-3.5904e-01, -2.4665e-01, -3.3036e-01,  ...,  1.0696e-01,
         -2.1271e-01,  1.2067e+04],
        [-4.1078e-01, -2.2713e-01, -2.5976e-01,  ...,  1.4149e-01,
         -1.5978e-01,  1.3272e+04],
        [-3.7063e-01, -2.3328e-01, -2.2482e-01,  ...,  2.1673e-01,
         -2.3106e-01,  1.7807e+04],
        ...,
        [-4.3832e-01, -1.5977e-01, -3.1844e-01,  ...,  2.1437e-01,
         -2.4763e-01,  1.2442e+04],
        [-3.7772e-01, -3.1635e-01, -2.3945e-01,  ...,  9.7310e-02,
         -2.6783e-01,  1.1723e+04],
        [-3.4616e-01, -2.2965e-01, -2.3060e-01,  ...,  1.9353e-01,
         -2.1995e-01,  1.5915e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0939],
        [ 0.1337],
        [-0.0188],
        [ 0.1464],
        [ 0.0520],
        [ 0.1968],
        [ 0.0703],
        [ 0.0553],
        [ 0.1516],
        [ 0.1422],
        [ 0.1966],
        [ 0.0166],
        [ 0.1896],
        [ 0.0896],
        [ 0.1145],
        [ 0.1274],
        [ 0.0191],
     

ep  4:  11%|████████▏                                                                   | 3/28 [00:07<01:04,  2.60s/it]

tensor([[-3.9376e-01, -2.3986e-01, -2.7641e-01,  ...,  1.9607e-01,
         -2.3825e-01,  1.6992e+04],
        [-3.0575e-01, -3.1580e-01, -2.9379e-01,  ...,  1.1958e-01,
         -2.7292e-01,  1.4421e+04],
        [-3.9657e-01, -1.6828e-01, -3.1565e-01,  ...,  6.4122e-02,
         -3.0275e-01,  1.0603e+04],
        ...,
        [-4.2721e-01, -3.0027e-01, -2.6535e-01,  ...,  1.5453e-01,
         -1.8394e-01,  1.0318e+04],
        [-3.5469e-01, -2.3961e-01, -2.6863e-01,  ...,  1.0798e-01,
         -2.3702e-01,  1.3200e+04],
        [-3.8409e-01, -2.3872e-01, -3.0405e-01,  ...,  9.3757e-02,
         -1.9970e-01,  1.8016e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1897],
        [ 0.0015],
        [ 0.2631],
        [ 0.1029],
        [ 0.1588],
        [ 0.2232],
        [ 0.2771],
        [ 0.1072],
        [ 0.1111],
        [ 0.1598],
        [ 0.2302],
        [-0.0043],
        [ 0.0710],
        [ 0.0722],
        [-0.0403],
        [ 0.0936],
        [ 0.1176],
     

ep  4:  14%|██████████▊                                                                 | 4/28 [00:10<01:02,  2.62s/it]

tensor([[-4.1322e-01, -3.5436e-01, -2.8108e-01,  ...,  5.4047e-02,
         -2.5233e-01,  1.3579e+04],
        [-4.5860e-01, -1.1931e-01, -4.3069e-01,  ...,  2.8575e-01,
         -2.0611e-01,  1.3191e+04],
        [-4.0230e-01, -2.8133e-01, -3.1633e-01,  ...,  1.9099e-01,
         -2.0879e-01,  1.3584e+04],
        ...,
        [-3.1923e-01, -1.6966e-01, -2.7427e-01,  ...,  2.1747e-01,
         -1.9646e-01,  9.7436e+03],
        [-3.1203e-01, -1.7950e-01, -2.1350e-01,  ...,  1.7090e-01,
         -2.4303e-01,  1.0211e+04],
        [-3.4652e-01, -2.9723e-01, -2.8267e-01,  ...,  2.6150e-01,
         -1.8239e-01,  1.7069e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.2202],
        [ 0.1781],
        [ 0.0244],
        [ 0.1652],
        [ 0.0851],
        [ 0.0116],
        [ 0.1988],
        [ 0.1472],
        [ 0.1642],
        [ 0.0782],
        [ 0.2055],
        [ 0.1189],
        [ 0.1466],
        [ 0.1100],
        [ 0.2630],
        [ 0.0533],
        [ 0.2818],
     

ep  4:  18%|█████████████▌                                                              | 5/28 [00:13<01:00,  2.62s/it]

tensor([[-3.9295e-01, -2.8495e-01, -2.2426e-01,  ...,  1.1241e-01,
         -2.7065e-01,  1.8113e+04],
        [-3.6118e-01, -3.4549e-01, -2.5838e-01,  ...,  4.7314e-02,
         -3.3234e-01,  1.1143e+04],
        [-4.2085e-01, -3.0320e-01, -2.7861e-01,  ...,  1.6676e-01,
         -1.4688e-01,  1.5471e+04],
        ...,
        [-3.2666e-01, -2.7645e-01, -3.6782e-01,  ...,  1.9493e-01,
         -2.0466e-01,  1.4898e+04],
        [-3.8531e-01, -3.0977e-01, -2.6267e-01,  ...,  1.3327e-01,
         -2.5768e-01,  1.3565e+04],
        [-3.6585e-01, -2.7683e-01, -3.1729e-01,  ...,  1.2660e-01,
         -1.8372e-01,  1.0675e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.1321],
        [0.2157],
        [0.0641],
        [0.1810],
        [0.2570],
        [0.2475],
        [0.2603],
        [0.0550],
        [0.1860],
        [0.1797],
        [0.1572],
        [0.0139],
        [0.1248],
        [0.0657],
        [0.0550],
        [0.2441],
        [0.1442],
        [0.2107],
    

ep  4:  21%|████████████████▎                                                           | 6/28 [00:15<00:57,  2.63s/it]

tensor([[-2.7609e-01, -2.3489e-01, -2.2958e-01,  ...,  5.4118e-02,
         -2.4483e-01,  8.7631e+03],
        [-2.8787e-01, -2.3555e-01, -1.4645e-01,  ...,  1.0126e-01,
         -2.0076e-01,  1.8024e+04],
        [-4.3645e-01, -1.8698e-01, -4.6236e-01,  ...,  2.8848e-01,
         -2.0286e-01,  1.3146e+04],
        ...,
        [-3.3856e-01, -3.5045e-01, -2.0582e-01,  ...,  1.5379e-01,
         -2.1888e-01,  1.6445e+04],
        [-2.7333e-01, -1.8306e-01, -2.4039e-01,  ...,  9.4023e-02,
         -2.3144e-01,  1.7938e+04],
        [-4.4514e-01, -2.1580e-01, -3.0095e-01,  ...,  2.0017e-01,
         -2.4058e-01,  1.0442e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1015],
        [ 0.3014],
        [ 0.1372],
        [ 0.1694],
        [ 0.1392],
        [ 0.1789],
        [ 0.2178],
        [ 0.0388],
        [ 0.1352],
        [ 0.1473],
        [ 0.2046],
        [ 0.0135],
        [ 0.2082],
        [ 0.1129],
        [ 0.3279],
        [ 0.1415],
        [ 0.1275],
     

ep  4:  25%|███████████████████                                                         | 7/28 [00:18<00:55,  2.64s/it]

tensor([[-3.8813e-01, -2.8072e-01, -2.4111e-01,  ...,  9.0654e-02,
         -2.8414e-01,  1.2096e+04],
        [-4.0746e-01, -2.7043e-01, -2.2685e-01,  ...,  1.8103e-01,
         -2.8655e-01,  1.7758e+04],
        [-3.5278e-01, -3.4140e-01, -2.8865e-01,  ...,  1.1771e-01,
         -1.9806e-01,  1.2454e+04],
        ...,
        [-3.6707e-01, -3.1097e-01, -1.8785e-01,  ...,  1.8149e-01,
         -3.1150e-01,  1.7949e+04],
        [-4.1036e-01, -3.2919e-01, -2.0308e-01,  ...,  1.8837e-01,
         -2.3108e-01,  1.7719e+04],
        [-3.3997e-01, -2.7893e-01, -2.0655e-01,  ...,  2.8164e-02,
         -2.0303e-01,  1.3198e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0815],
        [ 0.2481],
        [ 0.1007],
        [ 0.1241],
        [ 0.1373],
        [ 0.1561],
        [ 0.1304],
        [ 0.0175],
        [ 0.0830],
        [ 0.0867],
        [ 0.1243],
        [ 0.0694],
        [ 0.0962],
        [ 0.1317],
        [ 0.1523],
        [ 0.0726],
        [ 0.1099],
     

ep  4:  29%|█████████████████████▋                                                      | 8/28 [00:21<00:52,  2.64s/it]

tensor([[-2.9034e-01, -2.1593e-01, -2.9349e-01,  ...,  3.5208e-02,
         -1.9758e-01,  1.1985e+04],
        [-2.8789e-01, -2.8614e-01, -2.8721e-01,  ...,  1.9523e-01,
         -2.4019e-01,  1.7829e+04],
        [-3.7041e-01, -3.0064e-01, -2.6173e-01,  ...,  1.9359e-01,
         -1.6904e-01,  1.8038e+04],
        ...,
        [-3.0067e-01, -2.1020e-01, -1.9355e-01,  ...,  1.6668e-01,
         -2.6090e-01,  1.7007e+04],
        [-3.0309e-01, -2.5830e-01, -2.0299e-01,  ...,  1.0917e-01,
         -2.5172e-01,  1.4567e+04],
        [-4.7838e-01, -1.6113e-01, -2.6515e-01,  ...,  1.1456e-01,
         -2.1854e-01,  1.0363e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 1.1677e-01],
        [ 8.9252e-02],
        [ 7.8953e-02],
        [ 1.2835e-01],
        [ 1.4527e-01],
        [ 3.2290e-02],
        [ 1.9591e-01],
        [ 8.5680e-02],
        [ 1.3878e-01],
        [ 1.1371e-02],
        [ 1.2136e-01],
        [ 1.3981e-01],
        [ 2.5889e-01],
        [ 1.2110e-01],
      

ep  4:  32%|████████████████████████▍                                                   | 9/28 [00:23<00:49,  2.63s/it]

tensor([[-4.0157e-01, -1.9299e-01, -3.1764e-01,  ...,  1.1570e-01,
         -3.2807e-01,  1.6273e+04],
        [-3.4401e-01, -3.1200e-01, -3.0935e-01,  ...,  4.4304e-02,
         -2.0337e-01,  1.0636e+04],
        [-3.4542e-01, -3.0519e-01, -2.9068e-01,  ...,  1.9199e-01,
         -1.7531e-01,  1.6502e+04],
        ...,
        [-2.4735e-01, -2.8713e-01, -2.2819e-01,  ...,  7.2272e-02,
         -2.8225e-01,  1.1321e+04],
        [-3.0642e-01, -2.8676e-01, -1.6226e-01,  ...,  1.1029e-01,
         -2.0571e-01,  1.5126e+04],
        [-4.1505e-01, -3.0561e-01, -3.0559e-01,  ...,  1.7189e-01,
         -2.8727e-01,  1.1578e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1148],
        [ 0.2083],
        [ 0.2271],
        [ 0.1995],
        [ 0.1173],
        [-0.0869],
        [ 0.1462],
        [ 0.1249],
        [ 0.1623],
        [ 0.1561],
        [ 0.2371],
        [ 0.1086],
        [ 0.1269],
        [ 0.1792],
        [ 0.2038],
        [ 0.1515],
        [ 0.1744],
     

ep  4:  36%|██████████████████████████▊                                                | 10/28 [00:26<00:47,  2.63s/it]

tensor([[-3.1607e-01, -2.6021e-01, -2.8483e-01,  ...,  1.5557e-01,
         -2.1025e-01,  1.1573e+04],
        [-3.5430e-01, -1.9027e-01, -4.3700e-01,  ...,  1.5915e-01,
         -2.2503e-01,  1.1204e+04],
        [-4.1912e-01, -2.8263e-01, -3.4490e-01,  ...,  1.7267e-01,
         -2.3944e-01,  1.6027e+04],
        ...,
        [-3.5827e-01, -2.2541e-01, -2.9891e-01,  ...,  9.3634e-03,
         -2.6574e-01,  1.2632e+04],
        [-2.6010e-01, -1.4958e-01, -3.7532e-01,  ...,  3.6609e-01,
         -1.7422e-01,  1.7624e+04],
        [-3.5528e-01, -3.0814e-01, -2.9495e-01,  ...,  1.6698e-01,
         -1.9463e-01,  1.7000e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0761],
        [ 0.1821],
        [ 0.0507],
        [ 0.2087],
        [ 0.1866],
        [ 0.0994],
        [ 0.2213],
        [ 0.1196],
        [ 0.1579],
        [ 0.0808],
        [ 0.0456],
        [ 0.2095],
        [ 0.1322],
        [ 0.1270],
        [ 0.1165],
        [ 0.1280],
        [ 0.0812],
     

ep  4:  39%|█████████████████████████████▍                                             | 11/28 [00:28<00:44,  2.62s/it]

tensor([[-4.3018e-01, -3.2038e-01, -2.4274e-01,  ...,  1.1559e-01,
         -2.4719e-01,  9.8822e+03],
        [-3.0209e-01, -4.7763e-02, -5.0921e-01,  ...,  4.0191e-01,
         -2.1227e-01,  1.3116e+04],
        [-3.0116e-01, -2.7040e-01, -2.8907e-01,  ...,  1.7651e-01,
         -2.1443e-01,  1.7751e+04],
        ...,
        [-2.8993e-01, -3.0053e-01, -3.3836e-01,  ...,  1.6820e-01,
         -2.7963e-01,  1.5464e+04],
        [-3.5999e-01, -3.0043e-01, -3.2091e-01,  ...,  1.7434e-01,
         -2.1616e-01,  1.6531e+04],
        [-3.3687e-01, -2.7016e-01, -2.4285e-01,  ...,  2.1022e-01,
         -2.3223e-01,  1.4512e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0671],
        [ 0.1281],
        [ 0.1137],
        [ 0.1608],
        [ 0.1165],
        [ 0.0018],
        [ 0.1615],
        [ 0.2255],
        [ 0.1997],
        [ 0.1139],
        [ 0.0796],
        [ 0.0866],
        [ 0.0466],
        [ 0.0911],
        [ 0.1403],
        [ 0.0470],
        [ 0.0951],
     

ep  4:  43%|████████████████████████████████▏                                          | 12/28 [00:31<00:41,  2.61s/it]

tensor([[-3.4031e-01, -2.2289e-01, -2.6927e-01,  ...,  1.6278e-01,
         -2.5149e-01,  1.1771e+04],
        [-4.0792e-01, -3.1271e-01, -2.2086e-01,  ...,  1.0422e-01,
         -2.1584e-01,  1.6331e+04],
        [-3.5729e-01, -2.8481e-01, -2.4780e-01,  ...,  1.0326e-01,
         -1.7227e-01,  1.2342e+04],
        ...,
        [-3.9659e-01, -2.7956e-01, -2.8267e-01,  ...,  1.4917e-01,
         -1.6351e-01,  1.7043e+04],
        [-4.0903e-01, -1.9494e-01, -2.6544e-01,  ...,  1.8569e-01,
         -2.1425e-01,  1.7746e+04],
        [-3.7273e-01, -2.2576e-01, -3.1687e-01,  ...,  2.5766e-01,
         -1.9006e-01,  1.7068e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1196],
        [-0.0335],
        [ 0.1370],
        [ 0.0729],
        [-0.0016],
        [ 0.0775],
        [ 0.1961],
        [ 0.0730],
        [ 0.0741],
        [ 0.0991],
        [ 0.0738],
        [ 0.3278],
        [ 0.1927],
        [ 0.1262],
        [ 0.2116],
        [ 0.1568],
        [ 0.1313],
     

ep  4:  46%|██████████████████████████████████▊                                        | 13/28 [00:34<00:39,  2.62s/it]

tensor([[-3.9689e-01, -3.3483e-01, -2.5859e-01,  ...,  1.4981e-01,
         -2.7157e-01,  1.3205e+04],
        [-2.8426e-01, -2.4660e-01, -2.8498e-01,  ...,  1.2266e-01,
         -1.9426e-01,  8.8482e+03],
        [-3.3778e-01, -2.4794e-01, -2.5098e-01,  ...,  6.9231e-02,
         -2.8705e-01,  9.7740e+03],
        ...,
        [-4.1704e-01, -3.5060e-01, -3.6578e-01,  ...,  6.8910e-02,
         -2.2357e-01,  9.8299e+03],
        [-3.8351e-01, -2.0453e-01, -2.5638e-01,  ...,  5.8612e-02,
         -2.2195e-01,  1.3346e+04],
        [-3.4126e-01, -2.9451e-01, -2.0882e-01,  ...,  8.6014e-02,
         -2.1372e-01,  1.5318e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.3459],
        [ 0.0014],
        [ 0.0305],
        [ 0.0399],
        [ 0.2412],
        [ 0.0822],
        [ 0.1374],
        [ 0.1116],
        [ 0.2317],
        [ 0.0199],
        [ 0.2078],
        [ 0.0190],
        [ 0.1437],
        [ 0.1825],
        [ 0.0130],
        [ 0.1005],
        [ 0.1375],
     

ep  4:  50%|█████████████████████████████████████▌                                     | 14/28 [00:36<00:36,  2.63s/it]

tensor([[-3.5502e-01, -2.5876e-01, -2.1649e-01,  ...,  9.0758e-02,
         -2.0800e-01,  1.2020e+04],
        [-3.8637e-01, -1.7566e-01, -3.2787e-01,  ...,  6.3223e-02,
         -1.5179e-01,  1.0269e+04],
        [-4.1310e-01, -2.3542e-01, -2.4912e-01,  ...,  1.0974e-01,
         -2.5079e-01,  1.7623e+04],
        ...,
        [-3.0090e-01, -2.5054e-01, -2.6815e-01,  ...,  2.0753e-01,
         -2.7671e-01,  1.2985e+04],
        [-4.5794e-01, -2.6463e-01, -3.2530e-01,  ...,  1.5546e-01,
         -2.0043e-01,  1.1147e+04],
        [-3.1228e-01, -1.8379e-01, -4.0735e-01,  ...,  2.5088e-01,
         -2.0779e-01,  1.2190e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0952],
        [ 0.2091],
        [ 0.1575],
        [ 0.0997],
        [ 0.0913],
        [ 0.1731],
        [ 0.0333],
        [ 0.0084],
        [ 0.0505],
        [ 0.2138],
        [ 0.1935],
        [ 0.0762],
        [ 0.1387],
        [ 0.3100],
        [ 0.1173],
        [ 0.0512],
        [ 0.0253],
     

ep  4:  54%|████████████████████████████████████████▏                                  | 15/28 [00:39<00:34,  2.62s/it]

tensor([[-4.4775e-01, -1.9130e-01, -2.1526e-01,  ...,  1.5381e-01,
         -2.0843e-01,  1.2763e+04],
        [-3.1664e-01, -2.7430e-01, -4.0429e-01,  ...,  2.8693e-01,
         -2.3968e-01,  1.1045e+04],
        [-3.6388e-01, -3.0117e-01, -3.1616e-01,  ...,  9.5336e-02,
         -2.4665e-01,  1.0321e+04],
        ...,
        [-3.4616e-01, -2.0937e-01, -3.3338e-01,  ...,  1.5660e-01,
         -2.2474e-01,  1.0549e+04],
        [-3.2932e-01, -2.3154e-01, -2.2354e-01,  ...,  1.6787e-01,
         -2.5468e-01,  1.6102e+04],
        [-3.6198e-01, -2.9902e-01, -2.0722e-01,  ...,  1.2753e-01,
         -2.8123e-01,  1.2414e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0056],
        [ 0.1603],
        [ 0.1850],
        [ 0.1683],
        [ 0.0938],
        [ 0.1091],
        [ 0.1448],
        [ 0.0777],
        [ 0.0593],
        [ 0.1857],
        [ 0.0956],
        [ 0.0503],
        [ 0.0734],
        [ 0.0321],
        [ 0.0220],
        [ 0.3516],
        [ 0.2188],
     

ep  4:  57%|██████████████████████████████████████████▊                                | 16/28 [00:41<00:31,  2.63s/it]

tensor([[-2.8217e-01, -2.3255e-01, -3.4943e-01,  ...,  1.5291e-01,
         -1.9073e-01,  1.3507e+04],
        [-3.8660e-01, -2.7442e-01, -3.1972e-01,  ...,  1.8217e-01,
         -1.8381e-01,  1.6513e+04],
        [-3.6235e-01, -3.4141e-01, -3.3717e-01,  ...,  1.7243e-01,
         -2.5384e-01,  1.5871e+04],
        ...,
        [-4.0067e-01, -2.6052e-01, -2.9089e-01,  ...,  1.0600e-01,
         -2.3551e-01,  1.0286e+04],
        [-3.4821e-01, -2.8013e-01, -2.7043e-01,  ...,  1.3145e-01,
         -1.6341e-01,  1.8037e+04],
        [-3.6536e-01, -2.5236e-01, -2.3157e-01,  ...,  1.1472e-01,
         -2.3437e-01,  1.2716e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[3.4663e-02],
        [1.3547e-01],
        [6.7213e-02],
        [1.2397e-01],
        [8.0165e-02],
        [1.4463e-01],
        [7.8638e-02],
        [5.9739e-02],
        [1.8183e-01],
        [1.0052e-01],
        [2.6209e-01],
        [2.8914e-01],
        [1.4860e-01],
        [1.1715e-01],
        [5.4874e-02]

ep  4:  61%|█████████████████████████████████████████████▌                             | 17/28 [00:44<00:28,  2.63s/it]

tensor([[-4.1919e-01, -2.3866e-01, -2.6980e-01,  ...,  3.9768e-02,
         -2.1521e-01,  1.0144e+04],
        [-2.4898e-01, -1.3785e-01, -3.4131e-01,  ...,  3.0844e-01,
         -2.2581e-01,  1.7503e+04],
        [-2.8947e-01, -2.8295e-01, -2.5467e-01,  ...,  1.2965e-01,
         -2.0127e-01,  1.7031e+04],
        ...,
        [-2.9159e-01, -3.0986e-01, -2.8561e-01,  ...,  1.1948e-01,
         -2.2763e-01,  1.5338e+04],
        [-3.7322e-01, -2.6957e-01, -2.9887e-01,  ...,  1.6167e-01,
         -2.5231e-01,  1.5637e+04],
        [-3.5713e-01, -2.7528e-01, -2.1745e-01,  ...,  1.5995e-01,
         -2.2625e-01,  1.6417e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1578],
        [-0.0476],
        [-0.0076],
        [ 0.1019],
        [ 0.1820],
        [ 0.2009],
        [ 0.1152],
        [ 0.0354],
        [ 0.2127],
        [ 0.0638],
        [ 0.1158],
        [ 0.1102],
        [-0.0110],
        [ 0.1531],
        [ 0.1837],
        [ 0.1235],
        [ 0.0968],
     

ep  4:  64%|████████████████████████████████████████████████▏                          | 18/28 [00:47<00:26,  2.63s/it]

tensor([[-3.4253e-01, -2.4708e-01, -2.6203e-01,  ...,  1.1895e-01,
         -3.0976e-01,  1.0898e+04],
        [-3.6312e-01, -2.9652e-01, -1.9786e-01,  ...,  1.2804e-01,
         -2.8884e-01,  1.6994e+04],
        [-3.6442e-01,  3.1239e-03, -6.2649e-01,  ...,  4.4634e-01,
         -1.5282e-01,  1.1416e+04],
        ...,
        [-3.5874e-01, -1.7250e-01, -3.2968e-01,  ...,  3.1223e-01,
         -2.2715e-01,  1.6432e+04],
        [-3.7241e-01, -2.5891e-01, -2.8536e-01,  ...,  1.2375e-01,
         -2.4715e-01,  1.2624e+04],
        [-4.3299e-01, -2.0525e-01, -2.4819e-01,  ...,  1.2748e-01,
         -2.0539e-01,  1.3170e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1609],
        [ 0.0824],
        [ 0.2194],
        [ 0.1086],
        [ 0.0588],
        [ 0.1326],
        [ 0.1330],
        [ 0.0014],
        [ 0.0915],
        [ 0.1667],
        [ 0.0860],
        [ 0.0154],
        [ 0.1504],
        [ 0.1925],
        [ 0.0017],
        [ 0.1674],
        [ 0.2312],
     

ep  4:  68%|██████████████████████████████████████████████████▉                        | 19/28 [00:49<00:23,  2.63s/it]

tensor([[-3.4637e-01, -2.9712e-01, -2.9307e-01,  ...,  6.4643e-02,
         -2.2825e-01,  1.0897e+04],
        [-3.4965e-01, -3.4036e-01, -2.7774e-01,  ...,  1.0553e-01,
         -2.6727e-01,  1.4660e+04],
        [-4.0045e-01, -2.3125e-01, -3.0155e-01,  ...,  2.3750e-01,
         -2.4532e-01,  1.7448e+04],
        ...,
        [-3.8960e-01, -3.4833e-01, -2.7972e-01,  ...,  1.1803e-01,
         -1.9698e-01,  1.4010e+04],
        [-3.0793e-01, -2.1403e-01, -3.5144e-01,  ...,  1.2687e-01,
         -1.7847e-01,  1.8128e+04],
        [-3.2394e-01, -2.9974e-01, -2.9536e-01,  ...,  5.6627e-02,
         -2.3812e-01,  1.2653e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1805],
        [ 0.0969],
        [ 0.0984],
        [ 0.0946],
        [ 0.3670],
        [ 0.2554],
        [ 0.2859],
        [ 0.0521],
        [ 0.0041],
        [ 0.1465],
        [ 0.0329],
        [ 0.3129],
        [ 0.0642],
        [ 0.0588],
        [ 0.1144],
        [ 0.0680],
        [ 0.1381],
     

ep  4:  71%|█████████████████████████████████████████████████████▌                     | 20/28 [00:52<00:21,  2.64s/it]

tensor([[-3.8760e-01, -2.3735e-01, -3.0099e-01,  ...,  1.4737e-01,
         -2.7058e-01,  1.0405e+04],
        [-4.2466e-01, -2.2249e-01, -1.8340e-01,  ...,  1.1842e-01,
         -1.6583e-01,  1.7801e+04],
        [-3.7697e-01, -2.7139e-01, -3.4398e-01,  ...,  9.0579e-02,
         -1.9233e-01,  1.2548e+04],
        ...,
        [-3.8082e-01, -2.9701e-01, -2.3625e-01,  ...,  1.9641e-01,
         -2.6395e-01,  1.2964e+04],
        [-3.3546e-01, -2.6179e-01, -2.7518e-01,  ...,  9.9212e-02,
         -2.3345e-01,  1.0322e+04],
        [-2.4289e-01, -2.4664e-01, -2.8632e-01,  ...,  9.7463e-02,
         -2.5864e-01,  1.2071e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0298],
        [ 0.2237],
        [ 0.1589],
        [ 0.1053],
        [ 0.1136],
        [ 0.1906],
        [ 0.1463],
        [ 0.1281],
        [ 0.1036],
        [ 0.0720],
        [ 0.2683],
        [ 0.0687],
        [ 0.2264],
        [ 0.1044],
        [ 0.2156],
        [ 0.1334],
        [-0.0063],
     

ep  4:  75%|████████████████████████████████████████████████████████▎                  | 21/28 [00:55<00:18,  2.63s/it]

tensor([[-3.8331e-01, -2.4208e-01, -3.3067e-01,  ...,  1.7326e-01,
         -2.0584e-01,  1.6776e+04],
        [-3.9157e-01, -2.6151e-01, -2.7368e-01,  ...,  1.9859e-01,
         -2.4701e-01,  1.5273e+04],
        [-3.2513e-01, -2.1235e-01, -2.2293e-01,  ...,  1.2823e-01,
         -2.2946e-01,  1.6437e+04],
        ...,
        [-2.8873e-01, -1.8800e-01, -3.3964e-01,  ...,  1.5761e-01,
         -2.8531e-01,  1.2318e+04],
        [-3.6421e-01, -2.9346e-01, -2.4262e-01,  ...,  1.9759e-01,
         -1.6911e-01,  1.8040e+04],
        [-2.6577e-01, -2.0148e-01, -2.7029e-01,  ...,  2.4395e-01,
         -1.5022e-01,  1.7977e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1884],
        [ 0.2153],
        [ 0.1074],
        [ 0.2082],
        [ 0.1538],
        [ 0.1772],
        [ 0.2252],
        [ 0.1044],
        [ 0.0988],
        [ 0.1585],
        [ 0.1799],
        [ 0.1555],
        [ 0.1743],
        [ 0.0870],
        [ 0.0922],
        [ 0.0439],
        [ 0.2281],
     

ep  4:  79%|██████████████████████████████████████████████████████████▉                | 22/28 [00:57<00:15,  2.63s/it]

tensor([[-3.2757e-01, -2.8775e-01, -3.0047e-01,  ...,  1.0664e-01,
         -2.4935e-01,  1.0271e+04],
        [-2.4471e-01, -3.3220e-01, -2.6703e-01,  ...,  1.2799e-01,
         -1.6549e-01,  1.6170e+04],
        [-3.8740e-01, -1.7741e-01, -3.0826e-01,  ...,  2.6911e-02,
         -2.2582e-01,  1.0711e+04],
        ...,
        [-3.2681e-01, -2.8299e-01, -2.4543e-01,  ...,  1.5832e-01,
         -2.1534e-01,  1.6790e+04],
        [-4.2383e-01, -1.8086e-01, -3.4578e-01,  ...,  2.0221e-01,
         -1.9961e-01,  1.0538e+04],
        [-4.1856e-01, -2.7149e-01, -2.2667e-01,  ...,  1.6969e-01,
         -2.7211e-01,  1.7165e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0517],
        [ 0.1744],
        [-0.0324],
        [ 0.1524],
        [ 0.0572],
        [ 0.1391],
        [ 0.2127],
        [ 0.0740],
        [ 0.1172],
        [ 0.2932],
        [ 0.0089],
        [ 0.1334],
        [ 0.1956],
        [ 0.1432],
        [ 0.0781],
        [ 0.1008],
        [ 0.2204],
     

ep  4:  82%|█████████████████████████████████████████████████████████████▌             | 23/28 [01:00<00:13,  2.62s/it]

tensor([[-3.9822e-01, -3.0698e-01, -3.0965e-01,  ...,  1.4726e-01,
         -2.7643e-01,  8.4384e+03],
        [-3.5297e-01, -2.9420e-01, -2.1998e-01,  ...,  5.2890e-02,
         -2.7862e-01,  1.6805e+04],
        [-4.2946e-01, -3.0170e-01, -3.3206e-01,  ...,  1.1957e-01,
         -2.3761e-01,  1.1509e+04],
        ...,
        [-2.1182e-01, -1.6575e-01, -2.9628e-01,  ...,  2.0684e-01,
         -1.8657e-01,  1.7983e+04],
        [-2.8480e-01, -1.8029e-01, -3.3956e-01,  ...,  3.0732e-01,
         -1.3282e-01,  1.7711e+04],
        [-3.4616e-01, -3.2759e-01, -2.7711e-01,  ...,  9.3972e-02,
         -2.5969e-01,  1.2938e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.2156],
        [ 0.0507],
        [ 0.0686],
        [ 0.1617],
        [ 0.1099],
        [ 0.2416],
        [ 0.1618],
        [ 0.2188],
        [ 0.0447],
        [ 0.1340],
        [ 0.1004],
        [ 0.1937],
        [ 0.1202],
        [ 0.1940],
        [ 0.1763],
        [ 0.0702],
        [ 0.2337],
     

ep  4:  86%|████████████████████████████████████████████████████████████████▎          | 24/28 [01:02<00:10,  2.62s/it]

tensor([[-3.3464e-01, -2.2151e-01, -3.3590e-01,  ...,  2.4967e-01,
         -2.8392e-01,  1.5680e+04],
        [-3.9028e-01, -1.6770e-01, -3.0187e-01,  ...,  1.2186e-01,
         -2.6733e-01,  9.5093e+03],
        [-3.7009e-01, -2.9071e-01, -2.6052e-01,  ...,  9.4638e-02,
         -2.7129e-01,  1.1824e+04],
        ...,
        [-2.8248e-01, -2.4319e-01, -1.6683e-01,  ...,  5.5610e-02,
         -2.6724e-01,  1.2736e+04],
        [-3.6354e-01, -2.7409e-01, -2.2103e-01,  ...,  2.1830e-01,
         -1.9815e-01,  1.7673e+04],
        [-4.1961e-01, -2.3234e-01, -2.0888e-01,  ...,  8.3719e-02,
         -2.2842e-01,  1.0402e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1310],
        [ 0.0338],
        [ 0.1317],
        [ 0.0605],
        [ 0.1588],
        [ 0.0382],
        [ 0.1319],
        [ 0.1121],
        [ 0.0835],
        [ 0.1984],
        [ 0.1038],
        [ 0.1277],
        [ 0.1265],
        [ 0.2425],
        [ 0.1153],
        [ 0.1214],
        [ 0.1529],
     

ep  4:  89%|██████████████████████████████████████████████████████████████████▉        | 25/28 [01:05<00:07,  2.63s/it]

tensor([[-3.8383e-01, -3.1183e-01, -2.6222e-01,  ...,  2.7291e-01,
         -1.7375e-01,  1.8059e+04],
        [-2.5771e-01, -2.7501e-01, -2.5068e-01,  ...,  1.3121e-01,
         -2.1817e-01,  1.7815e+04],
        [-3.2757e-01, -2.2391e-01, -2.5303e-01,  ...,  3.2675e-02,
         -2.7579e-01,  1.2878e+04],
        ...,
        [-2.9605e-01, -3.0309e-01, -1.8609e-01,  ...,  1.0760e-01,
         -2.5633e-01,  1.8036e+04],
        [-2.6983e-01, -1.5044e-01, -2.2041e-01,  ...,  1.6992e-01,
         -2.0413e-01,  1.7265e+04],
        [-3.8343e-01, -1.9531e-01, -3.0533e-01,  ...,  1.8378e-01,
         -3.2077e-01,  8.4972e+03]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1387],
        [ 0.1022],
        [ 0.0454],
        [ 0.0288],
        [ 0.1564],
        [ 0.1507],
        [ 0.0889],
        [ 0.0046],
        [ 0.0788],
        [ 0.2026],
        [-0.0910],
        [ 0.0776],
        [ 0.1273],
        [ 0.0422],
        [ 0.1249],
        [ 0.2554],
        [ 0.1162],
     

ep  4:  93%|█████████████████████████████████████████████████████████████████████▋     | 26/28 [01:08<00:05,  2.62s/it]

tensor([[-5.1481e-01, -2.9994e-01, -3.9298e-01,  ...,  2.3033e-01,
         -1.7179e-01,  1.0139e+04],
        [-3.4854e-01, -2.6576e-01, -3.1358e-01,  ...,  8.4111e-02,
         -2.0287e-01,  1.0608e+04],
        [-3.9641e-01, -2.8323e-01, -3.0627e-01,  ...,  7.1188e-02,
         -2.7206e-01,  1.3158e+04],
        ...,
        [-2.9212e-01, -2.5928e-01, -2.7701e-01,  ...,  5.6322e-02,
         -2.7968e-01,  1.0662e+04],
        [-3.8469e-01, -2.7792e-01, -2.2506e-01,  ...,  1.4637e-01,
         -2.1714e-01,  1.2862e+04],
        [-3.0552e-01, -2.8250e-01, -2.5913e-01,  ...,  1.7306e-01,
         -2.5231e-01,  1.1585e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0688],
        [ 0.1220],
        [ 0.1317],
        [ 0.1527],
        [ 0.0399],
        [ 0.0746],
        [ 0.1481],
        [ 0.1153],
        [ 0.0758],
        [ 0.0317],
        [ 0.0596],
        [ 0.0550],
        [ 0.0397],
        [ 0.1511],
        [ 0.2011],
        [ 0.1961],
        [ 0.0428],
     

ep  4:  96%|████████████████████████████████████████████████████████████████████████▎  | 27/28 [01:10<00:02,  2.62s/it]

tensor([[-3.7644e-01, -2.9094e-01, -3.0876e-01,  ...,  1.7985e-01,
         -1.9471e-01,  1.1117e+04],
        [-3.1022e-01, -1.8703e-01, -2.7300e-01,  ...,  2.1941e-01,
         -2.4358e-01,  1.7720e+04],
        [-3.0835e-01, -1.7722e-01, -3.3259e-01,  ...,  1.6746e-01,
         -2.6243e-01,  1.2727e+04],
        ...,
        [-3.8417e-01, -2.6580e-01, -2.8672e-01,  ...,  2.6160e-01,
         -2.1652e-01,  1.7493e+04],
        [-4.3957e-01, -2.5109e-01, -3.4160e-01,  ...,  2.1672e-01,
         -2.1498e-01,  1.6449e+04],
        [-3.4637e-01, -2.3788e-01, -2.6091e-01,  ...,  1.7130e-02,
         -2.7820e-01,  1.2599e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[-0.0143],
        [ 0.0430],
        [ 0.3520],
        [ 0.1470],
        [ 0.2014],
        [ 0.0679],
        [ 0.2426],
        [ 0.1251],
        [ 0.0588],
        [ 0.1448],
        [ 0.1488],
        [ 0.2130],
        [ 0.1845],
        [ 0.0555],
        [ 0.1488],
        [ 0.1592],
        [ 0.0480],
     

ep  4: 100%|███████████████████████████████████████████████████████████████████████████| 28/28 [01:13<00:00,  2.62s/it]
valid:   0%|                                                                                     | 0/4 [00:00<?, ?it/s]

tensor([[-4.1796e-01, -2.0925e-01, -3.3564e-01,  ...,  2.6719e-01,
         -2.0737e-01,  1.2777e+04],
        [-3.9881e-01, -1.7040e-01, -4.0099e-01,  ...,  2.6061e-01,
         -2.0786e-01,  1.2170e+04],
        [-4.5531e-01, -2.2109e-01, -3.2122e-01,  ...,  1.2727e-01,
         -2.2215e-01,  1.2213e+04],
        ...,
        [-3.5546e-01, -3.7159e-01, -2.5867e-01,  ...,  1.2280e-01,
         -2.5107e-01,  1.8024e+04],
        [-3.3449e-01, -2.8725e-01, -2.9110e-01,  ...,  1.5977e-01,
         -2.7775e-01,  1.2685e+04],
        [-3.1204e-01, -2.3164e-01, -1.6219e-01,  ...,  9.6379e-02,
         -2.4057e-01,  1.1256e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1910],
        [-0.0160],
        [ 0.0362],
        [ 0.0668],
        [ 0.0675],
        [ 0.1197],
        [ 0.2024],
        [ 0.1599],
        [ 0.0838],
        [ 0.2410],
        [ 0.0491],
        [ 0.1735],
        [ 0.1591],
        [ 0.1002],
        [ 0.2053],
        [ 0.1637],
        [ 0.0976],
     

valid:  25%|███████████████████▎                                                         | 1/4 [00:02<00:07,  2.61s/it]

tensor([[-3.2233e-01, -2.8638e-01, -2.5438e-01,  ...,  1.1127e-01,
         -2.4928e-01,  8.2773e+03],
        [-3.6531e-01, -1.8541e-01, -3.2476e-01,  ...,  9.8329e-02,
         -2.3989e-01,  8.2921e+03],
        [-3.7271e-01, -3.1051e-01, -2.6006e-01,  ...,  1.0789e-01,
         -1.8490e-01,  8.4220e+03],
        ...,
        [-3.7780e-01, -2.1708e-01, -3.6814e-01,  ...,  2.7568e-01,
         -1.1368e-01,  7.2709e+03],
        [-3.9939e-01, -2.8329e-01, -2.7683e-01,  ...,  1.3335e-01,
         -2.4340e-01,  7.3509e+03],
        [-3.1813e-01, -1.7252e-01, -3.3864e-01,  ...,  1.8896e-01,
         -2.7303e-01,  7.1148e+03]], device='cuda:0')
tensor([[0.1207],
        [0.1291],
        [0.1304],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [

valid:  50%|██████████████████████████████████████▌                                      | 2/4 [00:05<00:05,  2.61s/it]

tensor([[-2.6459e-01, -1.9816e-01, -2.3166e-01,  ...,  4.3366e-02,
         -2.4275e-01,  7.3657e+03],
        [-4.4351e-01, -1.6997e-01, -2.4599e-01,  ...,  5.0830e-02,
         -2.2045e-01,  7.4660e+03],
        [-3.6358e-01, -2.7964e-01, -2.1717e-01,  ...,  1.4754e-01,
         -2.0238e-01,  7.5556e+03],
        ...,
        [-3.0289e-01, -2.8608e-01, -3.0153e-01,  ...,  9.7022e-02,
         -2.0641e-01,  7.5523e+03],
        [-3.2801e-01, -2.4983e-01, -2.3538e-01,  ...,  1.0617e-01,
         -3.3416e-01,  7.9973e+03],
        [-3.5315e-01, -2.7097e-01, -2.6696e-01,  ...,  1.3142e-01,
         -2.4509e-01,  8.4248e+03]], device='cuda:0')
tensor([[0.1207],
        [0.1291],
        [0.1304],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [

valid:  75%|█████████████████████████████████████████████████████████▊                   | 3/4 [00:07<00:02,  2.61s/it]

tensor([[-4.3092e-01, -2.7025e-01, -3.0428e-01,  ...,  1.7351e-01,
         -2.1021e-01,  8.2736e+03],
        [-3.9687e-01, -1.6447e-01, -3.1085e-01,  ...,  2.0987e-01,
         -1.9773e-01,  8.4973e+03],
        [-4.0648e-01, -2.3350e-01, -3.5499e-01,  ...,  1.4228e-01,
         -2.3323e-01,  8.8352e+03],
        ...,
        [-2.5792e-01, -2.7998e-01, -2.9789e-01,  ...,  6.9041e-02,
         -1.5714e-01,  1.1430e+04],
        [-3.6105e-01, -3.1575e-01, -2.2100e-01,  ...,  1.0072e-01,
         -2.3743e-01,  1.1417e+04],
        [-3.4809e-01, -2.3759e-01, -2.0738e-01,  ...,  1.4380e-01,
         -2.2184e-01,  1.1349e+04]], device='cuda:0')
tensor([[0.1207],
        [0.1291],
        [0.1304],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306],
        [

valid: 100%|█████████████████████████████████████████████████████████████████████████████| 4/4 [00:08<00:00,  2.03s/it]
ep  5:   0%|                                                                                    | 0/28 [00:00<?, ?it/s]

tensor([[-3.0299e-01, -2.0548e-01, -3.7163e-01,  ...,  2.2204e-01,
         -2.4157e-01,  1.1479e+04],
        [-5.2351e-01, -1.3666e-01, -4.0322e-01,  ...,  1.6678e-01,
         -2.3607e-01,  1.1660e+04],
        [-2.8602e-01, -2.1112e-01, -2.7816e-01,  ...,  2.0929e-01,
         -2.4698e-01,  1.1616e+04],
        ...,
        [-4.1360e-01, -3.0982e-01, -2.5829e-01,  ...,  8.7611e-02,
         -2.1257e-01,  1.1642e+04],
        [-4.2525e-01, -2.9543e-01, -3.0947e-01,  ...,  7.3900e-02,
         -2.2249e-01,  1.1782e+04],
        [-2.6573e-01, -2.9025e-01, -3.4377e-01,  ...,  1.3080e-01,
         -1.8334e-01,  1.1734e+04]], device='cuda:0')
tensor([[0.1207],
        [0.1291],
        [0.1304],
        [0.1306],
        [0.1306],
        [0.1306],
        [0.1306]], device='cuda:0')
valid acc 0.5376884422110553


ep  5:   4%|██▋                                                                         | 1/28 [00:02<01:11,  2.63s/it]

tensor([[-3.1702e-01, -2.3586e-01, -2.6527e-01,  ...,  1.1479e-01,
         -2.1072e-01,  1.1445e+04],
        [-3.6336e-01, -3.0126e-01, -2.9737e-01,  ...,  1.0575e-01,
         -2.4669e-01,  1.5327e+04],
        [-3.7143e-01, -3.0776e-01, -2.4387e-01,  ...,  1.6023e-01,
         -2.6944e-01,  1.1126e+04],
        ...,
        [-4.9020e-01, -1.0666e-01, -3.3240e-01,  ...,  3.0624e-01,
         -2.0592e-01,  1.7490e+04],
        [-2.6012e-01, -2.2625e-01, -1.0408e-01,  ...,  1.3860e-01,
         -2.7118e-01,  1.7142e+04],
        [-3.7647e-01, -2.0906e-01, -2.0967e-01,  ...,  1.7943e-01,
         -2.2944e-01,  1.7159e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0848],
        [-0.0248],
        [ 0.0860],
        [ 0.0418],
        [ 0.0983],
        [ 0.1650],
        [ 0.0804],
        [ 0.1034],
        [ 0.0674],
        [ 0.1138],
        [ 0.2733],
        [ 0.1862],
        [ 0.1623],
        [ 0.1859],
        [ 0.0654],
        [ 0.0050],
        [ 0.1857],
     

ep  5:   7%|█████▍                                                                      | 2/28 [00:05<01:08,  2.62s/it]

tensor([[-3.2208e-01, -1.6071e-01, -2.1917e-01,  ...,  1.3239e-01,
         -2.0294e-01,  1.7805e+04],
        [-2.6575e-01, -8.9620e-02, -2.5639e-01,  ...,  2.6353e-01,
         -2.9237e-01,  1.7781e+04],
        [-2.6516e-01, -2.2230e-01, -2.1477e-01,  ...,  1.1484e-01,
         -3.0535e-01,  1.7733e+04],
        ...,
        [-3.5594e-01, -2.9147e-01, -2.9471e-01,  ...,  1.5151e-01,
         -1.5995e-01,  1.7763e+04],
        [-3.3110e-01, -3.2599e-01, -3.4201e-01,  ...,  6.8560e-02,
         -2.4686e-01,  1.2381e+04],
        [-2.7821e-01, -2.3811e-01, -2.2439e-01,  ...,  8.5796e-02,
         -2.5010e-01,  1.2987e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0723],
        [ 0.0758],
        [ 0.2790],
        [ 0.1278],
        [ 0.2101],
        [ 0.0408],
        [ 0.2362],
        [-0.0119],
        [ 0.2247],
        [ 0.0707],
        [ 0.0698],
        [ 0.1111],
        [ 0.0881],
        [ 0.0637],
        [ 0.1388],
        [ 0.1005],
        [ 0.1044],
     

ep  5:  11%|████████▏                                                                   | 3/28 [00:07<01:05,  2.61s/it]

tensor([[-3.7123e-01, -2.7698e-01, -3.6372e-01,  ...,  1.4202e-01,
         -1.9083e-01,  1.2202e+04],
        [-3.7063e-01, -2.3328e-01, -2.2482e-01,  ...,  2.1673e-01,
         -2.3106e-01,  1.7807e+04],
        [-3.6463e-01, -2.6500e-01, -2.7587e-01,  ...,  5.0424e-02,
         -1.7033e-01,  8.7502e+03],
        ...,
        [-3.3553e-01, -2.0631e-01, -3.5493e-01,  ...,  1.5216e-01,
         -2.7690e-01,  1.0855e+04],
        [-3.1893e-01, -2.3390e-01, -2.3986e-01,  ...,  1.0720e-01,
         -2.3060e-01,  1.3207e+04],
        [-3.8282e-01, -2.9445e-01, -2.9687e-01,  ...,  1.7893e-01,
         -2.3478e-01,  1.5258e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 1.6607e-01],
        [ 1.5653e-01],
        [ 1.2896e-01],
        [ 7.1016e-02],
        [-5.8210e-03],
        [ 1.4734e-01],
        [ 9.9339e-02],
        [ 1.6388e-01],
        [ 1.4739e-01],
        [ 1.3285e-01],
        [ 3.1602e-02],
        [ 2.0395e-01],
        [ 1.8433e-01],
        [ 3.0917e-01],
      

ep  5:  14%|██████████▊                                                                 | 4/28 [00:10<01:02,  2.62s/it]

tensor([[-3.2690e-01, -2.4070e-01, -2.8972e-01,  ...,  1.9127e-01,
         -3.2164e-01,  1.7730e+04],
        [-3.6764e-01, -1.1426e-01, -3.0577e-01,  ...,  2.5255e-01,
         -2.5165e-01,  1.6094e+04],
        [-3.5814e-01, -2.4687e-01, -3.0471e-01,  ..., -1.1023e-02,
         -2.7837e-01,  1.0310e+04],
        ...,
        [-3.3167e-01, -2.5996e-01, -2.8100e-01,  ...,  3.4715e-02,
         -2.3055e-01,  1.3239e+04],
        [-2.5010e-01, -3.1880e-01, -1.7650e-01,  ...,  6.0610e-02,
         -2.1590e-01,  1.7760e+04],
        [-3.9119e-01, -2.0580e-01, -2.4603e-01,  ...,  1.3229e-01,
         -2.6461e-01,  1.0297e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0369],
        [ 0.0537],
        [ 0.0863],
        [ 0.2730],
        [ 0.0975],
        [ 0.0672],
        [ 0.0714],
        [ 0.1621],
        [ 0.0900],
        [ 0.0482],
        [ 0.0588],
        [ 0.1197],
        [ 0.1057],
        [ 0.0071],
        [ 0.2214],
        [ 0.1213],
        [ 0.1253],
     

ep  5:  18%|█████████████▌                                                              | 5/28 [00:13<01:00,  2.62s/it]

tensor([[-4.0067e-01, -2.6052e-01, -2.9089e-01,  ...,  1.0600e-01,
         -2.3551e-01,  1.0286e+04],
        [-3.6669e-01, -2.3129e-01, -3.3963e-01,  ...,  1.3087e-01,
         -2.1291e-01,  1.1144e+04],
        [-4.1222e-01, -2.6576e-01, -3.0550e-01,  ...,  2.5008e-01,
         -1.6212e-01,  1.5178e+04],
        ...,
        [-3.4761e-01, -3.7318e-01, -2.3399e-01,  ...,  1.4139e-01,
         -2.5983e-01,  1.6915e+04],
        [-3.5225e-01, -2.8004e-01, -2.2324e-01,  ...,  2.2592e-01,
         -2.3169e-01,  1.6028e+04],
        [-3.5126e-01, -2.6004e-01, -2.8648e-01,  ...,  9.4685e-02,
         -2.3945e-01,  1.8030e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0123],
        [ 0.1871],
        [ 0.0391],
        [ 0.2372],
        [ 0.1945],
        [ 0.1140],
        [ 0.2496],
        [ 0.0663],
        [ 0.0694],
        [ 0.1367],
        [ 0.1453],
        [-0.0012],
        [ 0.0681],
        [ 0.1471],
        [ 0.1426],
        [ 0.0880],
        [ 0.1826],
     

ep  5:  21%|████████████████▎                                                           | 6/28 [00:15<00:57,  2.62s/it]

tensor([[-4.1505e-01, -3.0561e-01, -3.0559e-01,  ...,  1.7189e-01,
         -2.8727e-01,  1.1578e+04],
        [-4.0303e-01, -2.5873e-01, -2.6287e-01,  ...,  1.5724e-01,
         -2.2792e-01,  1.8035e+04],
        [-3.6909e-01, -2.9426e-01, -1.0642e-01,  ...,  1.3271e-01,
         -2.7024e-01,  1.6514e+04],
        ...,
        [-2.9103e-01, -2.7605e-01, -2.0999e-01,  ...,  1.5490e-01,
         -1.9972e-01,  1.6360e+04],
        [-3.2072e-01, -2.7191e-01, -2.4150e-01,  ...,  1.1294e-01,
         -2.6563e-01,  1.5176e+04],
        [-3.8438e-01, -1.8772e-02, -5.4449e-01,  ...,  3.9325e-01,
         -2.2451e-01,  1.1133e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0415],
        [ 0.1685],
        [ 0.1230],
        [ 0.0597],
        [ 0.2478],
        [ 0.0294],
        [ 0.1523],
        [ 0.1357],
        [ 0.2735],
        [ 0.0895],
        [ 0.2004],
        [ 0.1014],
        [ 0.1669],
        [ 0.1210],
        [ 0.0961],
        [ 0.2704],
        [ 0.1528],
     

ep  5:  25%|███████████████████                                                         | 7/28 [00:18<00:55,  2.62s/it]

tensor([[-4.9054e-01, -2.1465e-01, -3.3824e-01,  ...,  2.2324e-01,
         -1.9019e-01,  1.2845e+04],
        [-2.2514e-01, -2.6350e-01, -2.8326e-01,  ...,  1.7943e-01,
         -2.3355e-01,  1.5747e+04],
        [-3.5451e-01, -1.6673e-01, -4.3754e-01,  ...,  3.5644e-01,
         -1.6787e-01,  1.5875e+04],
        ...,
        [-3.5741e-01, -2.6865e-01, -2.6202e-01,  ...,  1.1811e-01,
         -1.8689e-01,  1.7666e+04],
        [-3.2727e-01, -2.7294e-01, -2.6505e-01,  ...,  1.0250e-01,
         -1.7211e-01,  1.5374e+04],
        [-3.7814e-01, -3.3177e-01, -2.6905e-01,  ...,  4.7437e-02,
         -2.8116e-01,  1.6168e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 8.8520e-02],
        [ 1.7617e-01],
        [ 1.7511e-01],
        [ 2.5546e-01],
        [ 2.2563e-01],
        [ 1.2964e-01],
        [ 5.7593e-02],
        [-2.0128e-02],
        [ 4.9764e-02],
        [ 2.3576e-01],
        [ 3.8667e-02],
        [ 2.2894e-02],
        [ 3.1851e-01],
        [ 1.8426e-01],
      

ep  5:  29%|█████████████████████▋                                                      | 8/28 [00:20<00:52,  2.62s/it]

tensor([[-3.4104e-01, -2.6856e-01, -2.7314e-01,  ...,  2.0348e-01,
         -1.6202e-01,  1.4776e+04],
        [-4.3957e-01, -2.5109e-01, -3.4160e-01,  ...,  2.1672e-01,
         -2.1498e-01,  1.6449e+04],
        [-3.8288e-01, -2.4063e-01, -1.9486e-01,  ...,  1.3210e-01,
         -2.8497e-01,  1.0294e+04],
        ...,
        [-3.4850e-01, -2.5510e-01, -2.9092e-01,  ...,  1.6147e-01,
         -2.4226e-01,  1.3001e+04],
        [-4.0864e-01, -2.6857e-01, -2.1168e-01,  ...,  1.9768e-01,
         -1.9762e-01,  1.7805e+04],
        [-3.7953e-01, -2.3303e-01, -3.5064e-01,  ...,  2.3095e-01,
         -1.7769e-01,  1.3973e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 1.5563e-01],
        [ 1.3689e-01],
        [ 1.0932e-01],
        [ 1.7948e-01],
        [-3.0432e-02],
        [ 1.6363e-04],
        [ 1.4743e-01],
        [ 1.9200e-01],
        [ 1.4811e-01],
        [ 1.0976e-01],
        [ 1.3960e-01],
        [ 1.0296e-01],
        [ 1.3469e-02],
        [ 9.6673e-02],
      

ep  5:  32%|████████████████████████▍                                                   | 9/28 [00:23<00:49,  2.61s/it]

tensor([[-3.3067e-01, -2.9677e-01, -2.9047e-01,  ...,  1.2652e-01,
         -2.0939e-01,  1.7373e+04],
        [-3.2971e-01, -2.4861e-01, -2.0659e-01,  ...,  1.0500e-01,
         -2.7375e-01,  1.7087e+04],
        [-3.4446e-01, -1.8069e-01, -2.6802e-01,  ...,  1.2977e-01,
         -2.3758e-01,  1.2090e+04],
        ...,
        [-4.4775e-01, -1.9130e-01, -2.1526e-01,  ...,  1.5381e-01,
         -2.0843e-01,  1.2763e+04],
        [-3.6369e-01, -2.5001e-01, -2.9534e-01,  ...,  9.2417e-02,
         -1.9878e-01,  1.1061e+04],
        [-3.9597e-01, -3.3169e-01, -3.0806e-01,  ...,  1.2890e-01,
         -2.4171e-01,  1.4565e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.0600],
        [0.0588],
        [0.1539],
        [0.0610],
        [0.0882],
        [0.1103],
        [0.0680],
        [0.1515],
        [0.2778],
        [0.0553],
        [0.0785],
        [0.2554],
        [0.1603],
        [0.0321],
        [0.1346],
        [0.1719],
        [0.2189],
        [0.1590],
    

ep  5:  36%|██████████████████████████▊                                                | 10/28 [00:26<00:47,  2.61s/it]

tensor([[-4.0020e-01, -3.0560e-01, -2.4594e-01,  ...,  1.0907e-01,
         -2.9776e-01,  1.2922e+04],
        [-3.5743e-01, -2.5349e-01, -2.7726e-01,  ...,  1.5421e-01,
         -2.4196e-01,  1.6154e+04],
        [-2.5108e-01, -2.3112e-01, -2.7575e-01,  ...,  1.0800e-01,
         -2.5529e-01,  1.7077e+04],
        ...,
        [-3.5437e-01, -3.2202e-01, -2.3073e-01,  ...,  2.1523e-01,
         -2.4130e-01,  1.2391e+04],
        [-3.6762e-01, -2.3952e-01, -3.3424e-01,  ...,  2.3648e-01,
         -2.9169e-01,  1.0437e+04],
        [-3.3798e-01, -1.7083e-01, -2.2773e-01,  ...,  1.2580e-01,
         -2.4939e-01,  1.2807e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.1169],
        [0.0697],
        [0.0152],
        [0.1103],
        [0.2544],
        [0.0749],
        [0.0713],
        [0.1313],
        [0.0867],
        [0.1322],
        [0.0464],
        [0.1362],
        [0.0598],
        [0.1250],
        [0.0471],
        [0.1444],
        [0.1911],
        [0.1419],
    

ep  5:  39%|█████████████████████████████▍                                             | 11/28 [00:28<00:44,  2.62s/it]

tensor([[-3.1277e-01, -3.1379e-01, -2.6814e-01,  ...,  1.0274e-01,
         -2.4931e-01,  1.4922e+04],
        [-3.8890e-01, -2.7761e-01, -3.4548e-01,  ...,  2.1852e-01,
         -2.3981e-01,  1.7928e+04],
        [-4.1441e-01, -2.3040e-01, -3.0592e-01,  ...,  1.1131e-01,
         -2.8487e-01,  1.3010e+04],
        ...,
        [-3.8326e-01, -2.6951e-01, -2.3874e-01,  ...,  1.4600e-01,
         -2.5077e-01,  1.3596e+04],
        [-4.1995e-01, -2.7379e-01, -2.4317e-01,  ...,  1.6894e-01,
         -1.9164e-01,  1.6472e+04],
        [-3.3166e-01, -2.4775e-01, -2.7082e-01,  ...,  1.3740e-01,
         -2.7136e-01,  1.2090e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1395],
        [ 0.1607],
        [ 0.1859],
        [ 0.1999],
        [ 0.2130],
        [ 0.2981],
        [ 0.2488],
        [ 0.0611],
        [ 0.0668],
        [ 0.0683],
        [ 0.1080],
        [ 0.2616],
        [ 0.0682],
        [ 0.1107],
        [ 0.1709],
        [ 0.0398],
        [ 0.0615],
     

ep  5:  43%|████████████████████████████████▏                                          | 12/28 [00:31<00:41,  2.61s/it]

tensor([[-3.6965e-01, -2.8212e-01, -3.1046e-01,  ...,  1.7965e-01,
         -2.4131e-01,  1.5556e+04],
        [-3.1099e-01, -2.4655e-01, -1.6991e-01,  ...,  9.5612e-02,
         -2.4402e-01,  1.7869e+04],
        [-4.0026e-01, -3.3545e-01, -2.6718e-01,  ...,  2.2542e-01,
         -2.0691e-01,  1.6491e+04],
        ...,
        [-4.1466e-01, -3.2458e-01, -2.7944e-01,  ...,  6.2101e-02,
         -2.6061e-01,  1.3312e+04],
        [-3.4980e-01, -2.9343e-01, -3.1180e-01,  ...,  9.1082e-02,
         -1.4368e-01,  1.1247e+04],
        [-2.8438e-01, -2.6435e-01, -1.9739e-01,  ...,  1.0576e-01,
         -2.6238e-01,  1.2772e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0368],
        [-0.0054],
        [ 0.1216],
        [ 0.1459],
        [ 0.1843],
        [ 0.0274],
        [ 0.1033],
        [ 0.2583],
        [ 0.1719],
        [ 0.3027],
        [ 0.1199],
        [ 0.2108],
        [ 0.1588],
        [ 0.0835],
        [ 0.2331],
        [ 0.1581],
        [ 0.2110],
     

ep  5:  46%|██████████████████████████████████▊                                        | 13/28 [00:34<00:39,  2.62s/it]

tensor([[-3.5178e-01, -3.0722e-01, -2.5514e-01,  ...,  1.2126e-01,
         -2.1136e-01,  1.2605e+04],
        [-3.0861e-01, -2.8911e-01, -2.5154e-01,  ...,  6.2652e-02,
         -3.0264e-01,  1.6340e+04],
        [-3.2678e-01, -2.2474e-01, -3.1223e-01,  ...,  3.1335e-01,
         -2.0264e-01,  1.7216e+04],
        ...,
        [-3.8447e-01, -2.3596e-01, -2.4317e-01,  ...,  5.8713e-02,
         -2.7924e-01,  1.2952e+04],
        [-3.9693e-01, -1.0551e-01, -2.3122e-01,  ...,  1.9632e-01,
         -2.6491e-01,  1.6337e+04],
        [-4.0225e-01, -2.6047e-01, -1.9697e-01,  ...,  1.8973e-01,
         -1.7937e-01,  1.5976e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 1.9498e-01],
        [ 7.6428e-02],
        [ 5.1588e-02],
        [ 1.0986e-01],
        [ 6.2750e-02],
        [ 1.9177e-01],
        [ 1.5154e-01],
        [ 1.8826e-01],
        [ 1.1561e-01],
        [ 2.4921e-01],
        [ 2.1492e-01],
        [ 1.6334e-01],
        [ 2.2268e-01],
        [ 2.1179e-01],
      

ep  5:  50%|█████████████████████████████████████▌                                     | 14/28 [00:36<00:36,  2.62s/it]

tensor([[-3.6648e-01, -2.5281e-01, -3.0037e-01,  ...,  1.1444e-01,
         -2.2664e-01,  1.1809e+04],
        [-3.3540e-01, -2.9936e-01, -2.3458e-01,  ...,  1.0538e-01,
         -2.0126e-01,  1.0002e+04],
        [-4.3942e-01, -2.2854e-01, -4.1302e-01,  ...,  2.4133e-01,
         -1.7411e-01,  1.0836e+04],
        ...,
        [-3.9026e-01, -1.0837e-01, -2.9618e-01,  ...,  9.9716e-02,
         -2.4356e-01,  1.0425e+04],
        [-2.8586e-01, -2.8247e-01, -2.3039e-01,  ...,  3.1547e-02,
         -2.3372e-01,  1.1406e+04],
        [-3.7690e-01, -2.7926e-01, -1.6603e-01,  ...,  9.2709e-02,
         -2.1994e-01,  1.4910e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[-0.0050],
        [ 0.1038],
        [ 0.1418],
        [ 0.1492],
        [ 0.1015],
        [ 0.1721],
        [ 0.1489],
        [ 0.1718],
        [ 0.1077],
        [ 0.1709],
        [ 0.0678],
        [ 0.1354],
        [-0.0166],
        [ 0.0999],
        [ 0.1907],
        [ 0.0367],
        [ 0.0291],
     

ep  5:  54%|████████████████████████████████████████▏                                  | 15/28 [00:39<00:34,  2.62s/it]

tensor([[-3.9415e-01, -3.7556e-01, -2.2855e-01,  ...,  1.2098e-01,
         -2.4382e-01,  1.4526e+04],
        [-3.1369e-01, -2.8385e-01, -2.6461e-01,  ...,  4.5594e-02,
         -2.5789e-01,  1.2966e+04],
        [-3.5548e-01, -1.1612e-01, -2.8163e-01,  ...,  2.1029e-01,
         -1.9561e-01,  1.7888e+04],
        ...,
        [-3.5791e-01, -2.5953e-01, -2.1248e-01,  ...,  1.5193e-01,
         -1.6351e-01,  1.7686e+04],
        [-3.9659e-01, -2.7956e-01, -2.8267e-01,  ...,  1.4917e-01,
         -1.6351e-01,  1.7043e+04],
        [-4.3974e-01, -2.1409e-01, -2.7098e-01,  ...,  1.9843e-01,
         -2.5499e-01,  1.0618e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.1018],
        [0.2261],
        [0.1834],
        [0.2888],
        [0.0454],
        [0.1975],
        [0.1749],
        [0.1178],
        [0.2956],
        [0.0506],
        [0.0540],
        [0.1438],
        [0.0851],
        [0.0248],
        [0.0943],
        [0.0848],
        [0.1723],
        [0.1600],
    

ep  5:  57%|██████████████████████████████████████████▊                                | 16/28 [00:41<00:31,  2.62s/it]

tensor([[-2.8543e-01, -2.4393e-01, -3.3339e-01,  ...,  2.3348e-01,
         -2.0390e-01,  1.7581e+04],
        [-4.4518e-01, -3.0828e-01, -3.0077e-01,  ...,  1.4655e-01,
         -2.4827e-01,  1.2801e+04],
        [-3.9028e-01, -1.6770e-01, -3.0187e-01,  ...,  1.2186e-01,
         -2.6733e-01,  9.5093e+03],
        ...,
        [-3.1751e-01, -2.7193e-01, -2.8836e-01,  ...,  1.5326e-01,
         -2.4282e-01,  1.7653e+04],
        [-2.8690e-01, -2.6718e-01, -1.9210e-01,  ...,  1.1798e-01,
         -2.9312e-01,  8.3000e+03],
        [-3.9782e-01, -2.4461e-01, -2.9760e-01,  ...,  1.4869e-01,
         -2.1666e-01,  1.0867e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1093],
        [ 0.2887],
        [ 0.3258],
        [ 0.1443],
        [ 0.0119],
        [ 0.1270],
        [ 0.1104],
        [ 0.1047],
        [ 0.1197],
        [ 0.1057],
        [ 0.1637],
        [ 0.2513],
        [ 0.1411],
        [ 0.2261],
        [ 0.0666],
        [ 0.1057],
        [ 0.2874],
     

ep  5:  61%|█████████████████████████████████████████████▌                             | 17/28 [00:44<00:28,  2.62s/it]

tensor([[-3.8742e-01, -2.8840e-01, -3.4284e-01,  ...,  1.0807e-01,
         -2.3803e-01,  1.2796e+04],
        [-3.3330e-01, -2.4354e-01, -2.6148e-01,  ...,  1.2872e-01,
         -2.5938e-01,  1.7976e+04],
        [-3.5871e-01, -2.1447e-01, -1.9529e-01,  ...,  1.0849e-01,
         -2.7851e-01,  1.8011e+04],
        ...,
        [-3.1379e-01, -3.1297e-01, -2.6608e-01,  ...,  4.6902e-02,
         -2.5979e-01,  1.5530e+04],
        [-3.3120e-01, -3.3713e-01, -1.7041e-01,  ...,  9.4604e-02,
         -2.4949e-01,  1.2967e+04],
        [-4.3183e-01, -2.7388e-01, -2.7309e-01,  ...,  1.0821e-01,
         -2.5013e-01,  9.0707e+03]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.1308],
        [0.1193],
        [0.1205],
        [0.2408],
        [0.1600],
        [0.1418],
        [0.1033],
        [0.1416],
        [0.1068],
        [0.1356],
        [0.2171],
        [0.2953],
        [0.0886],
        [0.1012],
        [0.2603],
        [0.1451],
        [0.0948],
        [0.0861],
    

ep  5:  64%|████████████████████████████████████████████████▏                          | 18/28 [00:47<00:26,  2.62s/it]

tensor([[-3.7609e-01, -2.9035e-01, -2.8449e-01,  ...,  1.6658e-01,
         -2.0047e-01,  1.1613e+04],
        [-3.4727e-01, -2.2425e-01, -4.2008e-01,  ...,  2.6411e-01,
         -1.9298e-01,  1.2288e+04],
        [-3.7365e-01, -3.5807e-01, -1.9128e-01,  ...,  1.3924e-01,
         -2.7441e-01,  1.5376e+04],
        ...,
        [-3.9801e-01, -1.3782e-01, -4.5322e-01,  ...,  2.8979e-01,
         -2.1108e-01,  1.0927e+04],
        [-3.9687e-01, -2.7871e-01, -3.2557e-01,  ...,  1.2420e-01,
         -1.8698e-01,  1.0466e+04],
        [-3.8312e-01, -2.6794e-01, -2.3142e-01,  ...,  1.1407e-01,
         -2.3679e-01,  1.1494e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0240],
        [ 0.0698],
        [ 0.2677],
        [ 0.1211],
        [ 0.1958],
        [ 0.2565],
        [ 0.1883],
        [ 0.1465],
        [ 0.2771],
        [ 0.1892],
        [ 0.1548],
        [ 0.1889],
        [ 0.1274],
        [ 0.2061],
        [ 0.0169],
        [ 0.1422],
        [ 0.2075],
     

ep  5:  68%|██████████████████████████████████████████████████▉                        | 19/28 [00:49<00:23,  2.62s/it]

tensor([[-4.6147e-01, -8.7934e-02, -4.3396e-01,  ...,  3.1132e-01,
         -1.3760e-01,  1.0390e+04],
        [-3.9139e-01, -2.0823e-01, -3.3054e-01,  ...,  1.0136e-01,
         -2.1258e-01,  1.2939e+04],
        [-4.2054e-01, -1.2266e-01, -5.2044e-01,  ...,  3.8560e-01,
         -1.9564e-01,  9.8680e+03],
        ...,
        [-3.2784e-01, -2.3775e-01, -3.0466e-01,  ...,  1.2128e-01,
         -2.6499e-01,  1.0040e+04],
        [-3.1132e-01, -2.7820e-01, -2.7719e-01,  ...,  9.1738e-02,
         -2.2302e-01,  9.7894e+03],
        [-2.7272e-01, -1.8165e-01, -2.5444e-01,  ...,  1.5176e-01,
         -3.0181e-01,  1.0968e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0172],
        [ 0.1653],
        [ 0.1684],
        [ 0.2244],
        [ 0.1133],
        [ 0.0883],
        [ 0.1013],
        [ 0.0860],
        [ 0.0986],
        [ 0.2940],
        [ 0.0482],
        [ 0.1452],
        [ 0.0917],
        [ 0.1802],
        [ 0.1217],
        [ 0.0531],
        [ 0.1039],
     

ep  5:  71%|█████████████████████████████████████████████████████▌                     | 20/28 [00:52<00:21,  2.63s/it]

tensor([[-4.3086e-01, -3.1492e-01, -4.0269e-01,  ...,  2.1864e-01,
         -1.9174e-01,  1.3096e+04],
        [-4.2848e-01, -2.0321e-01, -3.1727e-01,  ...,  1.1069e-01,
         -1.3001e-01,  9.9494e+03],
        [-3.6145e-01, -2.5650e-01, -1.7335e-01,  ...,  1.2373e-01,
         -1.8344e-01,  1.7069e+04],
        ...,
        [-4.2750e-01, -1.5470e-01, -2.7529e-01,  ...,  2.1612e-01,
         -1.4510e-01,  1.7663e+04],
        [-4.4842e-01, -2.4791e-01, -3.6640e-01,  ...,  2.2494e-01,
         -2.2570e-01,  1.2850e+04],
        [-3.7217e-01, -3.1657e-01, -2.5453e-01,  ...,  1.1682e-01,
         -2.0253e-01,  1.7173e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.0923],
        [0.1903],
        [0.0894],
        [0.1786],
        [0.1765],
        [0.1027],
        [0.1569],
        [0.1257],
        [0.0406],
        [0.0067],
        [0.2696],
        [0.1538],
        [0.1472],
        [0.1410],
        [0.0997],
        [0.0948],
        [0.0390],
        [0.2583],
    

ep  5:  75%|████████████████████████████████████████████████████████▎                  | 21/28 [00:55<00:18,  2.63s/it]

tensor([[-3.2556e-01, -2.2911e-01, -2.4954e-01,  ...,  1.4875e-01,
         -2.9181e-01,  1.7675e+04],
        [-2.8478e-01, -2.9509e-01, -2.7397e-01,  ...,  8.5816e-02,
         -2.5059e-01,  1.6699e+04],
        [-3.3289e-01, -3.0242e-01, -2.8413e-01,  ...,  1.7476e-01,
         -2.3309e-01,  1.2221e+04],
        ...,
        [-3.1785e-01, -3.0195e-01, -3.1306e-01,  ...,  1.0944e-01,
         -2.8388e-01,  1.2021e+04],
        [-4.0241e-01, -2.7450e-01, -2.7862e-01,  ...,  2.1603e-01,
         -2.3960e-01,  1.6485e+04],
        [-3.9588e-01, -2.5914e-01, -3.3163e-01,  ...,  1.6032e-01,
         -2.9270e-01,  1.6419e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1791],
        [ 0.0706],
        [ 0.2477],
        [ 0.2193],
        [ 0.1014],
        [ 0.0717],
        [ 0.1314],
        [ 0.1193],
        [ 0.1225],
        [ 0.0649],
        [ 0.1468],
        [ 0.2553],
        [ 0.1107],
        [ 0.1359],
        [ 0.0909],
        [ 0.2181],
        [ 0.1496],
     

ep  5:  79%|██████████████████████████████████████████████████████████▉                | 22/28 [00:57<00:15,  2.63s/it]

tensor([[-4.6359e-01, -1.4409e-01, -4.5862e-01,  ...,  2.3231e-01,
         -1.3157e-01,  1.3060e+04],
        [-3.5178e-01, -2.7942e-01, -3.0645e-01,  ...,  1.0903e-01,
         -2.2553e-01,  1.2789e+04],
        [-4.1821e-01, -3.2588e-01, -2.9103e-01,  ...,  1.8377e-01,
         -1.5814e-01,  1.2019e+04],
        ...,
        [-3.2784e-01, -2.4362e-01, -2.4788e-01,  ...,  2.2189e-01,
         -2.3008e-01,  1.5767e+04],
        [-3.5999e-01, -3.0043e-01, -3.2091e-01,  ...,  1.7434e-01,
         -2.1616e-01,  1.6531e+04],
        [-3.1728e-01, -2.4756e-01, -2.3630e-01,  ..., -4.3600e-03,
         -2.9286e-01,  1.2835e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1311],
        [ 0.1470],
        [ 0.2663],
        [ 0.1931],
        [ 0.1143],
        [ 0.1050],
        [ 0.0301],
        [ 0.0719],
        [ 0.0264],
        [ 0.2491],
        [ 0.1673],
        [ 0.2756],
        [ 0.1951],
        [-0.0366],
        [ 0.1418],
        [ 0.1393],
        [ 0.1364],
     

ep  5:  82%|█████████████████████████████████████████████████████████████▌             | 23/28 [01:00<00:13,  2.63s/it]

tensor([[-3.2939e-01, -1.3612e-01, -2.9643e-01,  ...,  2.8776e-01,
         -2.0590e-01,  1.7245e+04],
        [-2.5816e-01, -3.8214e-01, -2.7272e-01,  ...,  7.4684e-03,
         -2.1076e-01,  1.6330e+04],
        [-2.9869e-01, -3.6268e-01, -2.3666e-01,  ...,  1.3313e-01,
         -2.3222e-01,  1.6425e+04],
        ...,
        [-3.3228e-01, -2.3648e-01, -3.0979e-01,  ...,  1.6137e-01,
         -2.1787e-01,  1.6937e+04],
        [-4.0045e-01, -2.3125e-01, -3.0155e-01,  ...,  2.3750e-01,
         -2.4532e-01,  1.7448e+04],
        [-3.5866e-01, -2.3821e-01, -2.5212e-01,  ...,  1.0440e-01,
         -2.8745e-01,  1.1370e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.2156],
        [ 0.1607],
        [ 0.1526],
        [ 0.0948],
        [ 0.1057],
        [ 0.1267],
        [ 0.0323],
        [ 0.0297],
        [ 0.0456],
        [ 0.2556],
        [ 0.1483],
        [ 0.1131],
        [ 0.1206],
        [ 0.1392],
        [ 0.0579],
        [ 0.0770],
        [ 0.0675],
     

ep  5:  86%|████████████████████████████████████████████████████████████████▎          | 24/28 [01:02<00:10,  2.63s/it]

tensor([[-4.1184e-01, -2.9544e-01, -2.6392e-01,  ...,  2.0824e-01,
         -2.5319e-01,  1.5822e+04],
        [-3.6768e-01, -2.6759e-01, -2.4751e-01,  ...,  1.2977e-01,
         -2.4302e-01,  9.7917e+03],
        [-3.9671e-01, -3.0882e-01, -2.9262e-01,  ...,  1.4049e-01,
         -2.0590e-01,  1.2418e+04],
        ...,
        [-4.8032e-01, -3.0412e-01, -2.8470e-01,  ...,  1.6406e-01,
         -2.2554e-01,  1.4969e+04],
        [-3.2664e-01, -2.3396e-01, -2.6549e-01,  ...,  9.6552e-02,
         -1.8463e-01,  1.0068e+04],
        [-4.1192e-01, -2.2992e-01, -3.1360e-01,  ...,  1.4005e-01,
         -2.2150e-01,  1.0309e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0931],
        [ 0.0653],
        [ 0.1801],
        [ 0.0971],
        [ 0.0445],
        [ 0.1126],
        [-0.0160],
        [ 0.2534],
        [ 0.1479],
        [ 0.1568],
        [ 0.2355],
        [ 0.1847],
        [ 0.1398],
        [ 0.1676],
        [ 0.2074],
        [ 0.2985],
        [ 0.1229],
     

ep  5:  89%|██████████████████████████████████████████████████████████████████▉        | 25/28 [01:05<00:07,  2.63s/it]

tensor([[-3.6767e-01, -2.2676e-01, -2.9387e-01,  ...,  8.5994e-02,
         -2.6699e-01,  9.2810e+03],
        [-3.8625e-01, -3.1078e-01, -2.1660e-01,  ...,  1.4333e-01,
         -2.5724e-01,  1.7895e+04],
        [-3.0479e-01, -2.6535e-01, -2.3657e-01,  ...,  1.0737e-01,
         -2.2892e-01,  1.4910e+04],
        ...,
        [-3.9960e-01, -2.9212e-01, -3.5465e-01,  ...,  1.0665e-01,
         -2.0778e-01,  1.7056e+04],
        [-3.5990e-01, -1.4359e-01, -4.3203e-01,  ...,  4.1549e-01,
         -1.4268e-01,  1.7706e+04],
        [-3.5877e-01, -3.0451e-01, -3.4644e-01,  ...,  2.3510e-01,
         -2.4941e-01,  1.2756e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1357],
        [ 0.1775],
        [ 0.1409],
        [-0.0460],
        [ 0.0270],
        [ 0.0832],
        [ 0.0876],
        [ 0.2813],
        [ 0.1389],
        [ 0.1260],
        [ 0.1800],
        [ 0.1122],
        [ 0.1174],
        [ 0.1392],
        [ 0.1024],
        [ 0.1208],
        [ 0.2465],
     

ep  5:  93%|█████████████████████████████████████████████████████████████████████▋     | 26/28 [01:08<00:05,  2.63s/it]

tensor([[-4.1796e-01, -2.0925e-01, -3.3564e-01,  ...,  2.6719e-01,
         -2.0737e-01,  1.2777e+04],
        [-3.0294e-01, -2.8291e-01, -2.8944e-01,  ...,  1.5990e-01,
         -2.9811e-01,  1.6717e+04],
        [-3.8042e-01, -2.1796e-01, -2.7671e-01,  ...,  9.1064e-02,
         -2.0955e-01,  1.3307e+04],
        ...,
        [-3.9060e-01, -2.9308e-01, -2.5256e-01,  ...,  1.0533e-01,
         -2.7164e-01,  1.3104e+04],
        [-3.0649e-01, -2.2021e-01, -2.2563e-01,  ...,  7.5228e-02,
         -1.9603e-01,  1.2218e+04],
        [-4.0422e-01, -3.2982e-01, -2.8745e-01,  ...,  2.5830e-02,
         -1.6451e-01,  1.4606e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0251],
        [ 0.1404],
        [ 0.1413],
        [ 0.0870],
        [ 0.1219],
        [ 0.0849],
        [ 0.0611],
        [ 0.0988],
        [ 0.1638],
        [ 0.0982],
        [ 0.2272],
        [ 0.1152],
        [ 0.1063],
        [ 0.0966],
        [ 0.0969],
        [ 0.1500],
        [ 0.2751],
     

ep  5:  96%|████████████████████████████████████████████████████████████████████████▎  | 27/28 [01:10<00:02,  2.64s/it]

tensor([[-3.4653e-01, -3.1790e-01, -2.3846e-01,  ...,  1.2911e-01,
         -3.3560e-01,  1.4452e+04],
        [-3.2713e-01, -1.6660e-01, -2.4351e-01,  ...,  2.4159e-01,
         -2.5983e-01,  1.7706e+04],
        [-3.0552e-01, -2.8250e-01, -2.5913e-01,  ...,  1.7306e-01,
         -2.5231e-01,  1.1585e+04],
        ...,
        [-3.7181e-01, -2.4044e-01, -3.9166e-01,  ...,  2.0085e-01,
         -1.1146e-01,  9.7422e+03],
        [-3.9284e-01, -2.8168e-01, -2.1689e-01,  ...,  1.4804e-01,
         -1.6857e-01,  1.7777e+04],
        [-4.0903e-01, -2.3779e-01, -2.4538e-01,  ...,  1.0086e-01,
         -2.5426e-01,  1.2287e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1517],
        [ 0.0199],
        [ 0.1479],
        [ 0.1585],
        [ 0.0948],
        [ 0.1665],
        [ 0.1275],
        [ 0.2517],
        [ 0.0869],
        [ 0.0952],
        [ 0.0243],
        [ 0.1796],
        [ 0.1678],
        [ 0.0864],
        [ 0.1581],
        [ 0.2035],
        [ 0.0539],
     

ep  5: 100%|███████████████████████████████████████████████████████████████████████████| 28/28 [01:13<00:00,  2.62s/it]
valid:   0%|                                                                                     | 0/4 [00:00<?, ?it/s]

tensor([[-3.5090e-01, -1.8209e-01, -4.7367e-01,  ...,  2.6896e-01,
         -2.4115e-01,  1.2506e+04],
        [-3.9178e-01, -2.6303e-01, -2.8034e-01,  ...,  2.0237e-01,
         -2.2398e-01,  9.6275e+03],
        [-4.0232e-01, -2.5939e-01, -3.0630e-01,  ...,  1.5544e-01,
         -1.9869e-01,  9.7127e+03],
        ...,
        [-3.6615e-01, -2.6552e-01, -3.5836e-01,  ...,  9.1014e-02,
         -2.0625e-01,  1.2695e+04],
        [-4.1679e-01, -2.1557e-01, -4.2028e-01,  ...,  3.2078e-01,
         -1.4070e-01,  1.2595e+04],
        [-2.8214e-01, -3.4035e-01, -2.4910e-01,  ...,  1.4021e-01,
         -2.5478e-01,  1.5011e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.1553],
        [0.2862],
        [0.2658],
        [0.1014],
        [0.0438],
        [0.0146],
        [0.0480],
        [0.0918],
        [0.1518],
        [0.0647],
        [0.1652],
        [0.0851],
        [0.1604],
        [0.1432],
        [0.1171],
        [0.1230],
        [0.1557],
        [0.1461],
    

valid:  25%|███████████████████▎                                                         | 1/4 [00:02<00:07,  2.62s/it]

tensor([[-3.2233e-01, -2.8638e-01, -2.5438e-01,  ...,  1.1127e-01,
         -2.4928e-01,  8.2773e+03],
        [-3.6531e-01, -1.8541e-01, -3.2476e-01,  ...,  9.8329e-02,
         -2.3989e-01,  8.2921e+03],
        [-3.7271e-01, -3.1051e-01, -2.6006e-01,  ...,  1.0789e-01,
         -1.8490e-01,  8.4220e+03],
        ...,
        [-3.7780e-01, -2.1708e-01, -3.6814e-01,  ...,  2.7568e-01,
         -1.1368e-01,  7.2709e+03],
        [-3.9939e-01, -2.8329e-01, -2.7683e-01,  ...,  1.3335e-01,
         -2.4340e-01,  7.3509e+03],
        [-3.1813e-01, -1.7252e-01, -3.3864e-01,  ...,  1.8896e-01,
         -2.7303e-01,  7.1148e+03]], device='cuda:0')
tensor([[0.1270],
        [0.1356],
        [0.1370],
        [0.1371],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [

valid:  50%|██████████████████████████████████████▌                                      | 2/4 [00:05<00:05,  2.64s/it]

tensor([[-2.6459e-01, -1.9816e-01, -2.3166e-01,  ...,  4.3366e-02,
         -2.4275e-01,  7.3657e+03],
        [-4.4351e-01, -1.6997e-01, -2.4599e-01,  ...,  5.0830e-02,
         -2.2045e-01,  7.4660e+03],
        [-3.6358e-01, -2.7964e-01, -2.1717e-01,  ...,  1.4754e-01,
         -2.0238e-01,  7.5556e+03],
        ...,
        [-3.0289e-01, -2.8608e-01, -3.0153e-01,  ...,  9.7022e-02,
         -2.0641e-01,  7.5523e+03],
        [-3.2801e-01, -2.4983e-01, -2.3538e-01,  ...,  1.0617e-01,
         -3.3416e-01,  7.9973e+03],
        [-3.5315e-01, -2.7097e-01, -2.6696e-01,  ...,  1.3142e-01,
         -2.4509e-01,  8.4248e+03]], device='cuda:0')
tensor([[0.1270],
        [0.1356],
        [0.1370],
        [0.1371],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [

valid:  75%|█████████████████████████████████████████████████████████▊                   | 3/4 [00:07<00:02,  2.64s/it]

tensor([[-4.3092e-01, -2.7025e-01, -3.0428e-01,  ...,  1.7351e-01,
         -2.1021e-01,  8.2736e+03],
        [-3.9687e-01, -1.6447e-01, -3.1085e-01,  ...,  2.0987e-01,
         -1.9773e-01,  8.4973e+03],
        [-4.0648e-01, -2.3350e-01, -3.5499e-01,  ...,  1.4228e-01,
         -2.3323e-01,  8.8352e+03],
        ...,
        [-2.5792e-01, -2.7998e-01, -2.9789e-01,  ...,  6.9041e-02,
         -1.5714e-01,  1.1430e+04],
        [-3.6105e-01, -3.1575e-01, -2.2100e-01,  ...,  1.0072e-01,
         -2.3743e-01,  1.1417e+04],
        [-3.4809e-01, -2.3759e-01, -2.0738e-01,  ...,  1.4380e-01,
         -2.2184e-01,  1.1349e+04]], device='cuda:0')
tensor([[0.1270],
        [0.1356],
        [0.1370],
        [0.1371],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [0.1372],
        [

valid: 100%|█████████████████████████████████████████████████████████████████████████████| 4/4 [00:08<00:00,  2.05s/it]
ep  6:   0%|                                                                                    | 0/28 [00:00<?, ?it/s]

tensor([[-3.0299e-01, -2.0548e-01, -3.7163e-01,  ...,  2.2204e-01,
         -2.4157e-01,  1.1479e+04],
        [-5.2351e-01, -1.3666e-01, -4.0322e-01,  ...,  1.6678e-01,
         -2.3607e-01,  1.1660e+04],
        [-2.8602e-01, -2.1112e-01, -2.7816e-01,  ...,  2.0929e-01,
         -2.4698e-01,  1.1616e+04],
        ...,
        [-4.1360e-01, -3.0982e-01, -2.5829e-01,  ...,  8.7611e-02,
         -2.1257e-01,  1.1642e+04],
        [-4.2525e-01, -2.9543e-01, -3.0947e-01,  ...,  7.3900e-02,
         -2.2249e-01,  1.1782e+04],
        [-2.6573e-01, -2.9025e-01, -3.4377e-01,  ...,  1.3080e-01,
         -1.8334e-01,  1.1734e+04]], device='cuda:0')
tensor([[0.1270],
        [0.1356],
        [0.1370],
        [0.1371],
        [0.1372],
        [0.1372],
        [0.1372]], device='cuda:0')
valid acc 0.5376884422110553


ep  6:   4%|██▋                                                                         | 1/28 [00:02<01:11,  2.65s/it]

tensor([[-3.5085e-01, -2.6410e-01, -2.9014e-01,  ...,  1.9831e-01,
         -3.1048e-01,  1.7321e+04],
        [-3.3167e-01, -2.5996e-01, -2.8100e-01,  ...,  3.4715e-02,
         -2.3055e-01,  1.3239e+04],
        [-3.9621e-01, -2.9420e-01, -3.1577e-01,  ...,  1.0237e-01,
         -3.2140e-01,  1.2496e+04],
        ...,
        [-3.5464e-01, -1.8187e-01, -2.0823e-01,  ...,  2.7590e-01,
         -2.3394e-01,  1.6205e+04],
        [-3.7365e-01, -3.5807e-01, -1.9128e-01,  ...,  1.3924e-01,
         -2.7441e-01,  1.5376e+04],
        [-4.3031e-01, -2.4337e-01, -2.5550e-01,  ...,  9.8720e-02,
         -2.2380e-01,  1.0498e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1901],
        [ 0.1409],
        [-0.0093],
        [ 0.1120],
        [ 0.0062],
        [ 0.1421],
        [ 0.1303],
        [ 0.2349],
        [-0.0653],
        [ 0.1564],
        [ 0.2001],
        [ 0.2084],
        [ 0.0194],
        [ 0.0283],
        [ 0.1144],
        [ 0.0617],
        [ 0.3361],
     

ep  6:   7%|█████▍                                                                      | 2/28 [00:05<01:08,  2.63s/it]

tensor([[-3.2742e-01, -2.7813e-01, -3.4496e-01,  ...,  1.7668e-01,
         -2.1424e-01,  1.7169e+04],
        [-3.6974e-01, -2.8062e-01, -2.9029e-01,  ...,  1.4315e-01,
         -2.4764e-01,  1.2261e+04],
        [-3.5208e-01, -2.7221e-01, -2.9426e-01,  ...,  1.4224e-01,
         -2.2485e-01,  1.1897e+04],
        ...,
        [-3.7211e-01, -1.8108e-01, -2.7783e-01,  ...,  6.3773e-02,
         -3.2382e-01,  1.1382e+04],
        [-4.0474e-01, -2.0529e-01, -2.9639e-01,  ...,  3.2189e-01,
         -2.3843e-01,  1.5276e+04],
        [-3.5451e-01, -1.6673e-01, -4.3754e-01,  ...,  3.5644e-01,
         -1.6787e-01,  1.5875e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1006],
        [ 0.2615],
        [ 0.1665],
        [ 0.1402],
        [ 0.1172],
        [ 0.1140],
        [-0.0697],
        [ 0.2078],
        [ 0.1575],
        [ 0.2414],
        [ 0.1783],
        [ 0.1147],
        [ 0.0972],
        [ 0.1003],
        [ 0.0858],
        [ 0.1158],
        [ 0.0826],
     

ep  6:  11%|████████▏                                                                   | 3/28 [00:07<01:05,  2.62s/it]

tensor([[-3.5178e-01, -2.7942e-01, -3.0645e-01,  ...,  1.0903e-01,
         -2.2553e-01,  1.2789e+04],
        [-3.6334e-01, -2.3575e-01, -3.1307e-01,  ...,  2.7783e-01,
         -2.3013e-01,  1.4036e+04],
        [-3.4870e-01, -2.8029e-01, -2.2123e-01,  ...,  2.1715e-01,
         -3.0025e-01,  1.6659e+04],
        ...,
        [-3.6857e-01, -3.6462e-01, -2.4759e-01,  ...,  1.0575e-02,
         -1.9837e-01,  1.7071e+04],
        [-3.8660e-01, -2.7442e-01, -3.1972e-01,  ...,  1.8217e-01,
         -1.8381e-01,  1.6513e+04],
        [-3.4090e-01, -2.5975e-01, -2.8152e-01,  ...,  1.0869e-01,
         -2.6632e-01,  1.2950e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.0717],
        [0.0625],
        [0.1234],
        [0.1157],
        [0.0176],
        [0.2150],
        [0.0620],
        [0.0708],
        [0.1731],
        [0.0968],
        [0.0877],
        [0.0360],
        [0.0944],
        [0.1549],
        [0.0691],
        [0.1141],
        [0.2291],
        [0.0548],
    

ep  6:  14%|██████████▊                                                                 | 4/28 [00:10<01:03,  2.63s/it]

tensor([[-4.4120e-01, -2.0841e-01, -2.9606e-01,  ...,  2.0397e-01,
         -2.1738e-01,  1.5885e+04],
        [-3.0649e-01, -2.2021e-01, -2.2563e-01,  ...,  7.5228e-02,
         -1.9603e-01,  1.2218e+04],
        [-3.8586e-01, -2.7884e-01, -3.2816e-01,  ...,  2.5057e-01,
         -2.1443e-01,  1.0451e+04],
        ...,
        [-3.6401e-01, -2.1846e-01, -2.4990e-01,  ...,  1.9481e-01,
         -2.9688e-01,  1.6561e+04],
        [-3.4411e-01, -3.4520e-01, -1.9580e-01,  ...,  1.5451e-01,
         -2.1523e-01,  1.6528e+04],
        [-2.3561e-01, -2.6714e-01, -1.9140e-01,  ...,  3.6359e-02,
         -2.6081e-01,  1.2447e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0390],
        [ 0.2024],
        [ 0.2662],
        [ 0.1757],
        [ 0.0325],
        [ 0.0244],
        [ 0.1755],
        [ 0.1265],
        [ 0.1082],
        [ 0.0422],
        [ 0.0566],
        [ 0.0976],
        [ 0.1032],
        [ 0.0764],
        [-0.0173],
        [-0.0502],
        [ 0.1987],
     

ep  6:  18%|█████████████▌                                                              | 5/28 [00:13<01:00,  2.64s/it]

tensor([[-2.3377e-01, -2.8372e-01, -2.5461e-01,  ...,  2.3917e-02,
         -2.7008e-01,  8.3595e+03],
        [-2.9688e-01, -3.9777e-01, -2.7672e-01,  ...,  2.1638e-01,
         -2.6658e-01,  1.6478e+04],
        [-3.4631e-01, -2.3089e-01, -3.1390e-01,  ...,  1.4256e-01,
         -2.5063e-01,  1.0761e+04],
        ...,
        [-2.9732e-01, -2.2647e-01, -2.6413e-01,  ...,  1.7705e-01,
         -2.6443e-01,  1.6041e+04],
        [-3.0835e-01, -1.7722e-01, -3.3259e-01,  ...,  1.6746e-01,
         -2.6243e-01,  1.2727e+04],
        [-3.4672e-01, -2.4180e-01, -3.1148e-01,  ...,  8.9279e-02,
         -2.5598e-01,  1.0860e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1894],
        [ 0.1518],
        [ 0.1045],
        [ 0.1257],
        [ 0.2493],
        [ 0.1093],
        [-0.0483],
        [ 0.2696],
        [ 0.2922],
        [ 0.2027],
        [ 0.1852],
        [ 0.1807],
        [ 0.2447],
        [ 0.1768],
        [ 0.1860],
        [ 0.1397],
        [ 0.0655],
     

ep  6:  21%|████████████████▎                                                           | 6/28 [00:15<00:57,  2.63s/it]

tensor([[-3.7990e-01, -3.1110e-01, -3.0691e-01,  ...,  1.5624e-01,
         -2.2072e-01,  1.6947e+04],
        [-4.0753e-01, -2.7717e-01, -2.8266e-01,  ...,  1.4739e-01,
         -2.8851e-01,  1.2273e+04],
        [-2.9212e-01, -2.5928e-01, -2.7701e-01,  ...,  5.6322e-02,
         -2.7968e-01,  1.0662e+04],
        ...,
        [-4.0971e-01, -2.0467e-01, -2.5034e-01,  ...,  1.3792e-01,
         -2.2979e-01,  9.3982e+03],
        [-3.4737e-01, -2.7338e-01, -2.5358e-01,  ...,  1.9366e-01,
         -1.9494e-01,  1.0174e+04],
        [-3.4580e-01, -2.6731e-01, -2.6024e-01,  ...,  1.3357e-01,
         -2.7728e-01,  1.3214e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0498],
        [ 0.1073],
        [ 0.0453],
        [ 0.1927],
        [ 0.2318],
        [ 0.1042],
        [ 0.2549],
        [ 0.1223],
        [ 0.0841],
        [ 0.1326],
        [ 0.0182],
        [ 0.0792],
        [ 0.0929],
        [ 0.0641],
        [ 0.1280],
        [ 0.0622],
        [ 0.1560],
     

ep  6:  25%|███████████████████                                                         | 7/28 [00:18<00:55,  2.62s/it]

tensor([[-4.1310e-01, -2.3542e-01, -2.4912e-01,  ...,  1.0974e-01,
         -2.5079e-01,  1.7623e+04],
        [-2.4638e-01, -3.0871e-01, -2.3775e-01,  ...,  9.7940e-02,
         -2.3311e-01,  1.1755e+04],
        [-3.4090e-01, -3.0876e-01, -2.6475e-01,  ...,  1.1471e-01,
         -1.9358e-01,  1.7540e+04],
        ...,
        [-3.7077e-01, -3.0289e-01, -2.4539e-01,  ...,  1.3344e-01,
         -1.6579e-01,  1.7936e+04],
        [-3.7393e-01, -2.9342e-01, -2.3398e-01,  ...,  1.5921e-02,
         -2.3756e-01,  1.3233e+04],
        [-3.3644e-01, -2.8334e-01, -2.3474e-01,  ...,  4.9470e-02,
         -2.8136e-01,  1.2501e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0735],
        [ 0.2508],
        [ 0.1465],
        [-0.0023],
        [ 0.1842],
        [ 0.0862],
        [ 0.2032],
        [ 0.0585],
        [ 0.2031],
        [ 0.0216],
        [ 0.1451],
        [ 0.1379],
        [ 0.0754],
        [ 0.1236],
        [ 0.1274],
        [ 0.0664],
        [ 0.2393],
     

ep  6:  29%|█████████████████████▋                                                      | 8/28 [00:21<00:52,  2.63s/it]

tensor([[-3.4652e-01, -2.9041e-01, -2.6480e-01,  ...,  2.2792e-01,
         -2.5364e-01,  1.7384e+04],
        [-4.4004e-01, -1.9385e-01, -2.5943e-01,  ...,  1.0500e-01,
         -2.4399e-01,  1.5944e+04],
        [-3.1692e-01, -3.4895e-01, -1.8473e-01,  ...,  3.9459e-02,
         -2.7700e-01,  1.5915e+04],
        ...,
        [-2.9989e-01, -1.7895e-01, -2.7354e-01,  ...,  1.7704e-01,
         -2.5332e-01,  1.6351e+04],
        [-3.6165e-01, -2.1392e-01, -2.8349e-01,  ...,  4.2911e-02,
         -2.9529e-01,  1.2170e+04],
        [-3.4637e-01, -2.9712e-01, -2.9307e-01,  ...,  6.4643e-02,
         -2.2825e-01,  1.0897e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1130],
        [ 0.0226],
        [ 0.0493],
        [ 0.1353],
        [ 0.1947],
        [ 0.2120],
        [ 0.1448],
        [ 0.2588],
        [ 0.0464],
        [ 0.1867],
        [ 0.1593],
        [ 0.1883],
        [ 0.2011],
        [ 0.1582],
        [-0.0779],
        [ 0.2774],
        [ 0.0060],
     

ep  6:  32%|████████████████████████▍                                                   | 9/28 [00:23<00:49,  2.62s/it]

tensor([[-4.7838e-01, -1.6113e-01, -2.6515e-01,  ...,  1.1456e-01,
         -2.1854e-01,  1.0363e+04],
        [-3.4225e-01, -2.5272e-01, -2.8888e-01,  ...,  2.2232e-01,
         -2.7486e-01,  1.7281e+04],
        [-4.0297e-01, -1.7084e-01, -2.7890e-01,  ...,  1.4940e-01,
         -2.8129e-01,  1.7695e+04],
        ...,
        [-3.6621e-01, -4.2176e-01, -2.4033e-01,  ...,  1.9047e-01,
         -2.6911e-01,  1.7730e+04],
        [-4.1678e-01, -2.3948e-01, -2.0479e-01,  ...,  1.3788e-01,
         -1.7337e-01,  1.8289e+04],
        [-4.1617e-01, -3.0448e-01, -2.3956e-01,  ...,  1.6618e-01,
         -2.4083e-01,  1.2617e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0900],
        [ 0.2166],
        [ 0.1315],
        [ 0.0777],
        [ 0.3283],
        [ 0.1564],
        [ 0.0946],
        [ 0.1347],
        [ 0.2182],
        [ 0.1921],
        [ 0.2140],
        [ 0.2512],
        [ 0.2905],
        [ 0.0474],
        [ 0.0406],
        [ 0.1617],
        [ 0.1944],
     

ep  6:  36%|██████████████████████████▊                                                | 10/28 [00:26<00:47,  2.62s/it]

tensor([[-3.6366e-01, -2.3304e-01, -2.5642e-01,  ...,  5.0859e-02,
         -2.3503e-01,  1.1362e+04],
        [-3.3021e-01, -2.9705e-01, -3.1483e-01,  ...,  1.0274e-01,
         -2.6637e-01,  1.1985e+04],
        [-4.1704e-01, -3.5060e-01, -3.6578e-01,  ...,  6.8910e-02,
         -2.2357e-01,  9.8299e+03],
        ...,
        [-3.6370e-01, -2.9032e-01, -2.3027e-01,  ...,  1.1408e-01,
         -1.9507e-01,  8.7631e+03],
        [-2.9820e-01, -2.4000e-01, -2.5479e-01,  ...,  1.0228e-01,
         -1.8715e-01,  1.0012e+04],
        [-2.7144e-01, -2.0806e-01, -3.2938e-01,  ...,  2.7738e-01,
         -3.0361e-01,  1.6738e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1743],
        [ 0.0545],
        [ 0.0538],
        [ 0.1737],
        [ 0.0848],
        [ 0.1636],
        [ 0.1459],
        [ 0.0234],
        [ 0.1022],
        [ 0.3550],
        [ 0.0315],
        [ 0.0847],
        [ 0.0430],
        [ 0.1464],
        [ 0.1523],
        [ 0.1926],
        [ 0.0369],
     

ep  6:  39%|█████████████████████████████▍                                             | 11/28 [00:28<00:44,  2.62s/it]

tensor([[-3.9310e-01, -1.7828e-01, -4.0080e-01,  ...,  2.4722e-01,
         -1.6888e-01,  1.5233e+04],
        [-3.7771e-01, -2.3426e-01, -3.2659e-01,  ...,  1.7354e-01,
         -1.8089e-01,  1.6983e+04],
        [-4.3528e-01, -3.8054e-01, -2.9146e-01,  ...,  1.6834e-01,
         -1.4699e-01,  1.2162e+04],
        ...,
        [-2.8631e-01, -2.6224e-01, -2.6302e-01,  ...,  1.1051e-01,
         -3.1048e-01,  8.4470e+03],
        [-2.9666e-01, -2.1421e-01, -2.2482e-01,  ...,  8.0261e-02,
         -1.9242e-01,  1.2760e+04],
        [-2.9683e-01, -1.9551e-01, -3.4187e-01,  ...,  1.2456e-01,
         -2.8549e-01,  1.0406e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.2111],
        [ 0.1949],
        [ 0.1950],
        [ 0.1018],
        [ 0.1100],
        [ 0.1348],
        [ 0.0246],
        [ 0.1174],
        [ 0.1226],
        [ 0.1312],
        [ 0.0304],
        [ 0.2912],
        [ 0.0844],
        [ 0.2239],
        [ 0.2333],
        [ 0.2122],
        [ 0.0272],
     

ep  6:  43%|████████████████████████████████▏                                          | 12/28 [00:31<00:41,  2.62s/it]

tensor([[-2.7821e-01, -2.3811e-01, -2.2439e-01,  ...,  8.5796e-02,
         -2.5010e-01,  1.2987e+04],
        [-3.5843e-01, -2.6380e-01, -2.2807e-01,  ...,  1.0155e-01,
         -2.5886e-01,  1.2409e+04],
        [-3.9702e-01, -2.9096e-01, -2.5862e-01,  ...,  2.2798e-01,
         -2.7268e-01,  1.7959e+04],
        ...,
        [-3.8727e-01, -1.5609e-01, -4.2065e-01,  ...,  2.6407e-01,
         -1.4835e-01,  1.2482e+04],
        [-3.6486e-01, -2.5011e-01, -3.3106e-01,  ...,  9.7190e-02,
         -1.6923e-01,  1.0067e+04],
        [-2.8568e-01, -2.8401e-01, -2.4899e-01,  ...,  1.1756e-01,
         -2.1615e-01,  1.5192e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1376],
        [ 0.1044],
        [ 0.1214],
        [ 0.1263],
        [ 0.1752],
        [ 0.2290],
        [ 0.2439],
        [ 0.1485],
        [ 0.0800],
        [ 0.0711],
        [ 0.1794],
        [ 0.0486],
        [ 0.1687],
        [ 0.0744],
        [ 0.2148],
        [ 0.2761],
        [ 0.1347],
     

ep  6:  46%|██████████████████████████████████▊                                        | 13/28 [00:34<00:39,  2.62s/it]

tensor([[-4.5871e-01,  6.6423e-02, -6.3787e-01,  ...,  5.4805e-01,
         -7.7473e-02,  1.1164e+04],
        [-2.6927e-01, -2.8306e-01, -2.5984e-01,  ...,  1.0288e-01,
         -2.2030e-01,  1.6907e+04],
        [-3.7059e-01, -2.4359e-01, -3.0042e-01,  ...,  2.5463e-02,
         -2.2840e-01,  1.2130e+04],
        ...,
        [-3.2438e-01, -2.9801e-01, -2.4029e-01,  ...,  1.8749e-01,
         -1.7425e-01,  1.5570e+04],
        [-3.8834e-01, -2.6819e-01, -3.7788e-01,  ...,  1.7679e-01,
         -1.2899e-01,  9.2866e+03],
        [-3.3192e-01, -2.1855e-01, -2.1262e-01,  ...,  4.2314e-02,
         -2.3632e-01,  1.2985e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0538],
        [ 0.1431],
        [ 0.4042],
        [ 0.1898],
        [ 0.1797],
        [ 0.1963],
        [ 0.0903],
        [ 0.0790],
        [ 0.1329],
        [ 0.1661],
        [ 0.0464],
        [ 0.0952],
        [ 0.1759],
        [ 0.1299],
        [ 0.0868],
        [ 0.1733],
        [ 0.0991],
     

ep  6:  50%|█████████████████████████████████████▌                                     | 14/28 [00:36<00:36,  2.62s/it]

tensor([[-3.7376e-01, -2.2932e-01, -2.3916e-01,  ...,  7.8433e-02,
         -2.4382e-01,  9.3380e+03],
        [-3.8156e-01, -1.9360e-01, -2.2043e-01,  ...,  1.3772e-01,
         -1.9764e-01,  1.6519e+04],
        [-2.6942e-01, -2.5947e-01, -2.8926e-01,  ...,  5.6003e-02,
         -2.1277e-01,  1.3474e+04],
        ...,
        [-2.8401e-01, -2.0169e-01, -1.3791e-01,  ...,  9.9516e-02,
         -2.0750e-01,  1.7410e+04],
        [-4.1825e-01, -2.5720e-01, -2.1029e-01,  ...,  1.9436e-01,
         -2.3304e-01,  1.2930e+04],
        [-4.0524e-01, -2.1045e-01, -4.4603e-01,  ...,  2.0014e-01,
         -2.2614e-01,  9.3202e+03]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.0891],
        [0.0875],
        [0.1271],
        [0.1752],
        [0.1877],
        [0.1826],
        [0.2168],
        [0.1602],
        [0.1339],
        [0.1278],
        [0.1120],
        [0.1293],
        [0.1725],
        [0.2160],
        [0.0693],
        [0.1378],
        [0.1869],
        [0.0782],
    

ep  6:  54%|████████████████████████████████████████▏                                  | 15/28 [00:39<00:34,  2.62s/it]

tensor([[-3.3581e-01, -1.2365e-01, -2.9591e-01,  ...,  3.0886e-01,
         -2.2482e-01,  1.7716e+04],
        [-3.9657e-01, -1.6828e-01, -3.1565e-01,  ...,  6.4122e-02,
         -3.0275e-01,  1.0603e+04],
        [-2.9910e-01, -3.3945e-01, -2.4857e-01,  ...,  7.9395e-02,
         -3.0249e-01,  1.4946e+04],
        ...,
        [-3.5195e-01, -2.5248e-01, -2.6574e-01,  ...,  1.7846e-01,
         -1.3278e-01,  1.6865e+04],
        [-2.8846e-01, -3.1223e-01, -2.8318e-01,  ...,  7.3408e-02,
         -2.3547e-01,  9.7627e+03],
        [-3.3997e-01, -2.7893e-01, -2.0655e-01,  ...,  2.8164e-02,
         -2.0303e-01,  1.3198e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0791],
        [ 0.0839],
        [ 0.1439],
        [ 0.1020],
        [ 0.2064],
        [ 0.1120],
        [ 0.0695],
        [ 0.1178],
        [ 0.2210],
        [ 0.1411],
        [ 0.1999],
        [ 0.1749],
        [ 0.0029],
        [ 0.0611],
        [ 0.1130],
        [ 0.2073],
        [ 0.2140],
     

ep  6:  57%|██████████████████████████████████████████▊                                | 16/28 [00:41<00:31,  2.62s/it]

tensor([[-2.8456e-01, -2.7079e-01, -1.9789e-01,  ...,  1.2428e-01,
         -2.2128e-01,  1.1152e+04],
        [-3.9881e-01, -1.7040e-01, -4.0099e-01,  ...,  2.6061e-01,
         -2.0786e-01,  1.2170e+04],
        [-4.1233e-01, -2.9614e-01, -2.5443e-01,  ...,  1.1757e-01,
         -2.5338e-01,  1.5130e+04],
        ...,
        [-4.0763e-01, -2.4197e-01, -4.5180e-01,  ...,  3.8159e-01,
         -2.1892e-01,  8.3249e+03],
        [-3.4091e-01, -3.1307e-01, -2.0509e-01,  ...,  1.1535e-01,
         -2.2138e-01,  1.3228e+04],
        [-4.2122e-01, -2.5978e-01, -2.8134e-01,  ...,  5.5976e-02,
         -1.2798e-01,  1.2742e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0984],
        [ 0.0527],
        [ 0.1671],
        [ 0.1041],
        [ 0.0646],
        [ 0.0771],
        [ 0.2282],
        [ 0.2553],
        [-0.0221],
        [ 0.1935],
        [ 0.1684],
        [ 0.1885],
        [ 0.2447],
        [ 0.0722],
        [ 0.0487],
        [ 0.0424],
        [ 0.2128],
     

ep  6:  61%|█████████████████████████████████████████████▌                             | 17/28 [00:44<00:28,  2.62s/it]

tensor([[-3.5802e-01, -2.8762e-01, -2.4838e-01,  ...,  1.4790e-01,
         -2.1459e-01,  1.2471e+04],
        [-3.2163e-01, -3.1684e-01, -1.8236e-01,  ...,  2.0404e-01,
         -2.2959e-01,  1.7191e+04],
        [-3.4195e-01, -2.3057e-01, -3.0019e-01,  ...,  1.9021e-01,
         -2.1422e-01,  1.0380e+04],
        ...,
        [-3.5940e-01, -2.6507e-01, -2.9463e-01,  ...,  1.6665e-01,
         -2.2857e-01,  1.0566e+04],
        [-3.8952e-01, -3.1477e-01, -2.4709e-01,  ...,  5.5740e-02,
         -2.6265e-01,  1.3096e+04],
        [-2.7715e-01, -1.5341e-02, -5.3145e-01,  ...,  4.3553e-01,
         -8.8939e-02,  1.1096e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0602],
        [ 0.1215],
        [ 0.2175],
        [ 0.1385],
        [-0.0488],
        [ 0.1484],
        [-0.0184],
        [ 0.1987],
        [ 0.2589],
        [ 0.0974],
        [ 0.2047],
        [ 0.0435],
        [ 0.2352],
        [ 0.2329],
        [ 0.1020],
        [ 0.1852],
        [ 0.2120],
     

ep  6:  64%|████████████████████████████████████████████████▏                          | 18/28 [00:47<00:26,  2.63s/it]

tensor([[-3.8245e-01, -2.7682e-01, -2.2398e-01,  ...,  2.3309e-01,
         -2.1832e-01,  8.5041e+03],
        [-3.5857e-01, -2.2556e-01, -3.1320e-01,  ...,  2.2174e-01,
         -1.9640e-01,  1.1125e+04],
        [-2.6893e-01, -2.7250e-01, -1.9795e-01,  ...,  7.6996e-02,
         -2.5742e-01,  1.2811e+04],
        ...,
        [-3.5558e-01, -1.3614e-01, -2.4997e-01,  ...,  2.3521e-01,
         -2.5932e-01,  1.7501e+04],
        [-3.2223e-01, -2.5085e-01, -2.6893e-01,  ...,  9.8092e-02,
         -2.0445e-01,  8.3317e+03],
        [-3.4192e-01, -2.8557e-01, -2.7824e-01,  ...,  9.6386e-02,
         -2.1693e-01,  1.1955e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0878],
        [-0.0040],
        [ 0.0740],
        [ 0.1229],
        [ 0.1379],
        [ 0.1535],
        [ 0.2204],
        [ 0.1498],
        [ 0.1622],
        [ 0.1792],
        [ 0.0650],
        [ 0.0869],
        [ 0.1465],
        [ 0.2250],
        [ 0.2285],
        [ 0.0876],
        [ 0.1475],
     

ep  6:  68%|██████████████████████████████████████████████████▉                        | 19/28 [00:49<00:23,  2.63s/it]

tensor([[-3.2233e-01, -2.1326e-01, -3.0240e-01,  ...,  1.2443e-01,
         -2.3535e-01,  1.0451e+04],
        [-3.9675e-01, -3.6831e-01, -2.8803e-01,  ...,  1.5334e-01,
         -3.0226e-01,  1.4975e+04],
        [-3.2584e-01, -2.9469e-01, -2.2359e-01,  ...,  1.2720e-01,
         -2.4338e-01,  1.0144e+04],
        ...,
        [-4.1816e-01, -2.1277e-01, -4.8117e-01,  ...,  1.7365e-01,
         -1.7194e-01,  1.1555e+04],
        [-3.1105e-01, -3.3583e-01, -1.2759e-01,  ...,  1.7681e-01,
         -2.4575e-01,  1.2420e+04],
        [-3.2349e-01, -2.3472e-01, -2.6363e-01,  ...,  1.1278e-01,
         -2.9590e-01,  1.2416e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0713],
        [ 0.1665],
        [ 0.2422],
        [ 0.1274],
        [ 0.0079],
        [ 0.0587],
        [-0.0076],
        [ 0.0839],
        [ 0.1011],
        [ 0.0966],
        [ 0.0859],
        [ 0.1726],
        [ 0.1310],
        [ 0.0765],
        [ 0.0309],
        [ 0.0941],
        [ 0.1302],
     

ep  6:  71%|█████████████████████████████████████████████████████▌                     | 20/28 [00:52<00:21,  2.63s/it]

tensor([[-4.1961e-01, -2.3234e-01, -2.0888e-01,  ...,  8.3719e-02,
         -2.2842e-01,  1.0402e+04],
        [-3.4392e-01, -2.5117e-01, -2.4104e-01,  ...,  1.1470e-01,
         -2.1081e-01,  1.0409e+04],
        [-2.6720e-01, -3.1796e-01, -2.8362e-01,  ...,  1.9340e-01,
         -2.5320e-01,  1.7620e+04],
        ...,
        [-3.1228e-01, -1.8379e-01, -4.0735e-01,  ...,  2.5088e-01,
         -2.0779e-01,  1.2190e+04],
        [-3.4126e-01, -2.9451e-01, -2.0882e-01,  ...,  8.6014e-02,
         -2.1372e-01,  1.5318e+04],
        [-3.0276e-01, -3.1938e-01, -2.4883e-01,  ...,  9.7528e-02,
         -2.4566e-01,  1.3495e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.1452],
        [0.1805],
        [0.1496],
        [0.1397],
        [0.0754],
        [0.1140],
        [0.1322],
        [0.2444],
        [0.0062],
        [0.1741],
        [0.0944],
        [0.1548],
        [0.0372],
        [0.0905],
        [0.1706],
        [0.2216],
        [0.1184],
        [0.0068],
    

ep  6:  75%|████████████████████████████████████████████████████████▎                  | 21/28 [00:55<00:18,  2.64s/it]

tensor([[-3.7451e-01, -2.1136e-01, -2.8176e-01,  ...,  1.9455e-01,
         -2.3171e-01,  1.7751e+04],
        [-3.2536e-01, -2.2106e-01, -2.6107e-01,  ...,  1.0786e-01,
         -2.7994e-01,  1.1707e+04],
        [-3.4821e-01, -2.8013e-01, -2.7043e-01,  ...,  1.3145e-01,
         -1.6341e-01,  1.8037e+04],
        ...,
        [-2.8478e-01, -2.9509e-01, -2.7397e-01,  ...,  8.5816e-02,
         -2.5059e-01,  1.6699e+04],
        [-4.3549e-01, -3.0192e-01, -2.2939e-01,  ...,  1.5671e-01,
         -2.4108e-01,  1.8144e+04],
        [-3.8531e-01, -3.0977e-01, -2.6267e-01,  ...,  1.3327e-01,
         -2.5768e-01,  1.3565e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0792],
        [ 0.0829],
        [ 0.1397],
        [ 0.2172],
        [ 0.1749],
        [ 0.1612],
        [ 0.1229],
        [ 0.1816],
        [ 0.1655],
        [ 0.0020],
        [ 0.1654],
        [ 0.0814],
        [ 0.3043],
        [ 0.1693],
        [ 0.1169],
        [-0.0073],
        [ 0.1947],
     

ep  6:  79%|██████████████████████████████████████████████████████████▉                | 22/28 [00:57<00:15,  2.63s/it]

tensor([[-3.8745e-01, -2.9612e-01, -2.4204e-01,  ...,  7.5110e-02,
         -2.5498e-01,  1.0572e+04],
        [-3.0266e-01, -3.3814e-01, -2.5776e-01,  ...,  6.8356e-02,
         -1.7547e-01,  1.5451e+04],
        [-3.2119e-01, -2.3294e-01, -2.4555e-01,  ...,  1.3400e-01,
         -2.2440e-01,  8.4735e+03],
        ...,
        [-3.5502e-01, -1.5834e-01, -2.1980e-01,  ...,  1.8159e-01,
         -2.7958e-01,  1.7930e+04],
        [-3.8156e-01, -2.7871e-01, -2.8138e-01,  ...,  1.0885e-01,
         -1.5609e-01,  1.0393e+04],
        [-2.7272e-01, -1.8165e-01, -2.5444e-01,  ...,  1.5176e-01,
         -3.0181e-01,  1.0968e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0644],
        [ 0.3080],
        [ 0.0813],
        [ 0.1665],
        [ 0.1938],
        [ 0.1159],
        [ 0.0749],
        [ 0.0636],
        [ 0.1023],
        [ 0.2385],
        [ 0.0582],
        [ 0.1371],
        [ 0.0384],
        [ 0.1089],
        [ 0.0108],
        [ 0.0468],
        [ 0.1214],
     

ep  6:  82%|█████████████████████████████████████████████████████████████▌             | 23/28 [01:00<00:13,  2.63s/it]

tensor([[-3.8209e-01, -2.7172e-01, -2.0082e-01,  ...,  9.2717e-02,
         -2.6380e-01,  1.1011e+04],
        [-3.5467e-01, -1.8264e-01, -4.7781e-01,  ...,  2.6568e-01,
         -2.4733e-01,  1.1859e+04],
        [-3.9941e-01, -2.5686e-01, -2.1903e-01,  ...,  1.8389e-01,
         -1.2521e-01,  1.7403e+04],
        ...,
        [-4.2370e-01, -1.3270e-01, -3.4587e-01,  ...,  1.2910e-01,
         -2.2732e-01,  1.2042e+04],
        [-3.9586e-01, -1.7648e-01, -3.3350e-01,  ...,  1.5961e-01,
         -1.2337e-01,  1.3557e+04],
        [-4.1933e-01, -1.9364e-01, -2.8014e-01,  ...,  2.0734e-01,
         -2.0869e-01,  1.7070e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0532],
        [ 0.1814],
        [ 0.2166],
        [ 0.0071],
        [ 0.1322],
        [ 0.1898],
        [ 0.1271],
        [ 0.1019],
        [ 0.1318],
        [ 0.1385],
        [ 0.2817],
        [ 0.2276],
        [ 0.1156],
        [ 0.1357],
        [ 0.1194],
        [ 0.1331],
        [ 0.1525],
     

ep  6:  86%|████████████████████████████████████████████████████████████████▎          | 24/28 [01:03<00:10,  2.62s/it]

tensor([[-3.8677e-01, -2.6890e-01, -2.7455e-01,  ...,  1.5549e-01,
         -1.8929e-01,  1.6413e+04],
        [-3.4040e-01, -1.7519e-01, -3.0696e-01,  ...,  9.3887e-02,
         -2.8799e-01,  1.2303e+04],
        [-3.0646e-01, -3.5051e-01, -2.9944e-01,  ...,  1.3598e-01,
         -2.0023e-01,  1.6269e+04],
        ...,
        [-3.4567e-01, -2.9503e-01, -3.4503e-01,  ...,  1.7088e-01,
         -2.0026e-01,  1.6458e+04],
        [-4.0487e-01, -3.0175e-01, -2.4833e-01,  ...,  1.3063e-01,
         -3.4350e-01,  1.2982e+04],
        [-4.0534e-01, -2.9946e-01, -2.3584e-01,  ...,  1.3014e-01,
         -2.4468e-01,  9.3701e+03]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0790],
        [ 0.1971],
        [ 0.1073],
        [ 0.1399],
        [ 0.1922],
        [ 0.0984],
        [ 0.0948],
        [ 0.2436],
        [ 0.0602],
        [ 0.1432],
        [ 0.2287],
        [ 0.1588],
        [ 0.0895],
        [ 0.2216],
        [ 0.0333],
        [ 0.2378],
        [ 0.0738],
     

ep  6:  89%|██████████████████████████████████████████████████████████████████▉        | 25/28 [01:05<00:07,  2.64s/it]

tensor([[-3.2754e-01, -2.7964e-01, -3.8839e-01,  ...,  1.7841e-01,
         -3.0217e-01,  1.2351e+04],
        [-3.4825e-01, -3.0507e-01, -2.1029e-01,  ...,  1.3870e-01,
         -1.8502e-01,  1.2560e+04],
        [-3.5871e-01, -2.1447e-01, -1.9529e-01,  ...,  1.0849e-01,
         -2.7851e-01,  1.8011e+04],
        ...,
        [-4.0161e-01, -2.1939e-01, -3.6434e-01,  ...,  2.3697e-01,
         -1.6834e-01,  9.9858e+03],
        [-3.8693e-01, -1.5302e-01, -3.5033e-01,  ...,  2.5264e-01,
         -2.6726e-01,  1.0680e+04],
        [-4.1505e-01, -3.0561e-01, -3.0559e-01,  ...,  1.7189e-01,
         -2.8727e-01,  1.1578e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1215],
        [ 0.2444],
        [ 0.2140],
        [ 0.1494],
        [ 0.1695],
        [ 0.1799],
        [ 0.1813],
        [ 0.0800],
        [ 0.2425],
        [ 0.0991],
        [ 0.0092],
        [ 0.1442],
        [-0.1177],
        [ 0.0488],
        [ 0.1530],
        [ 0.1029],
        [ 0.0834],
     

ep  6:  93%|█████████████████████████████████████████████████████████████████████▋     | 26/28 [01:08<00:05,  2.64s/it]

tensor([[-3.5546e-01, -3.7159e-01, -2.5867e-01,  ...,  1.2280e-01,
         -2.5107e-01,  1.8024e+04],
        [-3.1369e-01, -2.8385e-01, -2.6461e-01,  ...,  4.5594e-02,
         -2.5789e-01,  1.2966e+04],
        [-3.6457e-01, -1.8432e-01, -2.4524e-01,  ...,  1.6750e-01,
         -2.6046e-01,  1.6977e+04],
        ...,
        [-3.1120e-01, -3.5694e-01, -2.6379e-01,  ...,  1.1928e-01,
         -2.4170e-01,  1.8299e+04],
        [-2.4473e-01, -2.8487e-01, -2.5371e-01,  ...,  6.4757e-02,
         -3.1452e-01,  1.6846e+04],
        [-4.1046e-01, -1.7860e-01, -3.5421e-01,  ...,  1.2686e-01,
         -2.2685e-01,  1.1867e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0118],
        [ 0.0841],
        [ 0.0437],
        [ 0.0822],
        [ 0.1496],
        [ 0.1812],
        [ 0.0862],
        [ 0.1796],
        [ 0.0751],
        [ 0.1613],
        [ 0.1864],
        [ 0.2897],
        [ 0.1011],
        [ 0.1071],
        [ 0.0122],
        [ 0.1328],
        [ 0.1817],
     

ep  6:  96%|████████████████████████████████████████████████████████████████████████▎  | 27/28 [01:10<00:02,  2.64s/it]

tensor([[-3.8723e-01, -1.7627e-01, -2.4344e-01,  ...,  9.0365e-02,
         -2.2824e-01,  1.0121e+04],
        [-3.7621e-01, -3.6584e-01, -2.6072e-01,  ...,  6.8969e-02,
         -2.9231e-01,  1.3009e+04],
        [-3.7953e-01, -2.3303e-01, -3.5064e-01,  ...,  2.3095e-01,
         -1.7769e-01,  1.3973e+04],
        ...,
        [-3.6768e-01, -2.6759e-01, -2.4751e-01,  ...,  1.2977e-01,
         -2.4302e-01,  9.7917e+03],
        [-3.4771e-01, -2.8546e-01, -2.2881e-01,  ...,  5.0797e-02,
         -2.5734e-01,  1.2062e+04],
        [-3.3951e-01, -2.2406e-01, -2.8496e-01,  ...,  1.9445e-01,
         -2.4090e-01,  1.6988e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1866],
        [ 0.0496],
        [ 0.1030],
        [ 0.1037],
        [ 0.1450],
        [ 0.2118],
        [ 0.1923],
        [ 0.2027],
        [ 0.1908],
        [ 0.1489],
        [ 0.1549],
        [ 0.0860],
        [ 0.0532],
        [ 0.2949],
        [ 0.0252],
        [ 0.0669],
        [ 0.1448],
     

ep  6: 100%|███████████████████████████████████████████████████████████████████████████| 28/28 [01:13<00:00,  2.63s/it]
valid:   0%|                                                                                     | 0/4 [00:00<?, ?it/s]

tensor([[-2.8255e-01, -2.4243e-01, -2.0117e-01,  ...,  1.3500e-01,
         -2.4401e-01,  1.7528e+04],
        [-3.1893e-01, -2.3390e-01, -2.3986e-01,  ...,  1.0720e-01,
         -2.3060e-01,  1.3207e+04],
        [-3.9487e-01, -2.3073e-01, -2.6236e-01,  ...,  6.1318e-02,
         -2.2459e-01,  9.5998e+03],
        ...,
        [-3.7181e-01, -2.4044e-01, -3.9166e-01,  ...,  2.0085e-01,
         -1.1146e-01,  9.7422e+03],
        [-3.6208e-01, -2.9434e-01, -2.5774e-01,  ...,  1.5918e-01,
         -2.1579e-01,  1.4254e+04],
        [-3.2270e-01, -1.9479e-01, -2.6459e-01,  ...,  7.8841e-02,
         -2.4780e-01,  8.7214e+03]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1287],
        [ 0.1001],
        [ 0.0684],
        [ 0.1319],
        [ 0.1460],
        [ 0.0890],
        [ 0.0795],
        [ 0.1685],
        [ 0.2690],
        [ 0.2435],
        [ 0.2013],
        [ 0.1476],
        [ 0.2155],
        [ 0.0217],
        [ 0.1109],
        [ 0.0820],
        [ 0.0956],
     

valid:  25%|███████████████████▎                                                         | 1/4 [00:02<00:07,  2.63s/it]

tensor([[-3.2233e-01, -2.8638e-01, -2.5438e-01,  ...,  1.1127e-01,
         -2.4928e-01,  8.2773e+03],
        [-3.6531e-01, -1.8541e-01, -3.2476e-01,  ...,  9.8329e-02,
         -2.3989e-01,  8.2921e+03],
        [-3.7271e-01, -3.1051e-01, -2.6006e-01,  ...,  1.0789e-01,
         -1.8490e-01,  8.4220e+03],
        ...,
        [-3.7780e-01, -2.1708e-01, -3.6814e-01,  ...,  2.7568e-01,
         -1.1368e-01,  7.2709e+03],
        [-3.9939e-01, -2.8329e-01, -2.7683e-01,  ...,  1.3335e-01,
         -2.4340e-01,  7.3509e+03],
        [-3.1813e-01, -1.7252e-01, -3.3864e-01,  ...,  1.8896e-01,
         -2.7303e-01,  7.1148e+03]], device='cuda:0')
tensor([[0.1228],
        [0.1310],
        [0.1323],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [

valid:  50%|██████████████████████████████████████▌                                      | 2/4 [00:05<00:05,  2.63s/it]

tensor([[-2.6459e-01, -1.9816e-01, -2.3166e-01,  ...,  4.3366e-02,
         -2.4275e-01,  7.3657e+03],
        [-4.4351e-01, -1.6997e-01, -2.4599e-01,  ...,  5.0830e-02,
         -2.2045e-01,  7.4660e+03],
        [-3.6358e-01, -2.7964e-01, -2.1717e-01,  ...,  1.4754e-01,
         -2.0238e-01,  7.5556e+03],
        ...,
        [-3.0289e-01, -2.8608e-01, -3.0153e-01,  ...,  9.7022e-02,
         -2.0641e-01,  7.5523e+03],
        [-3.2801e-01, -2.4983e-01, -2.3538e-01,  ...,  1.0617e-01,
         -3.3416e-01,  7.9973e+03],
        [-3.5315e-01, -2.7097e-01, -2.6696e-01,  ...,  1.3142e-01,
         -2.4509e-01,  8.4248e+03]], device='cuda:0')
tensor([[0.1228],
        [0.1310],
        [0.1323],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [

valid:  75%|█████████████████████████████████████████████████████████▊                   | 3/4 [00:07<00:02,  2.65s/it]

tensor([[-4.3092e-01, -2.7025e-01, -3.0428e-01,  ...,  1.7351e-01,
         -2.1021e-01,  8.2736e+03],
        [-3.9687e-01, -1.6447e-01, -3.1085e-01,  ...,  2.0987e-01,
         -1.9773e-01,  8.4973e+03],
        [-4.0648e-01, -2.3350e-01, -3.5499e-01,  ...,  1.4228e-01,
         -2.3323e-01,  8.8352e+03],
        ...,
        [-2.5792e-01, -2.7998e-01, -2.9789e-01,  ...,  6.9041e-02,
         -1.5714e-01,  1.1430e+04],
        [-3.6105e-01, -3.1575e-01, -2.2100e-01,  ...,  1.0072e-01,
         -2.3743e-01,  1.1417e+04],
        [-3.4809e-01, -2.3759e-01, -2.0738e-01,  ...,  1.4380e-01,
         -2.2184e-01,  1.1349e+04]], device='cuda:0')
tensor([[0.1228],
        [0.1310],
        [0.1323],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325],
        [

valid: 100%|█████████████████████████████████████████████████████████████████████████████| 4/4 [00:08<00:00,  2.05s/it]
ep  7:   0%|                                                                                    | 0/28 [00:00<?, ?it/s]

tensor([[-3.0299e-01, -2.0548e-01, -3.7163e-01,  ...,  2.2204e-01,
         -2.4157e-01,  1.1479e+04],
        [-5.2351e-01, -1.3666e-01, -4.0322e-01,  ...,  1.6678e-01,
         -2.3607e-01,  1.1660e+04],
        [-2.8602e-01, -2.1112e-01, -2.7816e-01,  ...,  2.0929e-01,
         -2.4698e-01,  1.1616e+04],
        ...,
        [-4.1360e-01, -3.0982e-01, -2.5829e-01,  ...,  8.7611e-02,
         -2.1257e-01,  1.1642e+04],
        [-4.2525e-01, -2.9543e-01, -3.0947e-01,  ...,  7.3900e-02,
         -2.2249e-01,  1.1782e+04],
        [-2.6573e-01, -2.9025e-01, -3.4377e-01,  ...,  1.3080e-01,
         -1.8334e-01,  1.1734e+04]], device='cuda:0')
tensor([[0.1228],
        [0.1310],
        [0.1323],
        [0.1325],
        [0.1325],
        [0.1325],
        [0.1325]], device='cuda:0')
valid acc 0.5376884422110553


ep  7:   4%|██▋                                                                         | 1/28 [00:02<01:11,  2.64s/it]

tensor([[-3.6669e-01, -2.3129e-01, -3.3963e-01,  ...,  1.3087e-01,
         -2.1291e-01,  1.1144e+04],
        [-2.8569e-01, -1.8513e-01, -2.6081e-01,  ...,  1.0879e-01,
         -2.7146e-01,  1.7641e+04],
        [-3.8447e-01, -2.3596e-01, -2.4317e-01,  ...,  5.8713e-02,
         -2.7924e-01,  1.2952e+04],
        ...,
        [-3.3546e-01, -2.6179e-01, -2.7518e-01,  ...,  9.9212e-02,
         -2.3345e-01,  1.0322e+04],
        [-3.9171e-01, -2.9901e-01, -2.6521e-01,  ...,  1.9280e-01,
         -2.1929e-01,  1.6533e+04],
        [-2.9666e-01, -2.1421e-01, -2.2482e-01,  ...,  8.0261e-02,
         -1.9242e-01,  1.2760e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1904],
        [ 0.1799],
        [ 0.2493],
        [ 0.1147],
        [ 0.0948],
        [ 0.0534],
        [ 0.0931],
        [ 0.1358],
        [ 0.1487],
        [ 0.0626],
        [ 0.0537],
        [ 0.0543],
        [ 0.0434],
        [ 0.1977],
        [ 0.1185],
        [ 0.2690],
        [ 0.0711],
     

ep  7:   7%|█████▍                                                                      | 2/28 [00:05<01:08,  2.65s/it]

tensor([[-3.5604e-01, -1.3520e-01, -2.4088e-01,  ...,  6.0801e-02,
         -2.9261e-01,  1.0501e+04],
        [-3.6079e-01, -2.6774e-01, -2.7303e-01,  ...,  1.3481e-01,
         -2.4168e-01,  1.7195e+04],
        [-3.7740e-01, -2.6072e-01, -2.7376e-01,  ...,  1.5138e-01,
         -2.3412e-01,  1.1855e+04],
        ...,
        [-3.3223e-01, -2.2593e-01, -2.7762e-01,  ...,  2.1311e-01,
         -2.8363e-01,  1.7721e+04],
        [-3.0642e-01, -2.8676e-01, -1.6226e-01,  ...,  1.1029e-01,
         -2.0571e-01,  1.5126e+04],
        [-3.4824e-01, -3.1871e-01, -2.3464e-01,  ...,  1.5996e-01,
         -2.1481e-01,  1.4512e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1315],
        [ 0.0293],
        [ 0.1616],
        [ 0.1969],
        [ 0.1607],
        [ 0.2569],
        [ 0.2211],
        [ 0.1658],
        [ 0.2871],
        [ 0.0759],
        [ 0.2222],
        [ 0.1667],
        [ 0.0976],
        [ 0.2550],
        [ 0.1398],
        [ 0.0972],
        [ 0.0592],
     

ep  7:  11%|████████▏                                                                   | 3/28 [00:07<01:05,  2.63s/it]

tensor([[-3.4224e-01, -2.4455e-01, -2.4663e-01,  ...,  7.7610e-02,
         -2.5749e-01,  1.5549e+04],
        [-3.5639e-01, -1.5628e-01, -4.2518e-01,  ...,  2.9550e-01,
         -1.9895e-01,  8.4724e+03],
        [-3.3368e-01, -3.1857e-01, -3.2887e-01,  ...,  1.6476e-01,
         -2.2321e-01,  1.0686e+04],
        ...,
        [-3.4090e-01, -2.5975e-01, -2.8152e-01,  ...,  1.0869e-01,
         -2.6632e-01,  1.2950e+04],
        [-3.9922e-01, -2.6037e-01, -2.8272e-01,  ...,  1.1946e-01,
         -2.5539e-01,  1.2596e+04],
        [-3.1947e-01, -3.3845e-01, -2.3857e-01,  ...,  3.0203e-02,
         -2.5563e-01,  1.1955e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.1104],
        [0.1200],
        [0.1508],
        [0.1060],
        [0.0144],
        [0.1600],
        [0.1220],
        [0.2070],
        [0.1248],
        [0.0255],
        [0.1718],
        [0.2159],
        [0.1672],
        [0.0132],
        [0.0497],
        [0.1379],
        [0.1756],
        [0.2204],
    

ep  7:  14%|██████████▊                                                                 | 4/28 [00:10<01:03,  2.63s/it]

tensor([[-3.4529e-01, -3.2149e-01, -1.6974e-01,  ...,  7.9299e-02,
         -2.7675e-01,  1.3861e+04],
        [-3.8625e-01, -3.1078e-01, -2.1660e-01,  ...,  1.4333e-01,
         -2.5724e-01,  1.7895e+04],
        [-3.5359e-01, -2.9110e-01, -2.4068e-01,  ...,  2.0031e-01,
         -1.4083e-01,  1.7903e+04],
        ...,
        [-2.8193e-01, -2.0411e-01, -2.3748e-01,  ...,  1.9230e-01,
         -1.8821e-01,  1.7848e+04],
        [-3.6166e-01, -2.2267e-01, -3.0686e-01,  ...,  1.4060e-01,
         -1.6788e-01,  1.2884e+04],
        [-4.2820e-01, -2.3291e-01, -4.5784e-01,  ...,  4.0329e-01,
         -1.6441e-01,  1.6179e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1792],
        [ 0.1476],
        [ 0.0699],
        [ 0.0364],
        [ 0.2045],
        [ 0.1843],
        [ 0.0708],
        [ 0.1123],
        [ 0.1235],
        [ 0.1253],
        [ 0.2077],
        [ 0.1695],
        [ 0.0459],
        [ 0.1726],
        [ 0.2466],
        [ 0.2172],
        [ 0.0385],
     

ep  7:  18%|█████████████▌                                                              | 5/28 [00:13<01:00,  2.64s/it]

tensor([[-3.8913e-01, -3.2978e-01, -2.5793e-01,  ...,  2.5072e-01,
         -1.7889e-01,  1.7841e+04],
        [-3.3856e-01, -3.5045e-01, -2.0582e-01,  ...,  1.5379e-01,
         -2.1888e-01,  1.6445e+04],
        [-4.1876e-01, -2.5544e-01, -3.2996e-01,  ...,  1.6674e-01,
         -2.0106e-01,  1.7485e+04],
        ...,
        [-4.2517e-01, -2.4234e-01, -3.0206e-01,  ...,  1.3376e-01,
         -2.1869e-01,  1.6516e+04],
        [-2.9353e-01, -1.8997e-01, -2.4092e-01,  ...,  1.0810e-01,
         -2.4201e-01,  1.2233e+04],
        [-3.5340e-01, -2.9607e-01, -2.1856e-01,  ...,  1.0533e-01,
         -2.6676e-01,  1.1063e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0916],
        [ 0.1150],
        [ 0.1111],
        [ 0.1202],
        [ 0.1838],
        [ 0.1280],
        [ 0.1226],
        [ 0.1320],
        [ 0.1247],
        [ 0.1080],
        [ 0.1007],
        [ 0.1512],
        [ 0.1789],
        [ 0.1260],
        [-0.0107],
        [ 0.0622],
        [ 0.0797],
     

ep  7:  21%|████████████████▎                                                           | 6/28 [00:15<00:57,  2.63s/it]

tensor([[-4.5572e-01, -2.1481e-01, -2.7238e-01,  ...,  1.6399e-01,
         -2.0592e-01,  1.2721e+04],
        [-4.4842e-01, -2.4791e-01, -3.6640e-01,  ...,  2.2494e-01,
         -2.2570e-01,  1.2850e+04],
        [-4.4543e-01, -3.1021e-01, -3.3417e-01,  ...,  1.3935e-01,
         -2.2452e-01,  9.5393e+03],
        ...,
        [-3.9111e-01, -2.7042e-01, -3.1246e-01,  ...,  1.3853e-01,
         -2.5672e-01,  1.3026e+04],
        [-3.8912e-01, -2.7928e-01, -2.9969e-01,  ...,  2.0097e-01,
         -9.1065e-02,  1.7361e+04],
        [-2.3807e-01, -3.0629e-01, -2.8034e-01,  ...,  5.2396e-02,
         -2.7237e-01,  1.6437e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0312],
        [ 0.1159],
        [ 0.1561],
        [ 0.2318],
        [ 0.1506],
        [ 0.1437],
        [ 0.0589],
        [ 0.2810],
        [-0.0296],
        [ 0.1332],
        [ 0.1076],
        [ 0.1336],
        [ 0.1126],
        [ 0.0949],
        [ 0.1553],
        [ 0.1337],
        [-0.0060],
     

ep  7:  25%|███████████████████                                                         | 7/28 [00:18<00:55,  2.64s/it]

tensor([[-4.5306e-01, -2.7869e-01, -3.0196e-01,  ...,  2.7409e-01,
         -1.1064e-01,  1.5373e+04],
        [-2.7420e-01, -1.8211e-01, -2.8865e-01,  ...,  2.1371e-02,
         -3.1516e-01,  1.2226e+04],
        [-3.9867e-01, -3.2953e-01, -2.8759e-01,  ...,  1.4105e-01,
         -1.9136e-01,  9.8649e+03],
        ...,
        [-3.7717e-01, -2.6405e-01, -4.2099e-01,  ...,  2.7098e-01,
         -2.4778e-01,  1.2422e+04],
        [-3.5206e-01, -3.3920e-01, -2.5050e-01,  ...,  2.4911e-01,
         -1.8808e-01,  1.7403e+04],
        [-3.7458e-01, -1.8025e-01, -2.6499e-01,  ...,  8.8469e-02,
         -2.8882e-01,  1.2530e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1618],
        [ 0.0863],
        [ 0.0497],
        [ 0.1661],
        [ 0.0653],
        [ 0.1474],
        [ 0.2238],
        [ 0.1209],
        [ 0.1289],
        [ 0.1053],
        [ 0.0446],
        [ 0.1051],
        [ 0.0628],
        [ 0.0900],
        [ 0.1984],
        [ 0.0458],
        [ 0.2337],
     

ep  7:  29%|█████████████████████▋                                                      | 8/28 [00:21<00:52,  2.63s/it]

tensor([[-4.3128e-01, -3.3070e-01, -3.4736e-01,  ...,  1.4332e-01,
         -2.5336e-01,  1.2535e+04],
        [-3.7597e-01, -3.9165e-02, -2.9239e-01,  ...,  3.5184e-01,
         -1.8047e-01,  1.7732e+04],
        [-3.7244e-01, -2.4053e-01, -2.5935e-01,  ...,  1.0696e-01,
         -2.7419e-01,  1.2213e+04],
        ...,
        [-2.9668e-01, -1.4193e-01, -3.4498e-01,  ...,  2.8129e-01,
         -2.3470e-01,  1.5569e+04],
        [-2.6688e-01, -3.4098e-01, -2.8690e-01,  ...,  1.1983e-01,
         -2.6482e-01,  1.2198e+04],
        [-3.7348e-01, -2.3195e-01, -2.7464e-01,  ...,  1.5379e-01,
         -2.4741e-01,  1.5118e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[-0.0187],
        [ 0.2010],
        [ 0.1956],
        [ 0.2453],
        [ 0.0830],
        [ 0.0292],
        [ 0.2027],
        [ 0.1537],
        [ 0.1526],
        [ 0.1517],
        [ 0.0636],
        [ 0.0345],
        [ 0.0253],
        [ 0.0959],
        [ 0.0332],
        [ 0.0577],
        [ 0.1657],
     

ep  7:  32%|████████████████████████▍                                                   | 9/28 [00:23<00:50,  2.64s/it]

tensor([[-3.5107e-01, -2.7279e-01, -1.7334e-01,  ...,  1.1278e-01,
         -2.4311e-01,  1.3264e+04],
        [-3.8261e-01, -2.5444e-01, -3.4157e-01,  ...,  2.6415e-01,
         -1.0987e-01,  1.3279e+04],
        [-4.1816e-01, -2.1277e-01, -4.8117e-01,  ...,  1.7365e-01,
         -1.7194e-01,  1.1555e+04],
        ...,
        [-3.2707e-01, -2.6592e-01, -2.0380e-01,  ...,  1.2136e-01,
         -1.6707e-01,  1.0016e+04],
        [-4.1838e-01, -2.5019e-01, -2.6436e-01,  ...,  6.3911e-02,
         -2.4020e-01,  1.0610e+04],
        [-4.0192e-01, -2.4517e-01, -2.4261e-01,  ...,  1.1017e-01,
         -2.4420e-01,  1.5500e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1693],
        [ 0.0435],
        [ 0.1647],
        [ 0.0168],
        [ 0.0610],
        [ 0.0812],
        [ 0.1251],
        [ 0.3330],
        [ 0.1940],
        [ 0.0641],
        [ 0.1556],
        [ 0.0803],
        [ 0.1513],
        [ 0.0098],
        [ 0.1563],
        [ 0.2097],
        [ 0.0841],
     

ep  7:  36%|██████████████████████████▊                                                | 10/28 [00:26<00:47,  2.64s/it]

tensor([[-3.4326e-01, -3.0525e-01, -2.8830e-01,  ...,  1.8110e-01,
         -2.3585e-01,  8.5294e+03],
        [-3.8288e-01, -2.4063e-01, -1.9486e-01,  ...,  1.3210e-01,
         -2.8497e-01,  1.0294e+04],
        [-3.7171e-01, -2.6528e-01, -1.9819e-01,  ...,  1.4906e-01,
         -2.1566e-01,  1.0389e+04],
        ...,
        [-3.0878e-01, -2.0853e-01, -3.0833e-01,  ...,  8.3165e-02,
         -2.6406e-01,  1.5294e+04],
        [-5.1620e-01, -2.9768e-01, -3.2489e-01,  ...,  2.0508e-01,
         -1.9727e-01,  1.0699e+04],
        [-3.6323e-01, -2.5868e-01, -2.8219e-01,  ...,  1.9990e-01,
         -2.8093e-01,  1.5619e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.2137],
        [ 0.2408],
        [ 0.2245],
        [ 0.2178],
        [ 0.1673],
        [ 0.1716],
        [ 0.0834],
        [-0.0016],
        [ 0.1020],
        [ 0.1655],
        [ 0.0630],
        [ 0.2499],
        [ 0.0367],
        [ 0.0861],
        [ 0.0748],
        [ 0.0684],
        [ 0.0779],
     

ep  7:  39%|█████████████████████████████▍                                             | 11/28 [00:28<00:44,  2.64s/it]

tensor([[-3.3360e-01, -3.0966e-01, -2.6009e-01,  ...,  1.9135e-01,
         -3.0065e-01,  1.6272e+04],
        [-4.3299e-01, -2.0525e-01, -2.4819e-01,  ...,  1.2748e-01,
         -2.0539e-01,  1.3170e+04],
        [-3.7143e-01, -3.0776e-01, -2.4387e-01,  ...,  1.6023e-01,
         -2.6944e-01,  1.1126e+04],
        ...,
        [-2.9050e-01, -2.2564e-01, -2.7755e-01,  ...,  1.6993e-01,
         -2.7911e-01,  1.5995e+04],
        [-3.3626e-01, -2.7755e-01, -3.5294e-01,  ...,  2.4256e-01,
         -1.8281e-01,  1.7481e+04],
        [-3.0861e-01, -2.8911e-01, -2.5154e-01,  ...,  6.2652e-02,
         -3.0264e-01,  1.6340e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0957],
        [ 0.0994],
        [ 0.2083],
        [ 0.1811],
        [ 0.0790],
        [ 0.1049],
        [ 0.0415],
        [ 0.1723],
        [ 0.1020],
        [ 0.2220],
        [ 0.1661],
        [ 0.0859],
        [ 0.1423],
        [ 0.1717],
        [ 0.2902],
        [ 0.2167],
        [ 0.0912],
     

ep  7:  43%|████████████████████████████████▏                                          | 12/28 [00:31<00:42,  2.64s/it]

tensor([[-4.4745e-01, -1.7281e-01, -2.7232e-01,  ...,  1.5873e-01,
         -1.3313e-01,  9.7839e+03],
        [-3.4531e-01, -2.3031e-01, -2.6036e-01,  ...,  1.8791e-01,
         -2.0748e-01,  1.7542e+04],
        [-3.5815e-01, -2.8249e-01, -2.5363e-01,  ...,  1.4408e-01,
         -2.3218e-01,  1.2573e+04],
        ...,
        [-4.3940e-01, -3.1941e-01, -2.8798e-01,  ...,  1.1959e-01,
         -2.6933e-01,  1.1296e+04],
        [-4.1310e-01, -2.6860e-01, -2.8231e-01,  ...,  8.6699e-02,
         -2.6730e-01,  1.0006e+04],
        [-3.6694e-01, -2.4757e-01, -3.2970e-01,  ...,  9.0981e-02,
         -2.3456e-01,  1.2978e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1444],
        [ 0.0728],
        [ 0.1845],
        [ 0.0522],
        [ 0.1498],
        [ 0.1114],
        [ 0.1342],
        [ 0.1318],
        [ 0.0817],
        [ 0.2087],
        [ 0.0573],
        [ 0.1318],
        [ 0.3071],
        [ 0.1149],
        [ 0.2193],
        [ 0.2604],
        [ 0.0862],
     

ep  7:  46%|██████████████████████████████████▊                                        | 13/28 [00:34<00:39,  2.65s/it]

tensor([[-3.9004e-01, -2.1605e-01, -2.3741e-01,  ...,  1.0650e-01,
         -2.5896e-01,  1.2805e+04],
        [-3.3823e-01, -2.5535e-01, -2.3478e-01,  ...,  4.4139e-02,
         -2.3967e-01,  1.3784e+04],
        [-3.6931e-01, -2.2119e-01, -3.0043e-01,  ...,  2.1542e-01,
         -2.7583e-01,  1.5354e+04],
        ...,
        [-4.1961e-01, -2.3234e-01, -2.0888e-01,  ...,  8.3719e-02,
         -2.2842e-01,  1.0402e+04],
        [-3.0067e-01, -2.1020e-01, -1.9355e-01,  ...,  1.6668e-01,
         -2.6090e-01,  1.7007e+04],
        [-3.4869e-01, -2.4777e-01, -2.1324e-01,  ...,  1.6254e-01,
         -2.3377e-01,  1.6512e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.2155],
        [ 0.0848],
        [ 0.0590],
        [ 0.1907],
        [ 0.1255],
        [ 0.1473],
        [ 0.3143],
        [ 0.0847],
        [ 0.0726],
        [ 0.2163],
        [ 0.1150],
        [ 0.0864],
        [ 0.0841],
        [ 0.1329],
        [ 0.0817],
        [ 0.2051],
        [ 0.1745],
     

ep  7:  50%|█████████████████████████████████████▌                                     | 14/28 [00:36<00:37,  2.65s/it]

tensor([[-3.9329e-01, -2.8470e-01, -2.7204e-01,  ...,  2.1728e-01,
         -1.7186e-01,  1.2983e+04],
        [-3.0951e-01, -2.3068e-01, -2.1789e-01,  ...,  1.1528e-01,
         -3.1005e-01,  1.5974e+04],
        [-3.7615e-01, -2.3265e-01, -2.5026e-01,  ...,  1.2242e-01,
         -1.6281e-01,  1.7959e+04],
        ...,
        [-3.3716e-01, -2.2385e-01, -2.0406e-01,  ...,  1.3599e-01,
         -2.5382e-01,  1.0626e+04],
        [-3.6770e-01, -1.9899e-01, -1.9241e-01,  ...,  1.8070e-01,
         -1.5689e-01,  1.7711e+04],
        [-3.0116e-01, -2.7040e-01, -2.8907e-01,  ...,  1.7651e-01,
         -2.1443e-01,  1.7751e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.3119],
        [0.1652],
        [0.3149],
        [0.0621],
        [0.1615],
        [0.0287],
        [0.2153],
        [0.2010],
        [0.0842],
        [0.1363],
        [0.1292],
        [0.1120],
        [0.0610],
        [0.1203],
        [0.1842],
        [0.0500],
        [0.1492],
        [0.0948],
    

ep  7:  54%|████████████████████████████████████████▏                                  | 15/28 [00:39<00:34,  2.65s/it]

tensor([[-4.2574e-01, -3.0816e-01, -2.8709e-01,  ...,  1.0229e-01,
         -2.2106e-01,  1.1205e+04],
        [-3.4653e-01, -3.1790e-01, -2.3846e-01,  ...,  1.2911e-01,
         -3.3560e-01,  1.4452e+04],
        [-3.3529e-01, -3.1260e-01, -2.9522e-01,  ...,  1.7853e-01,
         -1.8610e-01,  1.7420e+04],
        ...,
        [-3.5849e-01, -2.6669e-01, -2.8359e-01,  ...,  1.8483e-01,
         -2.6155e-01,  1.7574e+04],
        [-3.6475e-01, -2.5926e-01, -2.3491e-01,  ...,  2.8774e-02,
         -2.1288e-01,  1.3169e+04],
        [-2.9416e-01, -2.1175e-01, -2.5101e-01,  ...,  7.8270e-02,
         -3.3443e-01,  1.5739e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1235],
        [ 0.1304],
        [ 0.2470],
        [ 0.2710],
        [ 0.2163],
        [ 0.1562],
        [ 0.0320],
        [ 0.1219],
        [ 0.1099],
        [ 0.2058],
        [ 0.1509],
        [ 0.1814],
        [ 0.1092],
        [ 0.2491],
        [ 0.2467],
        [ 0.2290],
        [ 0.1519],
     

ep  7:  57%|██████████████████████████████████████████▊                                | 16/28 [00:42<00:31,  2.65s/it]

tensor([[-3.4535e-01, -2.8672e-01, -2.1426e-01,  ...,  2.1455e-01,
         -2.5627e-01,  1.5105e+04],
        [-3.7816e-01, -2.2300e-01, -2.4253e-01,  ...,  2.2670e-01,
         -2.2647e-01,  1.7252e+04],
        [-4.2606e-01, -2.3836e-01, -2.7931e-01,  ...,  1.5144e-01,
         -1.8829e-01,  1.6502e+04],
        ...,
        [-4.3549e-01, -3.0192e-01, -2.2939e-01,  ...,  1.5671e-01,
         -2.4108e-01,  1.8144e+04],
        [-3.0057e-01, -2.2700e-01, -3.1996e-01,  ...,  1.7807e-01,
         -2.3789e-01,  1.5822e+04],
        [-4.1919e-01, -2.3866e-01, -2.6980e-01,  ...,  3.9768e-02,
         -2.1521e-01,  1.0144e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1296],
        [ 0.1688],
        [ 0.1061],
        [ 0.0860],
        [ 0.2881],
        [ 0.1580],
        [ 0.1043],
        [ 0.1783],
        [ 0.2726],
        [ 0.0937],
        [ 0.2293],
        [ 0.1925],
        [ 0.1159],
        [ 0.1010],
        [ 0.1394],
        [ 0.1043],
        [ 0.1195],
     

ep  7:  61%|█████████████████████████████████████████████▌                             | 17/28 [00:44<00:29,  2.64s/it]

tensor([[-3.9499e-01, -2.0656e-01, -2.4869e-01,  ...,  7.2096e-02,
         -2.2906e-01,  1.1781e+04],
        [-4.2750e-01, -1.5470e-01, -2.7529e-01,  ...,  2.1612e-01,
         -1.4510e-01,  1.7663e+04],
        [-3.8476e-01, -2.6065e-01, -3.1659e-01,  ...,  2.9376e-01,
         -1.9718e-01,  1.7010e+04],
        ...,
        [-3.8410e-01, -3.2138e-01, -2.6191e-01,  ...,  1.6288e-01,
         -2.7330e-01,  1.6907e+04],
        [-3.5082e-01, -3.0206e-01, -3.1905e-01,  ...,  1.7046e-01,
         -2.0711e-01,  1.2403e+04],
        [-4.3645e-01, -1.8698e-01, -4.6236e-01,  ...,  2.8848e-01,
         -2.0286e-01,  1.3146e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1176],
        [ 0.1997],
        [ 0.1609],
        [ 0.0829],
        [ 0.0119],
        [ 0.1311],
        [ 0.1745],
        [ 0.0844],
        [ 0.1328],
        [ 0.0708],
        [ 0.1265],
        [ 0.1976],
        [ 0.1608],
        [ 0.0681],
        [ 0.0363],
        [ 0.0374],
        [ 0.0245],
     

ep  7:  64%|████████████████████████████████████████████████▏                          | 18/28 [00:47<00:26,  2.63s/it]

tensor([[-3.5973e-01, -1.8569e-01, -3.3591e-01,  ...,  1.6568e-01,
         -2.2759e-01,  1.6130e+04],
        [-3.3289e-01, -3.0242e-01, -2.8413e-01,  ...,  1.7476e-01,
         -2.3309e-01,  1.2221e+04],
        [-3.5300e-01, -2.8900e-01, -2.5508e-01,  ...,  1.8285e-01,
         -2.4424e-01,  1.1415e+04],
        ...,
        [-3.6621e-01, -4.2176e-01, -2.4033e-01,  ...,  1.9047e-01,
         -2.6911e-01,  1.7730e+04],
        [-3.8693e-01, -1.5302e-01, -3.5033e-01,  ...,  2.5264e-01,
         -2.6726e-01,  1.0680e+04],
        [-3.0896e-01, -1.9272e-01, -1.9824e-01,  ...,  1.0699e-01,
         -2.7573e-01,  1.8136e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1163],
        [ 0.0682],
        [ 0.0989],
        [ 0.0512],
        [ 0.1510],
        [ 0.2083],
        [ 0.0927],
        [ 0.1068],
        [ 0.1794],
        [ 0.1043],
        [ 0.1926],
        [ 0.1606],
        [ 0.1732],
        [ 0.0713],
        [ 0.1379],
        [ 0.0132],
        [ 0.1056],
     

ep  7:  68%|██████████████████████████████████████████████████▉                        | 19/28 [00:50<00:23,  2.63s/it]

tensor([[-3.4437e-01, -2.4213e-01, -2.5825e-01,  ...,  1.0213e-01,
         -1.8415e-01,  1.2574e+04],
        [-3.5955e-01, -2.9915e-01, -2.2192e-01,  ...,  1.9146e-01,
         -2.3167e-01,  1.7890e+04],
        [-3.1483e-01, -1.6264e-01, -2.2619e-01,  ...,  2.0339e-01,
         -2.6528e-01,  1.7839e+04],
        ...,
        [-3.3456e-01, -2.6161e-01, -3.0208e-01,  ...,  1.1735e-01,
         -2.5364e-01,  1.6295e+04],
        [-3.8762e-01, -3.7258e-02, -5.3454e-01,  ...,  4.2211e-01,
         -8.6074e-02,  1.7280e+04],
        [-3.8813e-01, -2.8072e-01, -2.4111e-01,  ...,  9.0654e-02,
         -2.8414e-01,  1.2096e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.1715],
        [0.0585],
        [0.1334],
        [0.0687],
        [0.2249],
        [0.2039],
        [0.2020],
        [0.1381],
        [0.0022],
        [0.0964],
        [0.1001],
        [0.2222],
        [0.2837],
        [0.2580],
        [0.1200],
        [0.2080],
        [0.1086],
        [0.1781],
    

ep  7:  71%|█████████████████████████████████████████████████████▌                     | 20/28 [00:52<00:21,  2.63s/it]

tensor([[-3.1915e-01, -3.1907e-01, -2.5896e-01,  ...,  1.2145e-01,
         -2.4252e-01,  1.3246e+04],
        [-2.9407e-01, -2.4477e-01, -2.9711e-01,  ...,  1.3239e-01,
         -1.9487e-01,  1.0734e+04],
        [-3.2826e-01, -2.7453e-01, -1.7587e-01,  ...,  8.9210e-02,
         -2.8136e-01,  1.1269e+04],
        ...,
        [-3.1502e-01, -2.6497e-01, -2.4307e-01,  ...,  9.6880e-02,
         -2.5296e-01,  1.1118e+04],
        [-3.2431e-01, -1.7918e-01, -2.9542e-01,  ...,  2.0301e-01,
         -2.6104e-01,  1.3982e+04],
        [-3.5465e-01, -1.7072e-01, -2.5735e-01,  ...,  2.5456e-01,
         -1.8728e-01,  9.2563e+03]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.1923],
        [0.0808],
        [0.1946],
        [0.1429],
        [0.0834],
        [0.1735],
        [0.2442],
        [0.1365],
        [0.2189],
        [0.0876],
        [0.1983],
        [0.0314],
        [0.1220],
        [0.1014],
        [0.1355],
        [0.1374],
        [0.1242],
        [0.1563],
    

ep  7:  75%|████████████████████████████████████████████████████████▎                  | 21/28 [00:55<00:18,  2.63s/it]

tensor([[-3.2908e-01,  9.4429e-02, -6.0408e-01,  ...,  6.0085e-01,
         -1.6288e-01,  1.0271e+04],
        [-3.4980e-01, -2.9343e-01, -3.1180e-01,  ...,  9.1082e-02,
         -1.4368e-01,  1.1247e+04],
        [-3.6823e-01, -2.5636e-01, -2.8923e-01,  ...,  2.2207e-01,
         -2.6280e-01,  1.7777e+04],
        ...,
        [-4.2721e-01, -3.0027e-01, -2.6535e-01,  ...,  1.5453e-01,
         -1.8394e-01,  1.0318e+04],
        [-3.8984e-01, -3.2699e-01, -2.4948e-01,  ...,  1.9918e-01,
         -1.8639e-01,  1.7107e+04],
        [-4.0161e-01, -2.1939e-01, -3.6434e-01,  ...,  2.3697e-01,
         -1.6834e-01,  9.9858e+03]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1551],
        [ 0.2556],
        [ 0.1110],
        [ 0.1625],
        [ 0.0145],
        [ 0.0996],
        [ 0.0833],
        [ 0.0556],
        [ 0.0446],
        [ 0.1962],
        [ 0.1221],
        [ 0.1269],
        [ 0.1479],
        [ 0.1440],
        [ 0.0844],
        [ 0.2129],
        [ 0.1601],
     

ep  7:  79%|██████████████████████████████████████████████████████████▉                | 22/28 [00:58<00:15,  2.64s/it]

tensor([[-3.7322e-01, -2.6957e-01, -2.9887e-01,  ...,  1.6167e-01,
         -2.5231e-01,  1.5637e+04],
        [-3.4727e-01, -2.2425e-01, -4.2008e-01,  ...,  2.6411e-01,
         -1.9298e-01,  1.2288e+04],
        [-3.0962e-01, -2.4415e-01, -1.4491e-01,  ...,  7.2370e-02,
         -2.9917e-01,  1.7996e+04],
        ...,
        [-3.5203e-01, -2.4756e-01, -2.3417e-01,  ...,  2.3347e-01,
         -2.3446e-01,  1.5794e+04],
        [-3.0657e-01, -2.5187e-01, -2.3090e-01,  ...,  1.3922e-01,
         -2.6210e-01,  1.7828e+04],
        [-3.7033e-01, -2.8218e-01, -3.0349e-01,  ...,  1.7012e-01,
         -1.7532e-01,  1.6734e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0912],
        [ 0.0601],
        [ 0.1915],
        [ 0.2018],
        [ 0.2507],
        [ 0.0975],
        [ 0.0931],
        [ 0.2137],
        [ 0.0850],
        [ 0.0929],
        [ 0.1686],
        [ 0.1874],
        [ 0.1036],
        [ 0.0522],
        [ 0.2072],
        [ 0.0348],
        [ 0.0355],
     

ep  7:  82%|█████████████████████████████████████████████████████████████▌             | 23/28 [01:00<00:13,  2.63s/it]

tensor([[-3.8829e-01, -2.2980e-01, -2.4069e-01,  ...,  1.6717e-01,
         -2.1525e-01,  1.2976e+04],
        [-3.8233e-01, -2.0556e-01, -3.2029e-01,  ...,  7.6558e-02,
         -2.5358e-01,  1.0526e+04],
        [-3.5612e-01, -2.7541e-01, -2.5418e-01,  ...,  7.0719e-02,
         -3.1525e-01,  1.1675e+04],
        ...,
        [-3.4979e-01, -3.1434e-01, -2.5717e-01,  ...,  1.2242e-01,
         -2.7289e-01,  1.5519e+04],
        [-3.4405e-01, -2.3102e-01, -2.1663e-01,  ...,  2.3776e-01,
         -2.4410e-01,  1.6027e+04],
        [-3.6910e-01, -2.5510e-01, -2.8766e-01,  ...,  1.1869e-01,
         -2.4126e-01,  9.8703e+03]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1588],
        [ 0.2298],
        [ 0.0295],
        [ 0.1458],
        [ 0.2026],
        [ 0.1322],
        [ 0.1114],
        [ 0.0836],
        [ 0.0496],
        [ 0.1743],
        [ 0.0962],
        [ 0.1126],
        [ 0.2455],
        [ 0.0965],
        [ 0.1481],
        [ 0.1542],
        [ 0.1074],
     

ep  7:  86%|████████████████████████████████████████████████████████████████▎          | 24/28 [01:03<00:10,  2.63s/it]

tensor([[-3.4417e-01, -2.0074e-01, -2.6702e-01,  ...,  1.4035e-01,
         -2.4776e-01,  1.0949e+04],
        [-3.7843e-01, -3.0283e-01, -1.9406e-01,  ...,  1.3097e-01,
         -1.9620e-01,  1.5081e+04],
        [-3.5126e-01, -2.6004e-01, -2.8648e-01,  ...,  9.4685e-02,
         -2.3945e-01,  1.8030e+04],
        ...,
        [-3.6421e-01, -2.9346e-01, -2.4262e-01,  ...,  1.9759e-01,
         -1.6911e-01,  1.8040e+04],
        [-3.5877e-01, -3.0451e-01, -3.4644e-01,  ...,  2.3510e-01,
         -2.4941e-01,  1.2756e+04],
        [-3.5874e-01, -1.7250e-01, -3.2968e-01,  ...,  3.1223e-01,
         -2.2715e-01,  1.6432e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1074],
        [ 0.2470],
        [ 0.1431],
        [ 0.2438],
        [ 0.1455],
        [ 0.0806],
        [ 0.1248],
        [ 0.0973],
        [ 0.1437],
        [ 0.2163],
        [ 0.2378],
        [ 0.1270],
        [ 0.1357],
        [ 0.0884],
        [ 0.1416],
        [ 0.0942],
        [ 0.0766],
     

ep  7:  89%|██████████████████████████████████████████████████████████████████▉        | 25/28 [01:05<00:07,  2.63s/it]

tensor([[-3.6620e-01, -2.9137e-01, -2.3529e-01,  ...,  1.1600e-01,
         -2.7435e-01,  1.2921e+04],
        [-2.7144e-01, -2.0806e-01, -3.2938e-01,  ...,  2.7738e-01,
         -3.0361e-01,  1.6738e+04],
        [-4.1398e-01, -1.9822e-01, -2.6480e-01,  ...,  6.3111e-02,
         -1.9066e-01,  1.0428e+04],
        ...,
        [-3.7310e-01, -3.0284e-01, -2.9060e-01,  ...,  8.5714e-02,
         -1.7118e-01,  1.2506e+04],
        [-3.5264e-01, -2.4263e-01, -2.7048e-01,  ...,  1.2647e-01,
         -2.7143e-01,  1.3208e+04],
        [-3.7332e-01, -2.6603e-01, -2.5788e-01,  ...,  2.6414e-02,
         -2.2043e-01,  1.1372e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.1988],
        [0.2430],
        [0.1552],
        [0.0799],
        [0.1568],
        [0.0639],
        [0.1094],
        [0.1494],
        [0.1014],
        [0.0965],
        [0.1007],
        [0.2807],
        [0.1112],
        [0.0311],
        [0.1492],
        [0.1015],
        [0.1133],
        [0.0902],
    

ep  7:  93%|█████████████████████████████████████████████████████████████████████▋     | 26/28 [01:08<00:05,  2.64s/it]

tensor([[-3.9376e-01, -2.3986e-01, -2.7641e-01,  ...,  1.9607e-01,
         -2.3825e-01,  1.6992e+04],
        [-3.3945e-01, -2.8684e-01, -3.2056e-01,  ...,  2.0349e-01,
         -2.0155e-01,  1.3553e+04],
        [-3.3581e-01, -1.2365e-01, -2.9591e-01,  ...,  3.0886e-01,
         -2.2482e-01,  1.7716e+04],
        ...,
        [-3.8263e-01, -3.0440e-01, -3.2313e-01,  ...,  1.3054e-01,
         -2.6743e-01,  1.3009e+04],
        [-2.6536e-01, -2.6778e-01, -2.6574e-01,  ...,  1.2084e-01,
         -2.9318e-01,  1.3329e+04],
        [-4.9322e-01, -2.3207e-01, -3.2665e-01,  ...,  2.2416e-01,
         -1.3380e-01,  1.1990e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.1572],
        [0.2082],
        [0.0729],
        [0.0978],
        [0.0839],
        [0.2191],
        [0.1468],
        [0.1486],
        [0.0692],
        [0.0888],
        [0.2452],
        [0.3417],
        [0.2172],
        [0.1352],
        [0.2273],
        [0.1421],
        [0.1251],
        [0.2091],
    

ep  7:  96%|████████████████████████████████████████████████████████████████████████▎  | 27/28 [01:11<00:02,  2.64s/it]

tensor([[-3.2805e-01, -2.2651e-01, -2.6661e-01,  ...,  1.8661e-01,
         -2.1290e-01,  1.8050e+04],
        [-3.9659e-01, -2.7956e-01, -2.8267e-01,  ...,  1.4917e-01,
         -1.6351e-01,  1.7043e+04],
        [-4.1505e-01, -3.0561e-01, -3.0559e-01,  ...,  1.7189e-01,
         -2.8727e-01,  1.1578e+04],
        ...,
        [-4.1413e-01, -2.5845e-01, -3.0620e-01,  ...,  1.8904e-01,
         -1.3946e-01,  1.1006e+04],
        [-3.5399e-01, -2.6388e-01, -3.9895e-01,  ...,  2.1543e-01,
         -1.6756e-01,  1.7851e+04],
        [-3.2680e-01, -2.9765e-01, -2.5188e-01,  ...,  1.3327e-01,
         -2.6115e-01,  1.7417e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1810],
        [ 0.0614],
        [ 0.1230],
        [ 0.0468],
        [ 0.1938],
        [ 0.0674],
        [ 0.0798],
        [ 0.0820],
        [ 0.1034],
        [-0.0004],
        [ 0.2386],
        [ 0.1310],
        [ 0.0577],
        [ 0.1274],
        [ 0.1612],
        [ 0.0035],
        [ 0.1262],
     

ep  7: 100%|███████████████████████████████████████████████████████████████████████████| 28/28 [01:13<00:00,  2.64s/it]
valid:   0%|                                                                                     | 0/4 [00:00<?, ?it/s]

tensor([[-3.6334e-01, -2.3575e-01, -3.1307e-01,  ...,  2.7783e-01,
         -2.3013e-01,  1.4036e+04],
        [-3.1174e-01, -3.0279e-01, -2.5645e-01,  ...,  1.3253e-01,
         -2.7179e-01,  1.6429e+04],
        [-3.6324e-01, -1.4744e-01, -4.0090e-01,  ...,  2.1833e-01,
         -2.3826e-01,  9.9084e+03],
        ...,
        [-3.8263e-01, -2.2679e-01, -3.4659e-01,  ...,  1.8059e-01,
         -2.7028e-01,  1.2909e+04],
        [-3.6125e-01, -2.4352e-01, -2.4266e-01,  ...,  2.4077e-01,
         -2.7354e-01,  1.6466e+04],
        [-3.5795e-01, -2.3934e-01, -2.7121e-01,  ...,  1.2948e-01,
         -2.6799e-01,  1.2588e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0912],
        [ 0.1361],
        [ 0.2454],
        [ 0.1471],
        [ 0.1909],
        [ 0.0773],
        [ 0.0445],
        [ 0.1803],
        [ 0.1662],
        [ 0.1615],
        [ 0.0859],
        [ 0.0534],
        [ 0.1129],
        [ 0.1770],
        [ 0.1367],
        [ 0.1046],
        [ 0.0950],
     

valid:  25%|███████████████████▎                                                         | 1/4 [00:02<00:07,  2.62s/it]

tensor([[-3.2233e-01, -2.8638e-01, -2.5438e-01,  ...,  1.1127e-01,
         -2.4928e-01,  8.2773e+03],
        [-3.6531e-01, -1.8541e-01, -3.2476e-01,  ...,  9.8329e-02,
         -2.3989e-01,  8.2921e+03],
        [-3.7271e-01, -3.1051e-01, -2.6006e-01,  ...,  1.0789e-01,
         -1.8490e-01,  8.4220e+03],
        ...,
        [-3.7780e-01, -2.1708e-01, -3.6814e-01,  ...,  2.7568e-01,
         -1.1368e-01,  7.2709e+03],
        [-3.9939e-01, -2.8329e-01, -2.7683e-01,  ...,  1.3335e-01,
         -2.4340e-01,  7.3509e+03],
        [-3.1813e-01, -1.7252e-01, -3.3864e-01,  ...,  1.8896e-01,
         -2.7303e-01,  7.1148e+03]], device='cuda:0')
tensor([[0.1250],
        [0.1336],
        [0.1350],
        [0.1351],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [

valid:  50%|██████████████████████████████████████▌                                      | 2/4 [00:05<00:05,  2.62s/it]

tensor([[-2.6459e-01, -1.9816e-01, -2.3166e-01,  ...,  4.3366e-02,
         -2.4275e-01,  7.3657e+03],
        [-4.4351e-01, -1.6997e-01, -2.4599e-01,  ...,  5.0830e-02,
         -2.2045e-01,  7.4660e+03],
        [-3.6358e-01, -2.7964e-01, -2.1717e-01,  ...,  1.4754e-01,
         -2.0238e-01,  7.5556e+03],
        ...,
        [-3.0289e-01, -2.8608e-01, -3.0153e-01,  ...,  9.7022e-02,
         -2.0641e-01,  7.5523e+03],
        [-3.2801e-01, -2.4983e-01, -2.3538e-01,  ...,  1.0617e-01,
         -3.3416e-01,  7.9973e+03],
        [-3.5315e-01, -2.7097e-01, -2.6696e-01,  ...,  1.3142e-01,
         -2.4509e-01,  8.4248e+03]], device='cuda:0')
tensor([[0.1250],
        [0.1336],
        [0.1350],
        [0.1351],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [

valid:  75%|█████████████████████████████████████████████████████████▊                   | 3/4 [00:07<00:02,  2.62s/it]

tensor([[-4.3092e-01, -2.7025e-01, -3.0428e-01,  ...,  1.7351e-01,
         -2.1021e-01,  8.2736e+03],
        [-3.9687e-01, -1.6447e-01, -3.1085e-01,  ...,  2.0987e-01,
         -1.9773e-01,  8.4973e+03],
        [-4.0648e-01, -2.3350e-01, -3.5499e-01,  ...,  1.4228e-01,
         -2.3323e-01,  8.8352e+03],
        ...,
        [-2.5792e-01, -2.7998e-01, -2.9789e-01,  ...,  6.9041e-02,
         -1.5714e-01,  1.1430e+04],
        [-3.6105e-01, -3.1575e-01, -2.2100e-01,  ...,  1.0072e-01,
         -2.3743e-01,  1.1417e+04],
        [-3.4809e-01, -2.3759e-01, -2.0738e-01,  ...,  1.4380e-01,
         -2.2184e-01,  1.1349e+04]], device='cuda:0')
tensor([[0.1250],
        [0.1336],
        [0.1350],
        [0.1351],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [0.1352],
        [

valid: 100%|█████████████████████████████████████████████████████████████████████████████| 4/4 [00:08<00:00,  2.03s/it]
ep  8:   0%|                                                                                    | 0/28 [00:00<?, ?it/s]

tensor([[-3.0299e-01, -2.0548e-01, -3.7163e-01,  ...,  2.2204e-01,
         -2.4157e-01,  1.1479e+04],
        [-5.2351e-01, -1.3666e-01, -4.0322e-01,  ...,  1.6678e-01,
         -2.3607e-01,  1.1660e+04],
        [-2.8602e-01, -2.1112e-01, -2.7816e-01,  ...,  2.0929e-01,
         -2.4698e-01,  1.1616e+04],
        ...,
        [-4.1360e-01, -3.0982e-01, -2.5829e-01,  ...,  8.7611e-02,
         -2.1257e-01,  1.1642e+04],
        [-4.2525e-01, -2.9543e-01, -3.0947e-01,  ...,  7.3900e-02,
         -2.2249e-01,  1.1782e+04],
        [-2.6573e-01, -2.9025e-01, -3.4377e-01,  ...,  1.3080e-01,
         -1.8334e-01,  1.1734e+04]], device='cuda:0')
tensor([[0.1250],
        [0.1336],
        [0.1350],
        [0.1351],
        [0.1352],
        [0.1352],
        [0.1352]], device='cuda:0')
valid acc 0.5376884422110553


ep  8:   4%|██▋                                                                         | 1/28 [00:02<01:12,  2.67s/it]

tensor([[-3.9001e-01, -2.3780e-01, -2.8001e-01,  ...,  7.8663e-02,
         -2.4655e-01,  1.3194e+04],
        [-3.6289e-01, -2.4024e-01, -2.1403e-01,  ...,  1.6413e-01,
         -2.2618e-01,  1.6943e+04],
        [-3.1174e-01, -3.0279e-01, -2.5645e-01,  ...,  1.3253e-01,
         -2.7179e-01,  1.6429e+04],
        ...,
        [-3.1032e-01, -2.6466e-01, -3.2507e-01,  ...,  1.0027e-01,
         -2.4126e-01,  1.0818e+04],
        [-3.3237e-01, -2.9726e-01, -2.4500e-01,  ..., -9.7729e-03,
         -2.6323e-01,  1.0214e+04],
        [-3.2797e-01, -3.1709e-01, -2.3700e-01,  ...,  1.9530e-01,
         -2.3785e-01,  1.3036e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.0783],
        [0.0830],
        [0.2776],
        [0.0925],
        [0.1638],
        [0.0345],
        [0.2258],
        [0.0805],
        [0.1838],
        [0.0727],
        [0.0866],
        [0.1516],
        [0.1776],
        [0.1159],
        [0.1475],
        [0.1261],
        [0.1339],
        [0.0941],
    

ep  8:   7%|█████▍                                                                      | 2/28 [00:05<01:09,  2.68s/it]

tensor([[-4.2103e-01, -2.5585e-01, -2.9564e-01,  ...,  1.0950e-01,
         -2.4357e-01,  1.5521e+04],
        [-4.0487e-01, -3.0175e-01, -2.4833e-01,  ...,  1.3063e-01,
         -3.4350e-01,  1.2982e+04],
        [-2.7445e-01, -3.2888e-01, -2.4196e-01,  ...,  8.9530e-02,
         -2.4701e-01,  1.0444e+04],
        ...,
        [-3.8635e-01, -2.9413e-01, -2.8776e-01,  ...,  6.0888e-02,
         -1.6599e-01,  8.7993e+03],
        [-3.0793e-01, -2.1403e-01, -3.5144e-01,  ...,  1.2687e-01,
         -1.7847e-01,  1.8128e+04],
        [-4.3031e-01, -2.4337e-01, -2.5550e-01,  ...,  9.8720e-02,
         -2.2380e-01,  1.0498e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 1.4032e-01],
        [ 1.4326e-01],
        [ 2.1168e-01],
        [ 2.5365e-01],
        [ 9.4042e-02],
        [ 3.4708e-02],
        [ 1.3340e-01],
        [ 4.1716e-02],
        [ 1.3498e-01],
        [ 1.8875e-01],
        [ 2.9114e-01],
        [ 1.3104e-01],
        [ 2.0273e-01],
        [ 1.8391e-01],
      

ep  8:  11%|████████▏                                                                   | 3/28 [00:08<01:06,  2.67s/it]

tensor([[-3.4686e-01, -3.0520e-01, -2.4244e-01,  ...,  1.0397e-01,
         -2.5482e-01,  1.6839e+04],
        [-3.9329e-01, -2.8470e-01, -2.7204e-01,  ...,  2.1728e-01,
         -1.7186e-01,  1.2983e+04],
        [-4.3423e-01, -1.7755e-01, -4.5232e-01,  ...,  3.0819e-01,
         -1.5367e-01,  1.6722e+04],
        ...,
        [-3.2536e-01, -2.2106e-01, -2.6107e-01,  ...,  1.0786e-01,
         -2.7994e-01,  1.1707e+04],
        [-3.4616e-01, -2.2965e-01, -2.3060e-01,  ...,  1.9353e-01,
         -2.1995e-01,  1.5915e+04],
        [-3.6566e-01, -3.7994e-01, -2.3862e-01,  ...,  1.8230e-01,
         -2.2119e-01,  1.4296e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0897],
        [ 0.2070],
        [ 0.1161],
        [ 0.2071],
        [ 0.1410],
        [ 0.0872],
        [ 0.0124],
        [ 0.0326],
        [ 0.1320],
        [ 0.1460],
        [ 0.0660],
        [ 0.1367],
        [ 0.1436],
        [ 0.1816],
        [ 0.1748],
        [ 0.1383],
        [ 0.0175],
     

ep  8:  14%|██████████▊                                                                 | 4/28 [00:10<01:04,  2.68s/it]

tensor([[-3.5359e-01, -2.9110e-01, -2.4068e-01,  ...,  2.0031e-01,
         -1.4083e-01,  1.7903e+04],
        [-3.9112e-01, -2.0855e-01, -2.8602e-01,  ...,  1.6830e-01,
         -2.2156e-01,  1.7778e+04],
        [-4.0423e-01, -2.1440e-01, -2.5656e-01,  ...,  1.0007e-01,
         -2.1860e-01,  1.2480e+04],
        ...,
        [-3.8640e-01, -2.5571e-01, -2.4144e-01,  ...,  1.7046e-01,
         -2.0258e-01,  1.6577e+04],
        [-3.6340e-01, -2.5250e-01, -2.7729e-01,  ...,  1.7851e-01,
         -2.5763e-01,  1.5180e+04],
        [-4.0975e-01, -2.9565e-01, -2.4610e-01,  ...,  1.3988e-01,
         -2.9211e-01,  1.1697e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1359],
        [ 0.1112],
        [ 0.0897],
        [ 0.0963],
        [ 0.1765],
        [ 0.0869],
        [ 0.0756],
        [ 0.1666],
        [ 0.0563],
        [ 0.1469],
        [ 0.0970],
        [ 0.1555],
        [ 0.0969],
        [ 0.1802],
        [ 0.0186],
        [ 0.0857],
        [-0.0330],
     

ep  8:  18%|█████████████▌                                                              | 5/28 [00:13<01:01,  2.68s/it]

tensor([[-3.2088e-01, -2.2953e-01, -3.1333e-01,  ...,  1.8128e-01,
         -2.4778e-01,  1.6912e+04],
        [-2.0450e-01, -2.3046e-01, -1.5299e-01,  ...,  1.2820e-01,
         -2.0768e-01,  1.7852e+04],
        [-2.8993e-01, -3.0053e-01, -3.3836e-01,  ...,  1.6820e-01,
         -2.7963e-01,  1.5464e+04],
        ...,
        [-3.2414e-01, -2.5885e-01, -1.9833e-01,  ...,  1.6039e-01,
         -2.3915e-01,  1.6675e+04],
        [-3.9105e-01, -3.5010e-01, -3.3185e-01,  ...,  1.7789e-01,
         -1.9210e-01,  1.8214e+04],
        [-4.0525e-01, -4.3343e-02, -5.5586e-01,  ...,  4.8086e-01,
         -6.1038e-02,  1.0511e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1281],
        [ 0.2544],
        [ 0.1515],
        [ 0.0565],
        [ 0.2145],
        [ 0.1112],
        [ 0.1431],
        [ 0.0628],
        [ 0.0333],
        [ 0.1189],
        [ 0.0607],
        [ 0.0630],
        [ 0.1853],
        [ 0.1579],
        [ 0.0591],
        [ 0.1978],
        [ 0.1520],
     

ep  8:  21%|████████████████▎                                                           | 6/28 [00:16<00:58,  2.66s/it]

tensor([[-3.5395e-01, -2.3362e-01, -2.3866e-01,  ...,  2.1006e-01,
         -2.1701e-01,  1.7780e+04],
        [-2.9869e-01, -3.6268e-01, -2.3666e-01,  ...,  1.3313e-01,
         -2.3222e-01,  1.6425e+04],
        [-3.7012e-01, -2.2977e-01, -1.9848e-01,  ...,  1.2403e-01,
         -2.5759e-01,  1.7990e+04],
        ...,
        [-3.7458e-01, -1.8025e-01, -2.6499e-01,  ...,  8.8469e-02,
         -2.8882e-01,  1.2530e+04],
        [-3.6765e-01, -3.4036e-01, -4.6713e-01,  ...,  2.7458e-01,
         -1.9547e-01,  1.5594e+04],
        [-4.0509e-01, -2.4002e-01, -2.9449e-01,  ...,  1.4454e-01,
         -1.8582e-01,  1.0754e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0206],
        [ 0.1332],
        [ 0.1029],
        [ 0.0135],
        [ 0.1241],
        [-0.0220],
        [ 0.1299],
        [ 0.1849],
        [ 0.0648],
        [ 0.1841],
        [ 0.1281],
        [ 0.0985],
        [ 0.0869],
        [ 0.1952],
        [ 0.1605],
        [ 0.1211],
        [ 0.1630],
     

ep  8:  25%|███████████████████                                                         | 7/28 [00:18<00:55,  2.65s/it]

tensor([[-3.6708e-01, -3.2372e-01, -2.3341e-01,  ...,  2.3283e-01,
         -2.7532e-01,  1.6470e+04],
        [-3.7602e-01, -2.0120e-01, -3.2406e-01,  ...,  1.8356e-01,
         -1.8899e-01,  1.0564e+04],
        [-3.3830e-01, -2.5286e-01, -2.4974e-01,  ...,  5.7808e-02,
         -2.2060e-01,  1.2627e+04],
        ...,
        [-3.3103e-01, -2.6450e-01, -2.6556e-01,  ...,  1.3756e-01,
         -2.6256e-01,  1.6414e+04],
        [-3.5345e-01, -2.1345e-01, -1.8156e-01,  ...,  1.6200e-01,
         -2.5427e-01,  1.7810e+04],
        [-4.0047e-01, -3.0697e-01, -2.3178e-01,  ...,  1.1606e-01,
         -2.6212e-01,  1.2462e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0728],
        [ 0.0867],
        [ 0.1353],
        [ 0.1919],
        [ 0.1077],
        [ 0.4492],
        [ 0.1347],
        [ 0.2385],
        [ 0.2482],
        [ 0.0130],
        [ 0.1316],
        [ 0.2173],
        [ 0.1505],
        [ 0.2036],
        [ 0.0369],
        [ 0.1557],
        [ 0.1564],
     

ep  8:  29%|█████████████████████▋                                                      | 8/28 [00:21<00:52,  2.65s/it]

tensor([[-3.7772e-01, -3.1635e-01, -2.3945e-01,  ...,  9.7310e-02,
         -2.6783e-01,  1.1723e+04],
        [-2.9669e-01, -2.4244e-01, -2.8864e-01,  ...,  1.9634e-01,
         -2.6330e-01,  1.8106e+04],
        [-3.9810e-01, -2.9392e-01, -2.5485e-01,  ...,  8.3829e-02,
         -2.5131e-01,  1.7913e+04],
        ...,
        [-3.7117e-01, -3.2220e-01, -1.8632e-01,  ...,  1.0174e-01,
         -2.3872e-01,  1.2691e+04],
        [-3.3895e-01, -3.0830e-01, -3.0326e-01,  ...,  1.2176e-01,
         -2.9058e-01,  1.3391e+04],
        [-3.4195e-01, -2.3057e-01, -3.0019e-01,  ...,  1.9021e-01,
         -2.1422e-01,  1.0380e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.2866],
        [ 0.0208],
        [ 0.0869],
        [ 0.1668],
        [ 0.1042],
        [ 0.1555],
        [ 0.2191],
        [ 0.1517],
        [ 0.1551],
        [ 0.2356],
        [ 0.1580],
        [ 0.0967],
        [ 0.0853],
        [ 0.1818],
        [ 0.1771],
        [ 0.1482],
        [-0.0224],
     

ep  8:  32%|████████████████████████▍                                                   | 9/28 [00:23<00:50,  2.64s/it]

tensor([[-4.2805e-01, -1.8503e-01, -2.1517e-01,  ...,  4.8825e-02,
         -2.7578e-01,  1.3515e+04],
        [-3.4098e-01, -3.5209e-01, -2.4112e-01,  ...,  1.6557e-01,
         -2.4480e-01,  1.6073e+04],
        [-3.1379e-01, -3.1297e-01, -2.6608e-01,  ...,  4.6902e-02,
         -2.5979e-01,  1.5530e+04],
        ...,
        [-3.1120e-01, -3.5694e-01, -2.6379e-01,  ...,  1.1928e-01,
         -2.4170e-01,  1.8299e+04],
        [-3.2223e-01, -2.5085e-01, -2.6893e-01,  ...,  9.8092e-02,
         -2.0445e-01,  8.3317e+03],
        [-3.3838e-01, -2.3723e-01, -2.6595e-01,  ...,  1.8521e-01,
         -2.9566e-01,  1.7928e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1297],
        [ 0.1983],
        [ 0.2026],
        [ 0.0476],
        [ 0.0363],
        [ 0.2195],
        [ 0.2117],
        [ 0.2457],
        [-0.0361],
        [ 0.1465],
        [ 0.1504],
        [ 0.2243],
        [ 0.1230],
        [ 0.2228],
        [ 0.1560],
        [ 0.0670],
        [ 0.1625],
     

ep  8:  36%|██████████████████████████▊                                                | 10/28 [00:26<00:47,  2.65s/it]

tensor([[-5.1620e-01, -2.9768e-01, -3.2489e-01,  ...,  2.0508e-01,
         -1.9727e-01,  1.0699e+04],
        [-2.6688e-01, -3.4098e-01, -2.8690e-01,  ...,  1.1983e-01,
         -2.6482e-01,  1.2198e+04],
        [-3.5298e-01, -1.6399e-01, -2.4613e-01,  ...,  2.3685e-01,
         -1.5084e-01,  1.6346e+04],
        ...,
        [-3.9190e-01, -2.6842e-01, -2.6199e-01,  ...,  1.0918e-01,
         -2.3849e-01,  1.3074e+04],
        [-3.2394e-01, -2.9974e-01, -2.9536e-01,  ...,  5.6627e-02,
         -2.3812e-01,  1.2653e+04],
        [-3.5264e-01, -2.4263e-01, -2.7048e-01,  ...,  1.2647e-01,
         -2.7143e-01,  1.3208e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.0423],
        [0.0336],
        [0.2398],
        [0.1343],
        [0.1231],
        [0.2420],
        [0.1359],
        [0.2445],
        [0.1401],
        [0.3201],
        [0.1710],
        [0.1305],
        [0.1657],
        [0.0271],
        [0.2544],
        [0.2177],
        [0.1012],
        [0.0519],
    

ep  8:  39%|█████████████████████████████▍                                             | 11/28 [00:29<00:45,  2.65s/it]

tensor([[-3.0625e-01, -3.3206e-01, -2.1629e-01,  ...,  2.3521e-02,
         -2.7844e-01,  1.8068e+04],
        [-4.0505e-01, -2.6577e-01, -2.7486e-01,  ...,  1.9824e-01,
         -2.6278e-01,  1.4932e+04],
        [-4.3183e-01, -2.7388e-01, -2.7309e-01,  ...,  1.0821e-01,
         -2.5013e-01,  9.0707e+03],
        ...,
        [-3.9720e-01, -2.9734e-01, -2.8515e-01,  ...,  8.4706e-02,
         -2.0492e-01,  1.4995e+04],
        [-3.1398e-01, -2.9709e-01, -1.8814e-01,  ...,  1.3986e-01,
         -3.1197e-01,  1.3171e+04],
        [-2.9563e-01, -3.4143e-01, -2.9219e-01,  ...,  1.2291e-01,
         -2.6646e-01,  1.2240e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1674],
        [ 0.0457],
        [ 0.1544],
        [ 0.1506],
        [ 0.2340],
        [ 0.0156],
        [ 0.1836],
        [ 0.1377],
        [ 0.0101],
        [ 0.0633],
        [ 0.0217],
        [ 0.0400],
        [ 0.0703],
        [ 0.1437],
        [ 0.0985],
        [ 0.1981],
        [ 0.1368],
     

ep  8:  43%|████████████████████████████████▏                                          | 12/28 [00:31<00:42,  2.67s/it]

tensor([[-3.3929e-01, -3.2639e-01, -3.1384e-01,  ...,  1.3083e-01,
         -2.0967e-01,  1.0464e+04],
        [-3.9370e-01, -2.5807e-01, -3.3574e-01,  ...,  9.2763e-02,
         -1.7573e-01,  1.4676e+04],
        [-3.9161e-01,  8.7244e-04, -6.4915e-01,  ...,  4.5016e-01,
          3.7049e-02,  1.5011e+04],
        ...,
        [-4.0473e-01, -3.3952e-01, -2.0683e-01,  ...,  1.1099e-01,
         -2.0715e-01,  1.5133e+04],
        [-3.5866e-01, -2.3821e-01, -2.5212e-01,  ...,  1.0440e-01,
         -2.8745e-01,  1.1370e+04],
        [-3.5410e-01, -2.5532e-01, -2.8729e-01,  ...,  9.8469e-02,
         -2.7305e-01,  1.2605e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 2.2501e-01],
        [-1.6880e-02],
        [ 1.2072e-01],
        [ 1.2421e-01],
        [ 9.1000e-03],
        [ 1.9364e-01],
        [ 1.1336e-01],
        [ 1.1017e-01],
        [ 1.2742e-01],
        [ 1.3792e-01],
        [ 1.8907e-01],
        [ 2.2010e-01],
        [ 1.3309e-01],
        [ 1.9948e-01],
      

ep  8:  46%|██████████████████████████████████▊                                        | 13/28 [00:34<00:39,  2.65s/it]

tensor([[-3.4970e-01, -1.4456e-01, -3.3407e-01,  ...,  1.1422e-01,
         -2.1463e-01,  1.1193e+04],
        [-4.4066e-01, -2.7311e-01, -2.8815e-01,  ...,  1.1339e-01,
         -2.0396e-01,  1.4825e+04],
        [-3.5871e-01, -2.1447e-01, -1.9529e-01,  ...,  1.0849e-01,
         -2.7851e-01,  1.8011e+04],
        ...,
        [-2.2617e-01, -4.1990e-01, -1.8280e-01,  ..., -1.2929e-01,
         -3.1821e-01,  1.1998e+04],
        [-2.9450e-01, -2.1380e-01, -3.0742e-01,  ...,  1.7520e-01,
         -2.0277e-01,  1.6583e+04],
        [-3.6242e-01, -2.8157e-01, -2.6693e-01,  ...,  1.6457e-01,
         -3.1214e-01,  1.7231e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1515],
        [ 0.1059],
        [ 0.1826],
        [ 0.1594],
        [ 0.2293],
        [ 0.0591],
        [ 0.2241],
        [ 0.2456],
        [ 0.1413],
        [ 0.1624],
        [ 0.2059],
        [ 0.0743],
        [ 0.2276],
        [ 0.1284],
        [ 0.1308],
        [ 0.1120],
        [ 0.0820],
     

ep  8:  50%|█████████████████████████████████████▌                                     | 14/28 [00:37<00:37,  2.65s/it]

tensor([[-3.1785e-01, -3.0195e-01, -3.1306e-01,  ...,  1.0944e-01,
         -2.8388e-01,  1.2021e+04],
        [-3.1880e-01, -2.3801e-01, -2.7438e-01,  ...,  1.5946e-01,
         -2.3975e-01,  1.2570e+04],
        [-3.7143e-01, -3.0776e-01, -2.4387e-01,  ...,  1.6023e-01,
         -2.6944e-01,  1.1126e+04],
        ...,
        [-3.8769e-01, -2.8092e-01, -2.1947e-01,  ...,  2.1149e-01,
         -2.3613e-01,  1.7213e+04],
        [-3.6240e-01, -3.0628e-01, -2.5663e-01,  ...,  1.5372e-01,
         -2.4302e-01,  1.4550e+04],
        [-4.1336e-01, -2.6589e-01, -2.7680e-01,  ...,  1.8086e-01,
         -2.5450e-01,  1.2806e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.0448],
        [0.0800],
        [0.1834],
        [0.0694],
        [0.0319],
        [0.1114],
        [0.1271],
        [0.1922],
        [0.1719],
        [0.1903],
        [0.1370],
        [0.3167],
        [0.1646],
        [0.1807],
        [0.1664],
        [0.1388],
        [0.1815],
        [0.0334],
    

ep  8:  54%|████████████████████████████████████████▏                                  | 15/28 [00:39<00:34,  2.65s/it]

tensor([[-3.4264e-01, -2.0829e-01, -2.7175e-01,  ...,  1.8134e-01,
         -1.9134e-01,  1.8000e+04],
        [-3.7303e-01, -2.6492e-01, -3.6812e-01,  ...,  2.5513e-01,
         -2.6033e-01,  1.1866e+04],
        [-4.0702e-01, -2.7225e-01, -2.7831e-01,  ...,  9.6236e-02,
         -3.1339e-01,  1.6109e+04],
        ...,
        [-2.7821e-01, -2.3811e-01, -2.2439e-01,  ...,  8.5796e-02,
         -2.5010e-01,  1.2987e+04],
        [-4.1270e-01, -2.4804e-01, -2.9193e-01,  ...,  1.4030e-01,
         -2.2064e-01,  1.6640e+04],
        [-4.3886e-01, -2.6075e-01, -2.9995e-01,  ...,  1.6046e-01,
         -1.9585e-01,  1.7730e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1760],
        [ 0.1689],
        [ 0.1561],
        [ 0.1896],
        [ 0.1536],
        [ 0.0957],
        [ 0.2176],
        [ 0.2693],
        [ 0.2268],
        [ 0.0986],
        [ 0.0906],
        [ 0.1663],
        [ 0.1177],
        [ 0.2486],
        [ 0.1830],
        [-0.0524],
        [-0.0092],
     

ep  8:  57%|██████████████████████████████████████████▊                                | 16/28 [00:42<00:31,  2.65s/it]

tensor([[-3.6387e-01, -3.1218e-01, -3.5990e-01,  ...,  1.9568e-01,
         -1.9268e-01,  1.5844e+04],
        [-4.3154e-01, -2.2596e-01, -2.4224e-01,  ...,  1.7318e-01,
         -1.9913e-01,  1.1823e+04],
        [-2.9519e-01, -1.9977e-01, -2.3093e-01,  ...,  2.1168e-01,
         -2.0507e-01,  1.7535e+04],
        ...,
        [-3.1728e-01, -2.4756e-01, -2.3630e-01,  ..., -4.3600e-03,
         -2.9286e-01,  1.2835e+04],
        [-4.0716e-01, -2.6524e-01, -2.8636e-01,  ...,  1.2351e-01,
         -2.6950e-01,  1.3176e+04],
        [-4.5306e-01, -2.7869e-01, -3.0196e-01,  ...,  2.7409e-01,
         -1.1064e-01,  1.5373e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0713],
        [ 0.0871],
        [ 0.2149],
        [ 0.0716],
        [ 0.0996],
        [ 0.1722],
        [ 0.1980],
        [ 0.1250],
        [ 0.1523],
        [ 0.2060],
        [ 0.0841],
        [ 0.3000],
        [-0.0068],
        [ 0.0159],
        [ 0.1610],
        [ 0.2404],
        [ 0.1293],
     

ep  8:  61%|█████████████████████████████████████████████▌                             | 17/28 [00:45<00:29,  2.66s/it]

tensor([[-3.3863e-01, -2.6541e-01, -3.0847e-01,  ...,  1.1149e-01,
         -2.2171e-01,  1.3987e+04],
        [-3.9690e-01, -2.9280e-01, -3.0419e-01,  ...,  1.5172e-01,
         -2.0801e-01,  1.7899e+04],
        [-4.0715e-01, -3.1949e-01, -2.5746e-01,  ...,  1.3589e-01,
         -2.8805e-01,  1.4810e+04],
        ...,
        [-3.9822e-01, -3.0698e-01, -3.0965e-01,  ...,  1.4726e-01,
         -2.7643e-01,  8.4384e+03],
        [-3.4866e-01, -3.2099e-01, -2.9788e-01,  ...,  1.3172e-01,
         -1.9866e-01,  1.7767e+04],
        [-4.0026e-01, -3.3545e-01, -2.6718e-01,  ...,  2.2542e-01,
         -2.0691e-01,  1.6491e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0689],
        [ 0.1232],
        [ 0.2354],
        [ 0.2438],
        [ 0.1510],
        [ 0.1158],
        [ 0.0635],
        [ 0.2462],
        [ 0.2257],
        [ 0.0829],
        [ 0.1493],
        [ 0.1893],
        [ 0.2031],
        [ 0.1606],
        [ 0.1675],
        [ 0.0603],
        [ 0.0589],
     

ep  8:  64%|████████████████████████████████████████████████▏                          | 18/28 [00:47<00:26,  2.65s/it]

tensor([[-3.8700e-01, -2.5806e-01, -2.4642e-01,  ...,  8.1196e-02,
         -2.0085e-01,  1.0940e+04],
        [-3.5594e-01, -2.9147e-01, -2.9471e-01,  ...,  1.5151e-01,
         -1.5995e-01,  1.7763e+04],
        [-3.0240e-01, -2.4868e-01, -3.8874e-01,  ...,  2.2390e-01,
         -2.7084e-01,  9.7312e+03],
        ...,
        [-3.0505e-01, -2.6449e-01, -1.9916e-01,  ...,  1.3593e-01,
         -2.8050e-01,  1.5699e+04],
        [-3.0635e-01, -2.2245e-01, -1.8073e-01,  ...,  1.5041e-01,
         -2.0460e-01,  1.6925e+04],
        [-3.4652e-01, -2.9041e-01, -2.6480e-01,  ...,  2.2792e-01,
         -2.5364e-01,  1.7384e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1143],
        [ 0.0984],
        [ 0.1261],
        [ 0.1351],
        [ 0.0247],
        [ 0.1398],
        [ 0.1814],
        [ 0.0674],
        [ 0.1951],
        [ 0.1477],
        [ 0.1702],
        [ 0.1170],
        [ 0.2448],
        [ 0.1594],
        [ 0.1440],
        [ 0.0826],
        [ 0.1543],
     

ep  8:  68%|██████████████████████████████████████████████████▉                        | 19/28 [00:50<00:23,  2.65s/it]

tensor([[-2.9772e-01, -2.5351e-01, -2.4807e-01,  ...,  8.1133e-02,
         -2.1788e-01,  9.4973e+03],
        [-3.7066e-01, -2.2727e-01, -2.0458e-01,  ...,  7.2518e-02,
         -2.7090e-01,  1.3597e+04],
        [-3.5415e-01, -2.1510e-01, -2.2039e-01,  ...,  2.1293e-01,
         -2.1816e-01,  1.6070e+04],
        ...,
        [-3.8375e-01, -3.5576e-01, -2.4437e-01,  ...,  1.4666e-01,
         -2.0514e-01,  1.4579e+04],
        [-3.9394e-01, -3.2345e-01, -2.6753e-01,  ...,  1.1090e-01,
         -2.0170e-01,  1.4988e+04],
        [-3.3620e-01, -1.6591e-01, -3.3645e-01,  ...,  1.8875e-01,
         -2.6149e-01,  1.1191e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1120],
        [ 0.0496],
        [ 0.1050],
        [ 0.1294],
        [ 0.1035],
        [ 0.0323],
        [ 0.1417],
        [ 0.2056],
        [ 0.1583],
        [ 0.1773],
        [ 0.1450],
        [ 0.1832],
        [ 0.2211],
        [ 0.1283],
        [ 0.0125],
        [ 0.2329],
        [ 0.1027],
     

ep  8:  71%|█████████████████████████████████████████████████████▌                     | 20/28 [00:53<00:21,  2.64s/it]

tensor([[-4.1684e-01, -2.1007e-01, -2.1573e-01,  ...,  1.5244e-01,
         -2.2402e-01,  1.7891e+04],
        [-4.4406e-01, -2.0456e-01, -2.9012e-01,  ...,  1.6277e-01,
         -3.0401e-01,  1.1838e+04],
        [-4.0281e-01, -2.4027e-01, -2.4441e-01,  ...,  1.9044e-01,
         -2.6107e-01,  1.5883e+04],
        ...,
        [-3.6719e-01, -2.5717e-01, -3.3993e-01,  ...,  1.2309e-01,
         -1.5817e-01,  1.2602e+04],
        [-4.3401e-01, -2.0826e-01, -3.1283e-01,  ...,  2.0738e-01,
         -2.3788e-01,  1.2908e+04],
        [-4.4543e-01, -3.1021e-01, -3.3417e-01,  ...,  1.3935e-01,
         -2.2452e-01,  9.5393e+03]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1402],
        [ 0.1053],
        [ 0.0748],
        [ 0.0682],
        [ 0.2325],
        [ 0.0679],
        [-0.0113],
        [ 0.1646],
        [ 0.1767],
        [ 0.2797],
        [ 0.1690],
        [ 0.0367],
        [ 0.1086],
        [ 0.1198],
        [ 0.1236],
        [ 0.1656],
        [ 0.1606],
     

ep  8:  75%|████████████████████████████████████████████████████████▎                  | 21/28 [00:55<00:18,  2.65s/it]

tensor([[-3.3582e-01, -2.4971e-01, -2.5818e-01,  ...,  1.3553e-01,
         -3.0568e-01,  1.4819e+04],
        [-2.6577e-01, -2.0148e-01, -2.7029e-01,  ...,  2.4395e-01,
         -1.5022e-01,  1.7977e+04],
        [-3.5107e-01, -2.7279e-01, -1.7334e-01,  ...,  1.1278e-01,
         -2.4311e-01,  1.3264e+04],
        ...,
        [-3.2757e-01, -2.2391e-01, -2.5303e-01,  ...,  3.2675e-02,
         -2.7579e-01,  1.2878e+04],
        [-3.7225e-01, -3.2075e-01, -2.2463e-01,  ...,  7.6983e-02,
         -2.6775e-01,  1.3326e+04],
        [-3.6669e-01, -2.3129e-01, -3.3963e-01,  ...,  1.3087e-01,
         -2.1291e-01,  1.1144e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0518],
        [ 0.1119],
        [ 0.1969],
        [ 0.0875],
        [ 0.1202],
        [ 0.1386],
        [ 0.0274],
        [ 0.0563],
        [ 0.1508],
        [ 0.1312],
        [ 0.1083],
        [ 0.0381],
        [ 0.0691],
        [ 0.1877],
        [ 0.1591],
        [ 0.1310],
        [ 0.0444],
     

ep  8:  79%|██████████████████████████████████████████████████████████▉                | 22/28 [00:58<00:15,  2.65s/it]

tensor([[-2.9876e-01, -2.7545e-01, -2.1533e-01,  ...,  1.4495e-01,
         -2.9429e-01,  1.8312e+04],
        [-3.8717e-01, -2.3963e-01, -2.8286e-01,  ...,  1.5296e-01,
         -2.4989e-01,  1.6180e+04],
        [-3.1632e-01, -2.1924e-01, -2.8960e-01,  ...,  2.8670e-01,
         -1.2413e-01,  1.6714e+04],
        ...,
        [-3.2479e-01, -3.1200e-01, -2.4513e-01,  ...,  1.2827e-01,
         -1.8956e-01,  1.6695e+04],
        [-3.9046e-01, -3.2551e-01, -2.0009e-01,  ...,  1.3907e-01,
         -1.9667e-01,  1.7977e+04],
        [-3.8744e-01, -1.9512e-01, -2.7233e-01,  ...,  7.3777e-02,
         -1.9790e-01,  1.0913e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.1682],
        [0.1410],
        [0.1479],
        [0.0867],
        [0.0708],
        [0.0588],
        [0.1325],
        [0.2131],
        [0.2930],
        [0.0158],
        [0.1137],
        [0.0629],
        [0.0441],
        [0.0916],
        [0.0097],
        [0.1549],
        [0.1740],
        [0.1325],
    

ep  8:  82%|█████████████████████████████████████████████████████████████▌             | 23/28 [01:01<00:13,  2.65s/it]

tensor([[-3.2826e-01, -2.7453e-01, -1.7587e-01,  ...,  8.9210e-02,
         -2.8136e-01,  1.1269e+04],
        [-3.6203e-01, -3.7601e-01, -2.7310e-01,  ...,  1.3336e-01,
         -2.3560e-01,  1.0383e+04],
        [-4.2451e-01, -2.5996e-01, -2.8474e-01,  ...,  9.1273e-02,
         -2.3376e-01,  1.0812e+04],
        ...,
        [-3.1818e-01, -2.4180e-01, -3.4216e-01,  ...,  1.7456e-01,
         -2.2672e-01,  1.0298e+04],
        [-3.8438e-01, -1.8772e-02, -5.4449e-01,  ...,  3.9325e-01,
         -2.2451e-01,  1.1133e+04],
        [-3.7635e-01, -2.8437e-01, -2.3309e-01,  ...,  1.4942e-01,
         -2.4063e-01,  1.8076e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1300],
        [ 0.0961],
        [ 0.0411],
        [ 0.1044],
        [ 0.0887],
        [ 0.0940],
        [ 0.1882],
        [ 0.0755],
        [ 0.1112],
        [ 0.1478],
        [ 0.0785],
        [ 0.1163],
        [ 0.0731],
        [ 0.0283],
        [ 0.0956],
        [ 0.1173],
        [ 0.1294],
     

ep  8:  86%|████████████████████████████████████████████████████████████████▎          | 24/28 [01:03<00:10,  2.64s/it]

tensor([[-3.0360e-01, -2.2114e-01, -4.1976e-01,  ...,  3.0012e-01,
         -8.6845e-02,  8.7645e+03],
        [-3.4672e-01, -2.4180e-01, -3.1148e-01,  ...,  8.9279e-02,
         -2.5598e-01,  1.0860e+04],
        [-3.8312e-01, -2.6794e-01, -2.3142e-01,  ...,  1.1407e-01,
         -2.3679e-01,  1.1494e+04],
        ...,
        [-4.0899e-01, -2.3999e-01, -3.0928e-01,  ...,  1.4118e-01,
         -2.4670e-01,  1.0255e+04],
        [-4.1441e-01, -2.3040e-01, -3.0592e-01,  ...,  1.1131e-01,
         -2.8487e-01,  1.3010e+04],
        [-3.7493e-01, -1.3067e-01, -2.8090e-01,  ...,  2.8585e-01,
         -1.4647e-01,  1.7661e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0338],
        [ 0.2598],
        [ 0.1457],
        [ 0.1689],
        [ 0.1669],
        [ 0.1347],
        [ 0.1509],
        [ 0.0862],
        [ 0.1642],
        [ 0.2041],
        [ 0.1171],
        [ 0.2380],
        [ 0.0136],
        [ 0.0068],
        [ 0.0895],
        [ 0.0515],
        [ 0.2529],
     

ep  8:  89%|██████████████████████████████████████████████████████████████████▉        | 25/28 [01:06<00:07,  2.65s/it]

tensor([[-3.9891e-01, -2.6663e-01, -3.2669e-01,  ...,  1.9346e-01,
         -2.0591e-01,  1.6912e+04],
        [-3.8263e-01, -3.0440e-01, -3.2313e-01,  ...,  1.3054e-01,
         -2.6743e-01,  1.3009e+04],
        [-2.9785e-01, -1.9677e-01, -2.1012e-01,  ...,  1.0743e-01,
         -3.4530e-01,  1.1560e+04],
        ...,
        [-4.0232e-01, -2.5939e-01, -3.0630e-01,  ...,  1.5544e-01,
         -1.9869e-01,  9.7127e+03],
        [-2.8922e-01, -2.9200e-01, -2.0554e-01,  ...,  2.0597e-01,
         -2.8366e-01,  1.7603e+04],
        [-3.9792e-01, -2.4751e-01, -2.5154e-01,  ...,  2.1127e-01,
         -1.6999e-01,  1.7664e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.1550],
        [0.1724],
        [0.1682],
        [0.1074],
        [0.0790],
        [0.2197],
        [0.1494],
        [0.0889],
        [0.0726],
        [0.0596],
        [0.1366],
        [0.1482],
        [0.1562],
        [0.0961],
        [0.0782],
        [0.1940],
        [0.1691],
        [0.0586],
    

ep  8:  93%|█████████████████████████████████████████████████████████████████████▋     | 26/28 [01:09<00:05,  2.65s/it]

tensor([[-2.9212e-01, -2.5928e-01, -2.7701e-01,  ...,  5.6322e-02,
         -2.7968e-01,  1.0662e+04],
        [-3.2101e-01, -2.5890e-01, -2.5667e-01,  ...,  1.4936e-01,
         -2.1622e-01,  1.2150e+04],
        [-3.8785e-01, -2.3561e-01, -2.6039e-01,  ...,  1.8710e-01,
         -2.2448e-01,  1.0044e+04],
        ...,
        [-4.0319e-01, -2.4317e-01, -2.0209e-01,  ...,  1.2309e-01,
         -2.1658e-01,  1.1570e+04],
        [-3.8263e-01, -1.5271e-01, -3.8456e-01,  ...,  1.8018e-01,
         -1.7891e-01,  8.5047e+03],
        [-2.9305e-01, -2.5986e-01, -2.8434e-01,  ...,  8.6801e-02,
         -2.8060e-01,  1.1483e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0512],
        [ 0.1836],
        [ 0.0475],
        [ 0.0671],
        [ 0.1202],
        [ 0.1593],
        [ 0.2128],
        [ 0.1962],
        [ 0.0933],
        [ 0.3174],
        [ 0.0528],
        [ 0.2174],
        [ 0.1830],
        [ 0.1236],
        [-0.0366],
        [ 0.1697],
        [ 0.2323],
     

ep  8:  96%|████████████████████████████████████████████████████████████████████████▎  | 27/28 [01:11<00:02,  2.66s/it]

tensor([[-3.1737e-01, -2.9809e-01, -2.5242e-01,  ...,  9.2956e-02,
         -1.8159e-01,  1.0434e+04],
        [-3.5353e-01, -2.5612e-01, -2.5854e-01,  ...,  1.6687e-01,
         -2.7015e-01,  1.0067e+04],
        [-2.3054e-01, -2.2130e-01, -2.5750e-01,  ...,  1.1368e-01,
         -2.6544e-01,  1.7920e+04],
        ...,
        [-4.2721e-01, -3.0027e-01, -2.6535e-01,  ...,  1.5453e-01,
         -1.8394e-01,  1.0318e+04],
        [-3.4417e-01, -2.0074e-01, -2.6702e-01,  ...,  1.4035e-01,
         -2.4776e-01,  1.0949e+04],
        [-3.4756e-01, -1.9895e-01, -2.5337e-01,  ...,  1.8507e-01,
         -2.2741e-01,  1.6374e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1869],
        [ 0.1035],
        [ 0.1163],
        [ 0.1718],
        [ 0.0333],
        [ 0.2848],
        [ 0.2852],
        [ 0.1409],
        [ 0.1401],
        [ 0.1410],
        [ 0.3110],
        [ 0.1359],
        [ 0.1180],
        [ 0.1261],
        [ 0.1605],
        [ 0.1092],
        [ 0.1341],
     

ep  8: 100%|███████████████████████████████████████████████████████████████████████████| 28/28 [01:14<00:00,  2.65s/it]
valid:   0%|                                                                                     | 0/4 [00:00<?, ?it/s]

tensor([[-3.5486e-01, -1.6327e-01, -3.8985e-01,  ...,  3.0165e-01,
         -1.5519e-01,  1.0010e+04],
        [-3.4835e-01, -2.7303e-01, -3.3321e-01,  ...,  1.4257e-01,
         -2.6973e-01,  1.1732e+04],
        [-3.8996e-01, -2.6153e-01, -3.2837e-01,  ...,  2.3245e-01,
         -2.2377e-01,  1.2496e+04],
        ...,
        [-3.4922e-01, -2.9487e-01, -2.6618e-01,  ...,  1.6611e-01,
         -2.0964e-01,  1.5335e+04],
        [-3.2133e-01, -3.0726e-01, -2.2775e-01,  ...,  1.1292e-01,
         -1.7678e-01,  1.7615e+04],
        [-3.8389e-01, -2.8826e-01, -3.0662e-01,  ...,  1.8645e-01,
         -1.4296e-01,  1.6944e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0708],
        [-0.0385],
        [ 0.1281],
        [ 0.1984],
        [ 0.1046],
        [ 0.0541],
        [ 0.1890],
        [ 0.1589],
        [ 0.0872],
        [ 0.1438],
        [ 0.1642],
        [ 0.1113],
        [ 0.0018],
        [ 0.1990],
        [ 0.2331],
        [ 0.1276],
        [ 0.1551],
     

valid:  25%|███████████████████▎                                                         | 1/4 [00:02<00:07,  2.61s/it]

tensor([[-3.2233e-01, -2.8638e-01, -2.5438e-01,  ...,  1.1127e-01,
         -2.4928e-01,  8.2773e+03],
        [-3.6531e-01, -1.8541e-01, -3.2476e-01,  ...,  9.8329e-02,
         -2.3989e-01,  8.2921e+03],
        [-3.7271e-01, -3.1051e-01, -2.6006e-01,  ...,  1.0789e-01,
         -1.8490e-01,  8.4220e+03],
        ...,
        [-3.7780e-01, -2.1708e-01, -3.6814e-01,  ...,  2.7568e-01,
         -1.1368e-01,  7.2709e+03],
        [-3.9939e-01, -2.8329e-01, -2.7683e-01,  ...,  1.3335e-01,
         -2.4340e-01,  7.3509e+03],
        [-3.1813e-01, -1.7252e-01, -3.3864e-01,  ...,  1.8896e-01,
         -2.7303e-01,  7.1148e+03]], device='cuda:0')
tensor([[0.1249],
        [0.1336],
        [0.1349],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [

valid:  50%|██████████████████████████████████████▌                                      | 2/4 [00:05<00:05,  2.60s/it]

tensor([[-2.6459e-01, -1.9816e-01, -2.3166e-01,  ...,  4.3366e-02,
         -2.4275e-01,  7.3657e+03],
        [-4.4351e-01, -1.6997e-01, -2.4599e-01,  ...,  5.0830e-02,
         -2.2045e-01,  7.4660e+03],
        [-3.6358e-01, -2.7964e-01, -2.1717e-01,  ...,  1.4754e-01,
         -2.0238e-01,  7.5556e+03],
        ...,
        [-3.0289e-01, -2.8608e-01, -3.0153e-01,  ...,  9.7022e-02,
         -2.0641e-01,  7.5523e+03],
        [-3.2801e-01, -2.4983e-01, -2.3538e-01,  ...,  1.0617e-01,
         -3.3416e-01,  7.9973e+03],
        [-3.5315e-01, -2.7097e-01, -2.6696e-01,  ...,  1.3142e-01,
         -2.4509e-01,  8.4248e+03]], device='cuda:0')
tensor([[0.1249],
        [0.1336],
        [0.1349],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [

valid:  75%|█████████████████████████████████████████████████████████▊                   | 3/4 [00:07<00:02,  2.60s/it]

tensor([[-4.3092e-01, -2.7025e-01, -3.0428e-01,  ...,  1.7351e-01,
         -2.1021e-01,  8.2736e+03],
        [-3.9687e-01, -1.6447e-01, -3.1085e-01,  ...,  2.0987e-01,
         -1.9773e-01,  8.4973e+03],
        [-4.0648e-01, -2.3350e-01, -3.5499e-01,  ...,  1.4228e-01,
         -2.3323e-01,  8.8352e+03],
        ...,
        [-2.5792e-01, -2.7998e-01, -2.9789e-01,  ...,  6.9041e-02,
         -1.5714e-01,  1.1430e+04],
        [-3.6105e-01, -3.1575e-01, -2.2100e-01,  ...,  1.0072e-01,
         -2.3743e-01,  1.1417e+04],
        [-3.4809e-01, -2.3759e-01, -2.0738e-01,  ...,  1.4380e-01,
         -2.2184e-01,  1.1349e+04]], device='cuda:0')
tensor([[0.1249],
        [0.1336],
        [0.1349],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351],
        [

valid: 100%|█████████████████████████████████████████████████████████████████████████████| 4/4 [00:08<00:00,  2.02s/it]
ep  9:   0%|                                                                                    | 0/28 [00:00<?, ?it/s]

tensor([[-3.0299e-01, -2.0548e-01, -3.7163e-01,  ...,  2.2204e-01,
         -2.4157e-01,  1.1479e+04],
        [-5.2351e-01, -1.3666e-01, -4.0322e-01,  ...,  1.6678e-01,
         -2.3607e-01,  1.1660e+04],
        [-2.8602e-01, -2.1112e-01, -2.7816e-01,  ...,  2.0929e-01,
         -2.4698e-01,  1.1616e+04],
        ...,
        [-4.1360e-01, -3.0982e-01, -2.5829e-01,  ...,  8.7611e-02,
         -2.1257e-01,  1.1642e+04],
        [-4.2525e-01, -2.9543e-01, -3.0947e-01,  ...,  7.3900e-02,
         -2.2249e-01,  1.1782e+04],
        [-2.6573e-01, -2.9025e-01, -3.4377e-01,  ...,  1.3080e-01,
         -1.8334e-01,  1.1734e+04]], device='cuda:0')
tensor([[0.1249],
        [0.1336],
        [0.1349],
        [0.1351],
        [0.1351],
        [0.1351],
        [0.1351]], device='cuda:0')
valid acc 0.5376884422110553


ep  9:   4%|██▋                                                                         | 1/28 [00:02<01:12,  2.67s/it]

tensor([[-3.7310e-01, -3.0284e-01, -2.9060e-01,  ...,  8.5714e-02,
         -1.7118e-01,  1.2506e+04],
        [-4.1505e-01, -3.0561e-01, -3.0559e-01,  ...,  1.7189e-01,
         -2.8727e-01,  1.1578e+04],
        [-4.0679e-01, -2.6765e-01, -2.8862e-01,  ...,  2.0217e-01,
         -2.5043e-01,  1.2960e+04],
        ...,
        [-4.3772e-01,  9.9345e-03, -5.9190e-01,  ...,  5.7580e-01,
         -9.5587e-02,  1.0120e+04],
        [-4.4440e-01, -2.2733e-01, -2.3925e-01,  ...,  1.5797e-01,
         -2.1545e-01,  1.0742e+04],
        [-4.8348e-01, -3.0758e-01, -2.5925e-01,  ...,  1.0323e-01,
         -2.4398e-01,  1.6086e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.0641],
        [0.1125],
        [0.1173],
        [0.0739],
        [0.2271],
        [0.0984],
        [0.1431],
        [0.1693],
        [0.0780],
        [0.0669],
        [0.0671],
        [0.1261],
        [0.1523],
        [0.2419],
        [0.1059],
        [0.1258],
        [0.2138],
        [0.2593],
    

ep  9:   7%|█████▍                                                                      | 2/28 [00:05<01:08,  2.65s/it]

tensor([[-3.3166e-01, -2.4775e-01, -2.7082e-01,  ...,  1.3740e-01,
         -2.7136e-01,  1.2090e+04],
        [-3.4589e-01, -1.9470e-01, -2.8189e-01,  ...,  1.9842e-01,
         -2.3364e-01,  9.9400e+03],
        [-2.0458e-01, -1.4205e-01, -2.5593e-01,  ...,  1.6041e-01,
         -2.2777e-01,  1.7061e+04],
        ...,
        [-3.5078e-01, -2.7579e-01, -2.3494e-01,  ...,  9.0931e-02,
         -2.7445e-01,  1.3172e+04],
        [-3.4232e-01, -2.5003e-01, -3.9384e-01,  ...,  3.0967e-01,
         -2.2447e-01,  1.7885e+04],
        [-3.2208e-01, -1.6071e-01, -2.1917e-01,  ...,  1.3239e-01,
         -2.0294e-01,  1.7805e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1946],
        [ 0.1477],
        [ 0.0920],
        [ 0.1136],
        [ 0.2703],
        [ 0.1289],
        [-0.0122],
        [ 0.1982],
        [ 0.0527],
        [ 0.0904],
        [ 0.1096],
        [ 0.1441],
        [ 0.1038],
        [ 0.0712],
        [ 0.2217],
        [ 0.1342],
        [-0.0041],
     

ep  9:  11%|████████▏                                                                   | 3/28 [00:07<01:06,  2.65s/it]

tensor([[-3.7158e-01, -2.4812e-01, -2.9887e-01,  ...,  2.1151e-01,
         -2.0637e-01,  1.6066e+04],
        [-3.4818e-01, -3.0913e-01, -2.0225e-01,  ...,  2.0458e-01,
         -2.9903e-01,  1.2554e+04],
        [-3.2163e-01, -3.1684e-01, -1.8236e-01,  ...,  2.0404e-01,
         -2.2959e-01,  1.7191e+04],
        ...,
        [-3.7151e-01, -2.8559e-01, -2.4777e-01,  ...,  1.0498e-01,
         -2.7297e-01,  1.7603e+04],
        [-3.4082e-01, -2.9792e-01, -2.5665e-01,  ...,  1.5300e-01,
         -2.8293e-01,  1.0841e+04],
        [-3.8351e-01, -2.0453e-01, -2.5638e-01,  ...,  5.8612e-02,
         -2.2195e-01,  1.3346e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1464],
        [ 0.1660],
        [ 0.1652],
        [ 0.0710],
        [ 0.1973],
        [ 0.1061],
        [ 0.0324],
        [ 0.1503],
        [ 0.0300],
        [ 0.1508],
        [ 0.2605],
        [ 0.1066],
        [ 0.0522],
        [ 0.1650],
        [ 0.0835],
        [ 0.1701],
        [ 0.1295],
     

ep  9:  14%|██████████▊                                                                 | 4/28 [00:10<01:03,  2.64s/it]

tensor([[-4.1822e-01, -2.1209e-01, -3.0067e-01,  ...,  1.7214e-01,
         -2.0962e-01,  1.7372e+04],
        [-3.1947e-01, -3.3845e-01, -2.3857e-01,  ...,  3.0203e-02,
         -2.5563e-01,  1.1955e+04],
        [-2.8183e-01, -2.2842e-01, -2.4400e-01,  ...,  2.3164e-01,
         -2.3515e-01,  1.7633e+04],
        ...,
        [-3.7046e-01, -2.9217e-01, -2.9158e-01,  ...,  1.1045e-01,
         -2.1492e-01,  1.0858e+04],
        [-2.9526e-01, -2.9588e-01, -2.4599e-01,  ...,  7.9702e-02,
         -2.6027e-01,  1.3971e+04],
        [-3.1141e-01, -2.3030e-01, -2.1493e-01,  ...,  2.0532e-01,
         -3.0523e-01,  1.6014e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1376],
        [ 0.1279],
        [ 0.1466],
        [ 0.1731],
        [ 0.2324],
        [ 0.1868],
        [ 0.1360],
        [ 0.1357],
        [-0.0281],
        [ 0.1533],
        [ 0.0554],
        [ 0.1071],
        [ 0.2454],
        [ 0.1026],
        [ 0.0558],
        [ 0.1582],
        [ 0.0722],
     

ep  9:  18%|█████████████▌                                                              | 5/28 [00:13<01:00,  2.64s/it]

tensor([[-3.3929e-01, -3.2639e-01, -3.1384e-01,  ...,  1.3083e-01,
         -2.0967e-01,  1.0464e+04],
        [-3.2460e-01, -2.3794e-01, -3.0125e-01,  ...,  1.4221e-01,
         -2.8319e-01,  1.2080e+04],
        [-3.2277e-01, -3.2995e-01, -2.4227e-01,  ...,  1.5503e-01,
         -2.3114e-01,  1.8080e+04],
        ...,
        [-4.1297e-01, -2.2116e-01, -2.5546e-01,  ...,  1.8096e-01,
         -2.0335e-01,  1.0397e+04],
        [-3.0724e-01, -2.8268e-01, -2.5239e-01,  ...,  1.3388e-01,
         -3.2005e-01,  1.7366e+04],
        [-4.2478e-01, -1.7772e-01, -5.0401e-01,  ...,  4.6006e-01,
         -1.0454e-01,  1.1766e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0466],
        [ 0.2282],
        [ 0.1473],
        [ 0.0845],
        [ 0.1083],
        [ 0.1181],
        [ 0.0641],
        [-0.0137],
        [ 0.0687],
        [ 0.1117],
        [ 0.1110],
        [ 0.2598],
        [ 0.2275],
        [ 0.0158],
        [ 0.2628],
        [ 0.1618],
        [ 0.1133],
     

ep  9:  21%|████████████████▎                                                           | 6/28 [00:15<00:58,  2.65s/it]

tensor([[-3.4125e-01, -2.3741e-01, -2.8924e-01,  ...,  1.0753e-01,
         -2.4061e-01,  1.2705e+04],
        [-3.7807e-01, -3.3162e-01, -2.0949e-01,  ...,  1.2557e-01,
         -1.9927e-01,  1.5024e+04],
        [-3.7273e-01, -2.2576e-01, -3.1687e-01,  ...,  2.5766e-01,
         -1.9006e-01,  1.7068e+04],
        ...,
        [-3.6415e-01, -2.9464e-01, -2.1714e-01,  ...,  1.1396e-01,
         -2.2341e-01,  1.0291e+04],
        [-3.0655e-01, -2.5246e-01, -2.5525e-01,  ...,  1.1972e-01,
         -2.6606e-01,  1.0771e+04],
        [-3.2797e-01, -3.1709e-01, -2.3700e-01,  ...,  1.9530e-01,
         -2.3785e-01,  1.3036e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0608],
        [ 0.1007],
        [ 0.2157],
        [ 0.1623],
        [ 0.0297],
        [ 0.2553],
        [ 0.2138],
        [-0.0406],
        [ 0.0070],
        [ 0.1620],
        [ 0.1231],
        [ 0.1648],
        [ 0.2777],
        [ 0.0043],
        [ 0.1338],
        [ 0.0523],
        [ 0.0439],
     

ep  9:  25%|███████████████████                                                         | 7/28 [00:18<00:55,  2.65s/it]

tensor([[-3.8960e-01, -3.4833e-01, -2.7972e-01,  ...,  1.1803e-01,
         -1.9698e-01,  1.4010e+04],
        [-3.6708e-01, -3.2372e-01, -2.3341e-01,  ...,  2.3283e-01,
         -2.7532e-01,  1.6470e+04],
        [-2.1182e-01, -1.6575e-01, -2.9628e-01,  ...,  2.0684e-01,
         -1.8657e-01,  1.7983e+04],
        ...,
        [-3.8619e-01, -2.9432e-01, -2.7822e-01,  ...,  2.1613e-01,
         -1.9002e-01,  1.5901e+04],
        [-3.8717e-01, -2.3963e-01, -2.8286e-01,  ...,  1.5296e-01,
         -2.4989e-01,  1.6180e+04],
        [-3.8531e-01, -3.0977e-01, -2.6267e-01,  ...,  1.3327e-01,
         -2.5768e-01,  1.3565e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1546],
        [ 0.1593],
        [ 0.2180],
        [ 0.2164],
        [ 0.1591],
        [ 0.2244],
        [ 0.1084],
        [ 0.1373],
        [ 0.1320],
        [ 0.1453],
        [-0.0428],
        [ 0.0890],
        [ 0.2033],
        [ 0.0788],
        [ 0.1961],
        [ 0.2530],
        [ 0.1669],
     

ep  9:  29%|█████████████████████▋                                                      | 8/28 [00:21<00:52,  2.65s/it]

tensor([[-3.3109e-01, -4.0199e-01, -2.9982e-01,  ...,  8.8311e-02,
         -2.4402e-01,  1.0785e+04],
        [-2.8029e-01, -2.7048e-01, -2.4660e-01,  ...,  1.9114e-01,
         -2.3730e-01,  1.0197e+04],
        [-3.6401e-01, -2.1846e-01, -2.4990e-01,  ...,  1.9481e-01,
         -2.9688e-01,  1.6561e+04],
        ...,
        [-2.8478e-01, -2.9509e-01, -2.7397e-01,  ...,  8.5816e-02,
         -2.5059e-01,  1.6699e+04],
        [-4.1025e-01, -2.4555e-01, -2.1465e-01,  ...,  1.6131e-01,
         -1.4455e-01,  1.5328e+04],
        [-4.1267e-01, -1.8431e-01, -2.4053e-01,  ...,  1.4198e-01,
         -2.5031e-01,  8.2999e+03]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.3292],
        [ 0.2020],
        [ 0.1086],
        [ 0.1334],
        [ 0.1571],
        [ 0.0972],
        [ 0.0373],
        [ 0.1644],
        [ 0.0673],
        [ 0.3248],
        [ 0.1530],
        [ 0.1773],
        [ 0.1868],
        [ 0.1871],
        [ 0.0931],
        [ 0.0383],
        [ 0.0640],
     

ep  9:  32%|████████████████████████▍                                                   | 9/28 [00:23<00:50,  2.64s/it]

tensor([[-3.9028e-01, -1.6770e-01, -3.0187e-01,  ...,  1.2186e-01,
         -2.6733e-01,  9.5093e+03],
        [-3.9211e-01, -2.1298e-01, -2.9768e-01,  ...,  1.9585e-01,
         -2.2380e-01,  1.1052e+04],
        [-3.7901e-01, -2.5520e-01, -3.4872e-01,  ...,  1.3156e-01,
         -2.6338e-01,  1.2781e+04],
        ...,
        [-3.8223e-01, -2.4435e-01, -2.7752e-01,  ...,  2.1455e-01,
         -2.7440e-01,  1.7326e+04],
        [-4.3401e-01, -2.0826e-01, -3.1283e-01,  ...,  2.0738e-01,
         -2.3788e-01,  1.2908e+04],
        [-3.5833e-01, -2.8241e-01, -2.5474e-01,  ...,  1.8088e-01,
         -2.7653e-01,  1.5802e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0845],
        [ 0.1888],
        [ 0.1267],
        [ 0.0342],
        [ 0.0830],
        [ 0.0346],
        [ 0.1172],
        [-0.0043],
        [ 0.0950],
        [ 0.2169],
        [ 0.1688],
        [-0.0863],
        [ 0.1220],
        [ 0.0379],
        [ 0.1580],
        [ 0.1638],
        [ 0.1670],
     

ep  9:  36%|██████████████████████████▊                                                | 10/28 [00:26<00:47,  2.65s/it]

tensor([[-3.9122e-01, -2.2630e-01, -3.3689e-01,  ...,  3.0330e-01,
         -1.7760e-01,  1.1523e+04],
        [-3.2189e-01, -2.6349e-01, -2.7989e-01,  ...,  1.2000e-01,
         -2.5506e-01,  1.6535e+04],
        [-3.8282e-01, -2.9445e-01, -2.9687e-01,  ...,  1.7893e-01,
         -2.3478e-01,  1.5258e+04],
        ...,
        [-4.1466e-01, -3.2458e-01, -2.7944e-01,  ...,  6.2101e-02,
         -2.6061e-01,  1.3312e+04],
        [-2.8631e-01, -2.6224e-01, -2.6302e-01,  ...,  1.1051e-01,
         -3.1048e-01,  8.4470e+03],
        [-3.5385e-01, -2.7218e-01, -2.6614e-01,  ...,  1.3347e-01,
         -3.0293e-01,  1.5401e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1245],
        [ 0.1540],
        [ 0.1217],
        [ 0.0261],
        [ 0.1562],
        [ 0.0667],
        [ 0.2053],
        [ 0.1472],
        [ 0.2197],
        [ 0.0074],
        [ 0.0706],
        [-0.0020],
        [ 0.1083],
        [-0.0026],
        [ 0.0933],
        [ 0.0532],
        [ 0.1426],
     

ep  9:  39%|█████████████████████████████▍                                             | 11/28 [00:29<00:44,  2.65s/it]

tensor([[-3.5632e-01, -2.3109e-01, -2.6569e-01,  ...,  1.0418e-01,
         -1.9828e-01,  1.1123e+04],
        [-3.8361e-01, -3.1339e-01, -2.8429e-01,  ...,  1.4958e-01,
         -1.7615e-01,  1.5666e+04],
        [-3.5815e-01, -2.8249e-01, -2.5363e-01,  ...,  1.4408e-01,
         -2.3218e-01,  1.2573e+04],
        ...,
        [-4.0192e-01, -2.4517e-01, -2.4261e-01,  ...,  1.1017e-01,
         -2.4420e-01,  1.5500e+04],
        [-3.8263e-01, -1.5271e-01, -3.8456e-01,  ...,  1.8018e-01,
         -1.7891e-01,  8.5047e+03],
        [-3.2809e-01, -2.6925e-01, -2.5427e-01,  ...,  7.8577e-02,
         -2.3456e-01,  1.2267e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1406],
        [ 0.1067],
        [ 0.2338],
        [ 0.0739],
        [ 0.0948],
        [ 0.1717],
        [ 0.0524],
        [ 0.2227],
        [ 0.1502],
        [ 0.1289],
        [ 0.1656],
        [ 0.1280],
        [ 0.1040],
        [ 0.0887],
        [ 0.1140],
        [ 0.1934],
        [ 0.0923],
     

ep  9:  43%|████████████████████████████████▏                                          | 12/28 [00:31<00:42,  2.65s/it]

tensor([[-4.4001e-01, -7.0377e-02, -5.8032e-01,  ...,  3.5792e-01,
         -6.6737e-02,  1.0654e+04],
        [-3.7219e-01, -2.7997e-01, -2.6461e-01,  ...,  1.2341e-01,
         -1.3931e-01,  1.6559e+04],
        [-2.7272e-01, -1.8165e-01, -2.5444e-01,  ...,  1.5176e-01,
         -3.0181e-01,  1.0968e+04],
        ...,
        [-2.6147e-01, -3.1422e-01, -2.5149e-01,  ...,  1.3051e-01,
         -2.2106e-01,  1.6201e+04],
        [-3.7468e-01, -2.9700e-01, -2.4095e-01,  ...,  2.0647e-01,
         -2.2961e-01,  1.7828e+04],
        [-3.6377e-01, -2.9825e-01, -2.8866e-01,  ...,  2.5904e-01,
         -1.9935e-01,  1.6401e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0141],
        [ 0.1861],
        [ 0.1246],
        [ 0.1531],
        [ 0.1940],
        [ 0.2280],
        [ 0.0287],
        [ 0.0471],
        [ 0.2262],
        [ 0.2121],
        [ 0.1276],
        [-0.0133],
        [ 0.1590],
        [ 0.0189],
        [ 0.1311],
        [-0.0017],
        [ 0.0576],
     

ep  9:  46%|██████████████████████████████████▊                                        | 13/28 [00:34<00:39,  2.64s/it]

tensor([[-3.3529e-01, -3.1260e-01, -2.9522e-01,  ...,  1.7853e-01,
         -1.8610e-01,  1.7420e+04],
        [-3.7626e-01, -3.4573e-01, -2.7808e-01,  ...,  7.9544e-02,
         -1.7599e-01,  9.7123e+03],
        [-2.3858e-01, -2.4301e-01, -2.3312e-01,  ...,  1.5578e-01,
         -2.4925e-01,  1.4450e+04],
        ...,
        [-3.5178e-01, -2.7942e-01, -3.0645e-01,  ...,  1.0903e-01,
         -2.2553e-01,  1.2789e+04],
        [-4.5572e-01, -2.1481e-01, -2.7238e-01,  ...,  1.6399e-01,
         -2.0592e-01,  1.2721e+04],
        [-3.9098e-01, -2.3460e-01, -3.6924e-01,  ...,  2.5732e-01,
         -2.0912e-01,  1.0151e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1231],
        [ 0.2007],
        [ 0.0746],
        [ 0.0456],
        [ 0.1440],
        [ 0.1733],
        [ 0.1541],
        [ 0.1716],
        [ 0.0744],
        [-0.0334],
        [ 0.0725],
        [ 0.0257],
        [ 0.1436],
        [ 0.0247],
        [ 0.2169],
        [ 0.0744],
        [ 0.1246],
     

ep  9:  50%|█████████████████████████████████████▌                                     | 14/28 [00:37<00:36,  2.64s/it]

tensor([[-2.7215e-01, -2.7996e-01, -2.0304e-01,  ...,  1.6132e-01,
         -2.3406e-01,  1.7966e+04],
        [-3.5802e-01, -2.8762e-01, -2.4838e-01,  ...,  1.4790e-01,
         -2.1459e-01,  1.2471e+04],
        [-4.0440e-01, -3.2320e-01, -2.3069e-01,  ...,  1.1013e-01,
         -3.0038e-01,  1.7006e+04],
        ...,
        [-4.3821e-01, -2.9287e-01, -3.6068e-01,  ...,  2.8552e-01,
         -1.8130e-01,  1.3038e+04],
        [-4.3659e-01, -1.1693e-01, -5.0763e-01,  ...,  3.4034e-01,
         -1.5983e-01,  1.3881e+04],
        [-4.4971e-01, -2.0917e-01, -3.5736e-01,  ...,  1.6181e-01,
         -2.2850e-01,  1.0173e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0686],
        [ 0.2006],
        [ 0.3146],
        [ 0.1575],
        [ 0.1639],
        [ 0.1149],
        [ 0.3198],
        [ 0.0739],
        [ 0.1347],
        [ 0.1591],
        [ 0.2313],
        [ 0.1957],
        [ 0.2350],
        [ 0.1288],
        [ 0.1930],
        [ 0.2466],
        [ 0.1499],
     

ep  9:  54%|████████████████████████████████████████▏                                  | 15/28 [00:39<00:34,  2.64s/it]

tensor([[-3.6249e-01, -2.9110e-01, -2.0651e-01,  ...,  1.1376e-01,
         -1.9613e-01,  1.2449e+04],
        [-3.4445e-01, -1.2365e-01, -3.7383e-01,  ...,  2.9008e-01,
         -1.9202e-01,  1.6995e+04],
        [-3.5147e-01, -3.0185e-01, -3.0369e-01,  ...,  2.1647e-01,
         -1.7794e-01,  1.8039e+04],
        ...,
        [-3.5298e-01, -1.6399e-01, -2.4613e-01,  ...,  2.3685e-01,
         -1.5084e-01,  1.6346e+04],
        [-3.4904e-01, -2.3148e-01, -3.4298e-01,  ...,  1.3009e-01,
         -1.6722e-01,  9.8711e+03],
        [-3.3034e-01, -2.6911e-01, -2.6549e-01,  ...,  1.2024e-01,
         -2.0961e-01,  1.5215e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1498],
        [ 0.1613],
        [ 0.2227],
        [ 0.2085],
        [ 0.2251],
        [-0.0068],
        [ 0.0783],
        [ 0.2366],
        [ 0.1243],
        [ 0.0946],
        [ 0.2650],
        [ 0.1376],
        [ 0.2924],
        [ 0.2024],
        [ 0.1390],
        [-0.0295],
        [ 0.0226],
     

ep  9:  57%|██████████████████████████████████████████▊                                | 16/28 [00:42<00:31,  2.64s/it]

tensor([[-3.3225e-01, -2.9320e-01, -3.9016e-01,  ...,  2.0294e-01,
         -1.7647e-01,  9.7869e+03],
        [-3.7741e-01, -3.1422e-01, -2.7449e-01,  ...,  1.1584e-01,
         -2.0631e-01,  1.3139e+04],
        [-3.5629e-01, -2.5100e-01, -2.8360e-01,  ...,  1.2610e-01,
         -2.3643e-01,  1.6117e+04],
        ...,
        [-3.6440e-01, -2.5059e-01, -2.6056e-01,  ...,  1.7672e-01,
         -2.5406e-01,  1.7731e+04],
        [-3.8343e-01, -1.9531e-01, -3.0533e-01,  ...,  1.8378e-01,
         -3.2077e-01,  8.4972e+03],
        [-3.9499e-01, -2.0656e-01, -2.4869e-01,  ...,  7.2096e-02,
         -2.2906e-01,  1.1781e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1055],
        [ 0.1294],
        [ 0.1171],
        [ 0.1356],
        [ 0.0700],
        [ 0.1973],
        [ 0.0655],
        [ 0.0953],
        [ 0.2092],
        [ 0.1508],
        [ 0.0486],
        [ 0.3341],
        [ 0.0592],
        [ 0.1785],
        [ 0.0488],
        [ 0.0811],
        [ 0.1061],
     

ep  9:  61%|█████████████████████████████████████████████▌                             | 17/28 [00:44<00:29,  2.64s/it]

tensor([[-3.8586e-01, -2.7884e-01, -3.2816e-01,  ...,  2.5057e-01,
         -2.1443e-01,  1.0451e+04],
        [-3.1510e-01, -2.6024e-01, -2.6087e-01,  ...,  2.2011e-01,
         -2.1986e-01,  1.5440e+04],
        [-3.0730e-01, -2.5806e-01, -3.0354e-01,  ...,  8.1933e-02,
         -2.2969e-01,  1.5962e+04],
        ...,
        [-3.5827e-01, -2.2541e-01, -2.9891e-01,  ...,  9.3634e-03,
         -2.6574e-01,  1.2632e+04],
        [-3.4090e-01, -3.0876e-01, -2.6475e-01,  ...,  1.1471e-01,
         -1.9358e-01,  1.7540e+04],
        [-3.7803e-01, -3.2387e-01, -2.8597e-01,  ...,  1.8241e-01,
         -1.8146e-01,  1.5762e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1520],
        [ 0.1532],
        [ 0.1819],
        [ 0.2373],
        [ 0.3271],
        [ 0.0634],
        [ 0.1481],
        [ 0.0650],
        [ 0.0335],
        [ 0.1240],
        [ 0.2554],
        [ 0.2284],
        [ 0.1173],
        [ 0.1301],
        [ 0.0861],
        [ 0.2232],
        [ 0.1332],
     

ep  9:  64%|████████████████████████████████████████████████▏                          | 18/28 [00:47<00:26,  2.64s/it]

tensor([[-3.5203e-01, -2.4756e-01, -2.3417e-01,  ...,  2.3347e-01,
         -2.3446e-01,  1.5794e+04],
        [-3.9478e-01, -2.7988e-01, -2.2436e-01,  ...,  8.2911e-02,
         -2.5135e-01,  1.1407e+04],
        [-2.9353e-01, -1.8997e-01, -2.4092e-01,  ...,  1.0810e-01,
         -2.4201e-01,  1.2233e+04],
        ...,
        [-2.9556e-01, -3.2445e-01, -2.5020e-01,  ...,  1.6173e-01,
         -2.6765e-01,  1.1477e+04],
        [-3.6403e-01, -1.0659e-01, -3.7077e-01,  ...,  2.3405e-01,
         -2.0374e-01,  1.0198e+04],
        [-3.9801e-01,  4.0682e-02, -6.5775e-01,  ...,  5.3318e-01,
         -1.2505e-01,  1.1134e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0997],
        [ 0.2038],
        [ 0.1338],
        [-0.0883],
        [ 0.0906],
        [ 0.0327],
        [ 0.0450],
        [ 0.0936],
        [ 0.0901],
        [ 0.0314],
        [ 0.1859],
        [ 0.0091],
        [ 0.0980],
        [ 0.1914],
        [ 0.1811],
        [ 0.1623],
        [ 0.2123],
     

ep  9:  68%|██████████████████████████████████████████████████▉                        | 19/28 [00:50<00:23,  2.63s/it]

tensor([[-3.8693e-01, -1.5302e-01, -3.5033e-01,  ...,  2.5264e-01,
         -2.6726e-01,  1.0680e+04],
        [-3.8279e-01, -2.2418e-01, -2.7767e-01,  ...,  5.8433e-02,
         -2.2990e-01,  1.2587e+04],
        [-3.3561e-01, -3.0287e-01, -2.0940e-01,  ...,  4.5192e-02,
         -2.4661e-01,  1.3323e+04],
        ...,
        [-3.8912e-01, -2.7928e-01, -2.9969e-01,  ...,  2.0097e-01,
         -9.1065e-02,  1.7361e+04],
        [-3.6834e-01, -2.3179e-01, -2.7412e-01,  ...,  1.2985e-01,
         -2.6961e-01,  1.2654e+04],
        [-2.9979e-01,  5.4374e-02, -6.5402e-01,  ...,  5.3085e-01,
         -5.0550e-02,  1.0374e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1031],
        [ 0.1263],
        [ 0.0851],
        [ 0.0769],
        [ 0.1268],
        [ 0.1445],
        [ 0.1702],
        [ 0.0788],
        [ 0.1459],
        [ 0.1612],
        [ 0.1671],
        [ 0.1866],
        [ 0.2120],
        [ 0.0578],
        [ 0.2043],
        [ 0.1439],
        [ 0.1250],
     

ep  9:  71%|█████████████████████████████████████████████████████▌                     | 20/28 [00:52<00:21,  2.64s/it]

tensor([[-3.5656e-01, -2.9509e-01, -3.3995e-01,  ...,  2.6484e-01,
         -1.1858e-01,  1.2724e+04],
        [-2.9133e-01, -2.7430e-01, -3.1104e-01,  ...,  2.0555e-01,
         -2.2196e-01,  1.6543e+04],
        [-4.9020e-01, -1.0666e-01, -3.3240e-01,  ...,  3.0624e-01,
         -2.0592e-01,  1.7490e+04],
        ...,
        [-3.6665e-01, -1.9486e-01, -2.3571e-01,  ...,  4.3970e-02,
         -2.4500e-01,  1.2657e+04],
        [-3.4706e-01, -2.2168e-01, -2.5283e-01,  ...,  2.4839e-01,
         -2.5068e-01,  1.6167e+04],
        [-4.0474e-01, -2.0529e-01, -2.9639e-01,  ...,  3.2189e-01,
         -2.3843e-01,  1.5276e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0454],
        [ 0.2744],
        [ 0.0959],
        [ 0.1868],
        [ 0.2784],
        [ 0.0457],
        [ 0.1400],
        [ 0.1823],
        [-0.0066],
        [ 0.0485],
        [ 0.1838],
        [ 0.3326],
        [ 0.1742],
        [ 0.1338],
        [ 0.1582],
        [ 0.1682],
        [ 0.1612],
     

ep  9:  75%|████████████████████████████████████████████████████████▎                  | 21/28 [00:55<00:18,  2.64s/it]

tensor([[-3.1566e-01, -2.9976e-01, -2.8828e-01,  ...,  1.7149e-01,
         -2.2327e-01,  1.1204e+04],
        [-4.1406e-01, -3.2810e-01, -2.4289e-01,  ...,  1.9869e-01,
         -2.0560e-01,  1.2571e+04],
        [-3.8342e-01, -3.3156e-01, -3.4470e-01,  ...,  2.2620e-01,
         -2.5638e-01,  1.8252e+04],
        ...,
        [-4.1488e-01, -2.9495e-01, -3.6600e-01,  ...,  1.9946e-01,
         -2.1680e-01,  1.0332e+04],
        [-3.9960e-01, -2.9212e-01, -3.5465e-01,  ...,  1.0665e-01,
         -2.0778e-01,  1.7056e+04],
        [-3.1586e-01, -3.0913e-01, -1.9099e-01,  ...,  1.7557e-01,
         -2.0668e-01,  1.7776e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1536],
        [ 0.0543],
        [ 0.2618],
        [ 0.2416],
        [ 0.2182],
        [ 0.1588],
        [ 0.1464],
        [ 0.1274],
        [ 0.1786],
        [ 0.0065],
        [ 0.2134],
        [ 0.1530],
        [-0.1219],
        [ 0.0984],
        [ 0.0762],
        [ 0.1507],
        [ 0.1756],
     

ep  9:  79%|██████████████████████████████████████████████████████████▉                | 22/28 [00:58<00:15,  2.64s/it]

tensor([[-2.9683e-01, -1.9551e-01, -3.4187e-01,  ...,  1.2456e-01,
         -2.8549e-01,  1.0406e+04],
        [-3.7872e-01, -2.5139e-01, -2.5074e-01,  ...,  4.6811e-02,
         -2.6901e-01,  1.3351e+04],
        [-3.8542e-01, -2.2897e-01, -3.7290e-01,  ...,  2.7450e-01,
         -1.8366e-01,  1.0345e+04],
        ...,
        [-4.2054e-01, -1.2266e-01, -5.2044e-01,  ...,  3.8560e-01,
         -1.9564e-01,  9.8680e+03],
        [-3.8586e-01, -2.5187e-01, -1.5689e-01,  ...,  4.2151e-02,
         -3.1371e-01,  1.5755e+04],
        [-3.5178e-01, -3.0722e-01, -2.5514e-01,  ...,  1.2126e-01,
         -2.1136e-01,  1.2605e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1145],
        [ 0.1271],
        [ 0.1310],
        [ 0.2575],
        [ 0.2050],
        [ 0.1809],
        [ 0.1135],
        [ 0.2648],
        [ 0.2310],
        [ 0.1410],
        [ 0.1455],
        [ 0.0388],
        [ 0.1371],
        [ 0.0957],
        [-0.0561],
        [ 0.0795],
        [ 0.1500],
     

ep  9:  82%|█████████████████████████████████████████████████████████████▌             | 23/28 [01:00<00:13,  2.64s/it]

tensor([[-3.1318e-01, -2.2005e-01, -3.1124e-01,  ...,  2.4468e-01,
         -2.7365e-01,  1.1993e+04],
        [-4.2881e-01, -2.5989e-01, -2.0768e-01,  ...,  6.7363e-02,
         -3.6985e-01,  1.5544e+04],
        [-4.1133e-01, -2.7734e-01, -3.6478e-01,  ...,  1.0248e-01,
         -2.1866e-01,  1.0416e+04],
        ...,
        [-3.6387e-01, -3.1218e-01, -3.5990e-01,  ...,  1.9568e-01,
         -1.9268e-01,  1.5844e+04],
        [-4.2565e-01, -2.4287e-01, -4.7643e-01,  ...,  2.3749e-01,
         -1.4830e-01,  1.1139e+04],
        [-3.0575e-01, -3.1580e-01, -2.9379e-01,  ...,  1.1958e-01,
         -2.7292e-01,  1.4421e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1651],
        [ 0.2241],
        [ 0.0758],
        [ 0.3044],
        [ 0.0896],
        [ 0.1514],
        [ 0.0336],
        [ 0.1887],
        [ 0.0103],
        [ 0.0761],
        [ 0.0779],
        [ 0.0429],
        [ 0.0952],
        [ 0.2213],
        [ 0.1468],
        [ 0.0237],
        [ 0.0708],
     

ep  9:  86%|████████████████████████████████████████████████████████████████▎          | 24/28 [01:03<00:10,  2.64s/it]

tensor([[-1.5473e-01, -1.8530e-01, -2.5116e-01,  ...,  6.1066e-02,
         -2.5187e-01,  9.2415e+03],
        [-2.8024e-01, -2.3913e-01, -1.9905e-01,  ...,  8.6798e-02,
         -2.1740e-01,  1.7496e+04],
        [-3.7814e-01, -3.3177e-01, -2.6905e-01,  ...,  4.7437e-02,
         -2.8116e-01,  1.6168e+04],
        ...,
        [-2.7420e-01, -1.8211e-01, -2.8865e-01,  ...,  2.1371e-02,
         -3.1516e-01,  1.2226e+04],
        [-3.1120e-01, -1.6614e-01, -3.7460e-01,  ...,  2.9546e-01,
         -1.9251e-01,  1.8286e+04],
        [-3.4854e-01, -2.6576e-01, -3.1358e-01,  ...,  8.4111e-02,
         -2.0287e-01,  1.0608e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0466],
        [ 0.0719],
        [ 0.2091],
        [ 0.0397],
        [ 0.2004],
        [ 0.1204],
        [ 0.1152],
        [ 0.1077],
        [ 0.1771],
        [ 0.2606],
        [ 0.0826],
        [ 0.1818],
        [ 0.1488],
        [ 0.1431],
        [ 0.2456],
        [ 0.1476],
        [ 0.1776],
     

ep  9:  89%|██████████████████████████████████████████████████████████████████▉        | 25/28 [01:06<00:07,  2.64s/it]

tensor([[-2.9910e-01, -3.3945e-01, -2.4857e-01,  ...,  7.9395e-02,
         -3.0249e-01,  1.4946e+04],
        [-4.3634e-01, -1.4242e-01, -4.4456e-01,  ...,  2.2481e-01,
         -2.4052e-01,  1.3928e+04],
        [-3.4271e-01, -2.0203e-01, -2.9406e-01,  ...,  1.9426e-01,
         -1.4554e-01,  1.0191e+04],
        ...,
        [-3.5713e-01, -2.7528e-01, -2.1745e-01,  ...,  1.5995e-01,
         -2.2625e-01,  1.6417e+04],
        [-2.4297e-01, -2.8506e-01, -2.5773e-01,  ...,  1.7914e-01,
         -2.9691e-01,  1.7615e+04],
        [-3.5354e-01, -2.6654e-01, -2.9147e-01,  ...,  2.7048e-01,
         -1.9014e-01,  1.6280e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0743],
        [ 0.0427],
        [ 0.1602],
        [ 0.1929],
        [ 0.2023],
        [ 0.2714],
        [ 0.2027],
        [ 0.0192],
        [ 0.0969],
        [ 0.1443],
        [ 0.1053],
        [ 0.0326],
        [ 0.1669],
        [-0.0239],
        [ 0.1608],
        [ 0.1599],
        [ 0.0280],
     

ep  9:  93%|█████████████████████████████████████████████████████████████████████▋     | 26/28 [01:08<00:05,  2.65s/it]

tensor([[-3.9857e-01, -2.6936e-01, -2.4885e-01,  ...,  9.0627e-02,
         -2.1842e-01,  8.1465e+03],
        [-3.2513e-01, -2.1235e-01, -2.2293e-01,  ...,  1.2823e-01,
         -2.2946e-01,  1.6437e+04],
        [-2.9122e-01, -1.6229e-01, -3.2924e-01,  ...,  2.3002e-01,
         -1.8098e-01,  1.1359e+04],
        ...,
        [-4.8538e-01, -2.2028e-01, -4.8658e-01,  ...,  2.8853e-01,
         -1.6132e-01,  1.2108e+04],
        [-3.6370e-01, -2.9032e-01, -2.3027e-01,  ...,  1.1408e-01,
         -1.9507e-01,  8.7631e+03],
        [-4.3289e-01, -2.6735e-01, -3.8398e-01,  ...,  1.7091e-01,
         -2.1533e-01,  1.2229e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.1959],
        [0.2145],
        [0.0779],
        [0.1558],
        [0.2337],
        [0.2378],
        [0.2225],
        [0.1848],
        [0.1635],
        [0.1412],
        [0.0388],
        [0.0586],
        [0.2785],
        [0.1918],
        [0.1662],
        [0.0625],
        [0.0660],
        [0.1047],
    

ep  9:  96%|████████████████████████████████████████████████████████████████████████▎  | 27/28 [01:11<00:02,  2.65s/it]

tensor([[-2.9943e-01, -2.3502e-01, -2.3397e-01,  ...,  1.1117e-01,
         -2.8190e-01,  1.0444e+04],
        [-3.3664e-01, -1.9020e-01, -4.3378e-01,  ...,  3.9663e-01,
         -2.4718e-01,  9.8202e+03],
        [-3.3856e-01, -3.5045e-01, -2.0582e-01,  ...,  1.5379e-01,
         -2.1888e-01,  1.6445e+04],
        ...,
        [-3.3636e-01, -2.4060e-01, -2.2590e-01,  ...,  1.9761e-01,
         -1.9552e-01,  1.7908e+04],
        [-3.9684e-01, -2.4521e-01, -3.0563e-01,  ...,  1.2455e-01,
         -2.4932e-01,  9.7256e+03],
        [-3.9710e-01, -3.2317e-01, -3.2503e-01,  ...,  1.2561e-01,
         -2.6032e-01,  1.6899e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 1.7153e-01],
        [ 1.3811e-01],
        [ 9.3948e-02],
        [ 3.3819e-03],
        [ 1.0716e-01],
        [ 1.2378e-01],
        [ 1.6876e-01],
        [ 9.7547e-02],
        [-2.1330e-02],
        [ 1.1736e-01],
        [ 1.6714e-01],
        [ 2.3797e-01],
        [ 9.5097e-02],
        [ 7.7098e-02],
      

ep  9: 100%|███████████████████████████████████████████████████████████████████████████| 28/28 [01:13<00:00,  2.64s/it]
valid:   0%|                                                                                     | 0/4 [00:00<?, ?it/s]

tensor([[-3.4811e-01, -3.0575e-01, -2.2737e-01,  ...,  1.4580e-01,
         -2.6121e-01,  1.6480e+04],
        [-4.7082e-01, -2.6833e-01, -2.9708e-01,  ...,  1.2594e-01,
         -1.8253e-01,  9.6865e+03],
        [-3.8984e-01, -3.2699e-01, -2.4948e-01,  ...,  1.9918e-01,
         -1.8639e-01,  1.7107e+04],
        ...,
        [-3.5437e-01, -3.2202e-01, -2.3073e-01,  ...,  2.1523e-01,
         -2.4130e-01,  1.2391e+04],
        [-4.3415e-01, -2.3583e-01, -3.5289e-01,  ...,  1.6644e-01,
         -2.5910e-01,  1.4964e+04],
        [-4.2466e-01, -2.2249e-01, -1.8340e-01,  ...,  1.1842e-01,
         -1.6583e-01,  1.7801e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1589],
        [-0.0019],
        [ 0.0769],
        [ 0.0733],
        [ 0.0853],
        [ 0.1402],
        [ 0.0789],
        [ 0.0846],
        [ 0.0939],
        [ 0.1708],
        [ 0.0959],
        [ 0.1827],
        [ 0.1232],
        [ 0.1852],
        [ 0.0435],
        [-0.0382],
        [ 0.0813],
     

valid:  25%|███████████████████▎                                                         | 1/4 [00:02<00:07,  2.65s/it]

tensor([[-3.2233e-01, -2.8638e-01, -2.5438e-01,  ...,  1.1127e-01,
         -2.4928e-01,  8.2773e+03],
        [-3.6531e-01, -1.8541e-01, -3.2476e-01,  ...,  9.8329e-02,
         -2.3989e-01,  8.2921e+03],
        [-3.7271e-01, -3.1051e-01, -2.6006e-01,  ...,  1.0789e-01,
         -1.8490e-01,  8.4220e+03],
        ...,
        [-3.7780e-01, -2.1708e-01, -3.6814e-01,  ...,  2.7568e-01,
         -1.1368e-01,  7.2709e+03],
        [-3.9939e-01, -2.8329e-01, -2.7683e-01,  ...,  1.3335e-01,
         -2.4340e-01,  7.3509e+03],
        [-3.1813e-01, -1.7252e-01, -3.3864e-01,  ...,  1.8896e-01,
         -2.7303e-01,  7.1148e+03]], device='cuda:0')
tensor([[0.1264],
        [0.1353],
        [0.1365],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [

valid:  50%|██████████████████████████████████████▌                                      | 2/4 [00:05<00:05,  2.65s/it]

tensor([[-2.6459e-01, -1.9816e-01, -2.3166e-01,  ...,  4.3366e-02,
         -2.4275e-01,  7.3657e+03],
        [-4.4351e-01, -1.6997e-01, -2.4599e-01,  ...,  5.0830e-02,
         -2.2045e-01,  7.4660e+03],
        [-3.6358e-01, -2.7964e-01, -2.1717e-01,  ...,  1.4754e-01,
         -2.0238e-01,  7.5556e+03],
        ...,
        [-3.0289e-01, -2.8608e-01, -3.0153e-01,  ...,  9.7022e-02,
         -2.0641e-01,  7.5523e+03],
        [-3.2801e-01, -2.4983e-01, -2.3538e-01,  ...,  1.0617e-01,
         -3.3416e-01,  7.9973e+03],
        [-3.5315e-01, -2.7097e-01, -2.6696e-01,  ...,  1.3142e-01,
         -2.4509e-01,  8.4248e+03]], device='cuda:0')
tensor([[0.1264],
        [0.1353],
        [0.1365],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [

valid:  75%|█████████████████████████████████████████████████████████▊                   | 3/4 [00:07<00:02,  2.64s/it]

tensor([[-4.3092e-01, -2.7025e-01, -3.0428e-01,  ...,  1.7351e-01,
         -2.1021e-01,  8.2736e+03],
        [-3.9687e-01, -1.6447e-01, -3.1085e-01,  ...,  2.0987e-01,
         -1.9773e-01,  8.4973e+03],
        [-4.0648e-01, -2.3350e-01, -3.5499e-01,  ...,  1.4228e-01,
         -2.3323e-01,  8.8352e+03],
        ...,
        [-2.5792e-01, -2.7998e-01, -2.9789e-01,  ...,  6.9041e-02,
         -1.5714e-01,  1.1430e+04],
        [-3.6105e-01, -3.1575e-01, -2.2100e-01,  ...,  1.0072e-01,
         -2.3743e-01,  1.1417e+04],
        [-3.4809e-01, -2.3759e-01, -2.0738e-01,  ...,  1.4380e-01,
         -2.2184e-01,  1.1349e+04]], device='cuda:0')
tensor([[0.1264],
        [0.1353],
        [0.1365],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367],
        [

valid: 100%|█████████████████████████████████████████████████████████████████████████████| 4/4 [00:08<00:00,  2.05s/it]
ep 10:   0%|                                                                                    | 0/28 [00:00<?, ?it/s]

tensor([[-3.0299e-01, -2.0548e-01, -3.7163e-01,  ...,  2.2204e-01,
         -2.4157e-01,  1.1479e+04],
        [-5.2351e-01, -1.3666e-01, -4.0322e-01,  ...,  1.6678e-01,
         -2.3607e-01,  1.1660e+04],
        [-2.8602e-01, -2.1112e-01, -2.7816e-01,  ...,  2.0929e-01,
         -2.4698e-01,  1.1616e+04],
        ...,
        [-4.1360e-01, -3.0982e-01, -2.5829e-01,  ...,  8.7611e-02,
         -2.1257e-01,  1.1642e+04],
        [-4.2525e-01, -2.9543e-01, -3.0947e-01,  ...,  7.3900e-02,
         -2.2249e-01,  1.1782e+04],
        [-2.6573e-01, -2.9025e-01, -3.4377e-01,  ...,  1.3080e-01,
         -1.8334e-01,  1.1734e+04]], device='cuda:0')
tensor([[0.1264],
        [0.1353],
        [0.1365],
        [0.1367],
        [0.1367],
        [0.1367],
        [0.1367]], device='cuda:0')
valid acc 0.5376884422110553


ep 10:   4%|██▋                                                                         | 1/28 [00:02<01:11,  2.64s/it]

tensor([[-4.1961e-01, -2.3234e-01, -2.0888e-01,  ...,  8.3719e-02,
         -2.2842e-01,  1.0402e+04],
        [-3.0479e-01, -2.6535e-01, -2.3657e-01,  ...,  1.0737e-01,
         -2.2892e-01,  1.4910e+04],
        [-3.6499e-01, -1.7875e-01, -2.1814e-01,  ...,  5.1647e-02,
         -2.2522e-01,  9.1716e+03],
        ...,
        [-3.9499e-01, -2.0656e-01, -2.4869e-01,  ...,  7.2096e-02,
         -2.2906e-01,  1.1781e+04],
        [-3.2681e-01, -2.8299e-01, -2.4543e-01,  ...,  1.5832e-01,
         -2.1534e-01,  1.6790e+04],
        [-3.2460e-01, -2.3794e-01, -3.0125e-01,  ...,  1.4221e-01,
         -2.8319e-01,  1.2080e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1345],
        [ 0.1222],
        [ 0.1154],
        [ 0.2366],
        [ 0.1417],
        [ 0.0826],
        [ 0.2378],
        [ 0.1537],
        [ 0.1849],
        [-0.0012],
        [ 0.1086],
        [ 0.1718],
        [ 0.2502],
        [ 0.2719],
        [ 0.0609],
        [ 0.0664],
        [ 0.1799],
     

ep 10:   7%|█████▍                                                                      | 2/28 [00:05<01:09,  2.66s/it]

tensor([[-3.0793e-01, -2.1403e-01, -3.5144e-01,  ...,  1.2687e-01,
         -1.7847e-01,  1.8128e+04],
        [-5.0595e-01, -2.3932e-01, -3.3185e-01,  ...,  2.2144e-01,
         -2.1409e-01,  1.3118e+04],
        [-5.1620e-01, -2.9768e-01, -3.2489e-01,  ...,  2.0508e-01,
         -1.9727e-01,  1.0699e+04],
        ...,
        [-3.5242e-01,  1.0940e-02, -5.3704e-01,  ...,  4.3950e-01,
         -2.2586e-01,  1.7640e+04],
        [-3.2148e-01, -2.9720e-01, -2.7438e-01,  ...,  1.4778e-01,
         -2.5255e-01,  1.3093e+04],
        [-3.4500e-01, -1.9417e-01, -2.4647e-01,  ...,  2.0517e-01,
         -2.2131e-01,  1.6379e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.2085],
        [-0.0223],
        [ 0.1762],
        [ 0.1262],
        [-0.0482],
        [ 0.1412],
        [ 0.2622],
        [ 0.0104],
        [ 0.0988],
        [-0.0983],
        [ 0.1300],
        [ 0.1536],
        [ 0.1209],
        [ 0.2125],
        [ 0.1374],
        [ 0.1566],
        [ 0.1151],
     

ep 10:  11%|████████▏                                                                   | 3/28 [00:08<01:06,  2.67s/it]

tensor([[-3.5866e-01, -3.2195e-01, -2.4707e-01,  ...,  2.3799e-01,
         -2.3182e-01,  1.0407e+04],
        [-3.0704e-01, -3.1550e-02, -6.0952e-01,  ...,  4.6285e-01,
         -3.0047e-02,  1.2258e+04],
        [-3.7887e-01, -2.3717e-01, -2.8090e-01,  ...,  1.4309e-01,
         -2.5729e-01,  1.8203e+04],
        ...,
        [-2.6010e-01, -1.4958e-01, -3.7532e-01,  ...,  3.6609e-01,
         -1.7422e-01,  1.7624e+04],
        [-2.9669e-01, -2.4244e-01, -2.8864e-01,  ...,  1.9634e-01,
         -2.6330e-01,  1.8106e+04],
        [-3.3456e-01, -2.6161e-01, -3.0208e-01,  ...,  1.1735e-01,
         -2.5364e-01,  1.6295e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.2373],
        [ 0.0491],
        [ 0.1125],
        [ 0.1067],
        [ 0.1014],
        [ 0.2169],
        [ 0.1949],
        [ 0.0999],
        [ 0.1879],
        [ 0.1083],
        [ 0.1366],
        [ 0.1747],
        [ 0.0738],
        [ 0.0526],
        [ 0.1552],
        [ 0.2950],
        [ 0.1318],
     

ep 10:  14%|██████████▊                                                                 | 4/28 [00:10<01:03,  2.66s/it]

tensor([[-4.0336e-01, -2.5930e-01, -2.8184e-01,  ...,  2.0432e-01,
         -1.8244e-01,  1.1775e+04],
        [-3.6388e-01, -3.0117e-01, -3.1616e-01,  ...,  9.5336e-02,
         -2.4665e-01,  1.0321e+04],
        [-2.8029e-01, -2.7048e-01, -2.4660e-01,  ...,  1.9114e-01,
         -2.3730e-01,  1.0197e+04],
        ...,
        [-4.1505e-01, -2.7436e-01, -3.2870e-01,  ...,  7.2937e-02,
         -1.8180e-01,  1.3471e+04],
        [-3.9101e-01, -2.6144e-01, -2.7765e-01,  ...,  1.5017e-01,
         -2.7654e-01,  1.7790e+04],
        [-3.8342e-01, -3.3156e-01, -3.4470e-01,  ...,  2.2620e-01,
         -2.5638e-01,  1.8252e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1265],
        [ 0.2020],
        [ 0.1396],
        [ 0.0614],
        [ 0.0807],
        [ 0.1696],
        [ 0.1348],
        [ 0.1033],
        [ 0.1828],
        [ 0.1855],
        [ 0.1215],
        [ 0.1230],
        [ 0.1430],
        [ 0.2395],
        [ 0.1082],
        [ 0.0341],
        [ 0.1065],
     

ep 10:  18%|█████████████▌                                                              | 5/28 [00:13<01:01,  2.66s/it]

tensor([[-3.4367e-01, -2.9089e-01, -2.5346e-01,  ...,  1.8456e-01,
         -1.8029e-01,  1.7492e+04],
        [-3.3392e-01, -2.6067e-01, -2.1885e-01,  ...,  5.4176e-02,
         -3.0223e-01,  1.1020e+04],
        [-4.4494e-01, -3.0656e-01, -2.2652e-01,  ...,  1.8660e-01,
         -1.9929e-01,  1.8038e+04],
        ...,
        [-3.8848e-01, -2.7490e-01, -2.5968e-01,  ...,  1.5026e-01,
         -2.4890e-01,  1.2461e+04],
        [-4.0887e-01, -2.8313e-01, -2.3913e-01,  ...,  1.3526e-01,
         -1.3380e-01,  1.7122e+04],
        [-2.2953e-01, -2.9267e-01, -2.4731e-01,  ...,  4.6101e-02,
         -2.8078e-01,  1.2735e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.2315],
        [ 0.1447],
        [ 0.0993],
        [ 0.1287],
        [ 0.1305],
        [ 0.0837],
        [ 0.1593],
        [-0.0748],
        [ 0.1190],
        [ 0.1370],
        [ 0.0390],
        [ 0.1724],
        [ 0.1583],
        [ 0.1869],
        [ 0.1701],
        [ 0.0782],
        [ 0.2149],
     

ep 10:  21%|████████████████▎                                                           | 6/28 [00:15<00:58,  2.65s/it]

tensor([[-2.2419e-01, -1.4146e-01, -3.0499e-01,  ...,  5.6024e-02,
         -2.5358e-01,  1.0783e+04],
        [-2.5360e-01, -2.3709e-01, -3.4516e-01,  ...,  1.0822e-01,
         -2.1340e-01,  1.1962e+04],
        [-4.1614e-01, -2.2166e-01, -2.4655e-01,  ...,  8.5096e-02,
         -2.6832e-01,  1.0620e+04],
        ...,
        [-3.2476e-01, -2.5880e-01, -3.1514e-01,  ...,  1.5303e-01,
         -2.4269e-01,  1.6264e+04],
        [-3.7901e-01, -2.5520e-01, -3.4872e-01,  ...,  1.3156e-01,
         -2.6338e-01,  1.2781e+04],
        [-3.8401e-01, -1.7940e-01, -3.9021e-01,  ...,  3.3458e-01,
         -1.3967e-01,  1.7720e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1295],
        [ 0.0835],
        [ 0.2341],
        [ 0.0990],
        [ 0.2736],
        [ 0.0826],
        [ 0.1243],
        [ 0.1640],
        [ 0.1824],
        [ 0.2264],
        [ 0.3165],
        [ 0.0834],
        [ 0.1504],
        [ 0.1266],
        [ 0.1138],
        [ 0.1065],
        [ 0.1303],
     

ep 10:  25%|███████████████████                                                         | 7/28 [00:18<00:55,  2.65s/it]

tensor([[-3.6336e-01, -3.0126e-01, -2.9737e-01,  ...,  1.0575e-01,
         -2.4669e-01,  1.5327e+04],
        [-3.7202e-01, -2.7327e-01, -2.6090e-01,  ...,  1.5885e-01,
         -2.3751e-01,  1.4456e+04],
        [-3.4126e-01, -2.9451e-01, -2.0882e-01,  ...,  8.6014e-02,
         -2.1372e-01,  1.5318e+04],
        ...,
        [-3.0067e-01, -2.1020e-01, -1.9355e-01,  ...,  1.6668e-01,
         -2.6090e-01,  1.7007e+04],
        [-3.9478e-01, -2.7988e-01, -2.2436e-01,  ...,  8.2911e-02,
         -2.5135e-01,  1.1407e+04],
        [-3.9070e-01, -2.6128e-01, -2.9004e-01,  ...,  1.0558e-01,
         -2.6413e-01,  1.0359e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1077],
        [ 0.1823],
        [ 0.2365],
        [ 0.1794],
        [ 0.0368],
        [ 0.2192],
        [ 0.3590],
        [ 0.1458],
        [ 0.0682],
        [ 0.2222],
        [ 0.1149],
        [ 0.0419],
        [ 0.2569],
        [ 0.0883],
        [ 0.0925],
        [ 0.1778],
        [ 0.2781],
     

ep 10:  29%|█████████████████████▋                                                      | 8/28 [00:21<00:53,  2.65s/it]

tensor([[-3.7705e-01, -2.8034e-01, -3.4579e-01,  ...,  2.1953e-01,
         -2.4024e-01,  9.2792e+03],
        [-3.6157e-01, -3.0798e-01, -2.6040e-01,  ...,  1.4971e-01,
         -2.4728e-01,  1.7950e+04],
        [-2.4721e-01, -2.6725e-01, -1.4778e-01,  ...,  1.6771e-01,
         -3.1246e-01,  1.6258e+04],
        ...,
        [-3.9811e-01, -2.7188e-01, -2.5854e-01,  ...,  9.6671e-02,
         -2.0376e-01,  1.7924e+04],
        [-4.1151e-01, -2.5520e-01, -3.2343e-01,  ...,  1.0959e-01,
         -2.1659e-01,  1.6493e+04],
        [-3.8531e-01, -3.0977e-01, -2.6267e-01,  ...,  1.3327e-01,
         -2.5768e-01,  1.3565e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.1524],
        [0.1169],
        [0.0585],
        [0.1405],
        [0.1445],
        [0.1614],
        [0.1129],
        [0.2110],
        [0.1111],
        [0.1569],
        [0.2409],
        [0.0583],
        [0.0431],
        [0.1388],
        [0.2170],
        [0.2442],
        [0.1759],
        [0.1393],
    

ep 10:  32%|████████████████████████▍                                                   | 9/28 [00:23<00:50,  2.65s/it]

tensor([[-3.7039e-01, -2.5987e-01, -3.9825e-01,  ...,  2.5497e-01,
         -1.8402e-01,  1.5303e+04],
        [-2.3858e-01, -2.4301e-01, -2.3312e-01,  ...,  1.5578e-01,
         -2.4925e-01,  1.4450e+04],
        [-3.1563e-01, -2.6474e-01, -3.1360e-01,  ...,  1.2401e-01,
         -2.4361e-01,  1.3077e+04],
        ...,
        [-3.6685e-01, -3.6977e-01, -2.5776e-01,  ...,  1.6199e-01,
         -2.6201e-01,  1.7511e+04],
        [-3.3330e-01, -2.4354e-01, -2.6148e-01,  ...,  1.2872e-01,
         -2.5938e-01,  1.7976e+04],
        [-2.9605e-01, -3.0309e-01, -1.8609e-01,  ...,  1.0760e-01,
         -2.5633e-01,  1.8036e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0914],
        [ 0.0969],
        [ 0.0647],
        [ 0.1637],
        [ 0.1669],
        [ 0.0125],
        [ 0.0820],
        [ 0.1129],
        [ 0.0989],
        [ 0.0854],
        [ 0.2407],
        [ 0.1382],
        [ 0.1368],
        [ 0.1483],
        [ 0.2137],
        [ 0.1384],
        [ 0.1045],
     

ep 10:  36%|██████████████████████████▊                                                | 10/28 [00:26<00:47,  2.65s/it]

tensor([[-3.5464e-01, -1.8187e-01, -2.0823e-01,  ...,  2.7590e-01,
         -2.3394e-01,  1.6205e+04],
        [-3.6165e-01, -2.1392e-01, -2.8349e-01,  ...,  4.2911e-02,
         -2.9529e-01,  1.2170e+04],
        [-3.0465e-01, -2.5120e-01, -2.4301e-01,  ...,  1.2616e-01,
         -2.3614e-01,  1.6827e+04],
        ...,
        [-3.2479e-01, -3.1200e-01, -2.4513e-01,  ...,  1.2827e-01,
         -1.8956e-01,  1.6695e+04],
        [-3.1502e-01, -2.6497e-01, -2.4307e-01,  ...,  9.6880e-02,
         -2.5296e-01,  1.1118e+04],
        [-2.9981e-01, -1.5060e-01, -2.7210e-01,  ...,  1.0929e-01,
         -1.8988e-01,  1.3345e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 1.3616e-01],
        [ 1.5892e-01],
        [ 1.0013e-01],
        [ 6.8792e-02],
        [ 1.8732e-01],
        [ 2.0528e-01],
        [ 2.2094e-01],
        [ 2.0886e-01],
        [ 7.5484e-02],
        [ 2.2945e-01],
        [ 3.5738e-02],
        [ 1.7110e-01],
        [-3.5864e-03],
        [ 1.5769e-01],
      

ep 10:  39%|█████████████████████████████▍                                             | 11/28 [00:29<00:45,  2.66s/it]

tensor([[-4.5050e-01, -2.5158e-01, -3.1677e-01,  ...,  1.8283e-01,
         -1.5187e-01,  1.8232e+04],
        [-4.1767e-01, -2.8347e-01, -2.8023e-01,  ...,  1.1432e-01,
         -2.3428e-01,  1.0303e+04],
        [-3.1204e-01, -2.3164e-01, -1.6219e-01,  ...,  9.6379e-02,
         -2.4057e-01,  1.1256e+04],
        ...,
        [-3.5656e-01, -2.5345e-01, -1.8035e-01,  ...,  1.9261e-01,
         -2.0344e-01,  1.6621e+04],
        [-3.1757e-01, -2.3622e-01, -2.0895e-01,  ...,  1.6893e-01,
         -2.6632e-01,  1.8005e+04],
        [-3.7147e-01, -2.0860e-01, -4.1357e-01,  ...,  2.3719e-01,
         -2.2152e-01,  1.5967e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0755],
        [ 0.0485],
        [ 0.2306],
        [ 0.0650],
        [ 0.0883],
        [ 0.0059],
        [ 0.0538],
        [ 0.1378],
        [ 0.0423],
        [-0.0034],
        [ 0.0453],
        [ 0.0179],
        [ 0.0690],
        [ 0.1915],
        [ 0.0890],
        [ 0.1671],
        [ 0.1426],
     

ep 10:  43%|████████████████████████████████▏                                          | 12/28 [00:31<00:42,  2.66s/it]

tensor([[-3.8647e-01, -1.8614e-01, -2.7695e-01,  ...,  2.4617e-01,
         -2.4639e-01,  1.6016e+04],
        [-3.1338e-01, -2.4661e-01, -1.9801e-01,  ...,  1.3050e-01,
         -3.2597e-01,  1.7535e+04],
        [-3.6815e-01, -2.6328e-01, -2.6864e-01,  ...,  2.7225e-01,
         -1.9569e-01,  1.6979e+04],
        ...,
        [-3.8996e-01, -2.6153e-01, -3.2837e-01,  ...,  2.3245e-01,
         -2.2377e-01,  1.2496e+04],
        [-3.9897e-01, -1.2447e-01, -2.0418e-01,  ...,  1.7286e-01,
         -2.9900e-01,  1.0574e+04],
        [-4.0465e-01, -3.0605e-01, -2.9405e-01,  ...,  1.6832e-01,
         -1.9848e-01,  1.7684e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.0488],
        [0.0575],
        [0.1070],
        [0.1312],
        [0.0871],
        [0.0013],
        [0.0957],
        [0.0667],
        [0.0485],
        [0.2881],
        [0.2219],
        [0.0811],
        [0.0916],
        [0.0764],
        [0.1620],
        [0.0721],
        [0.1444],
        [0.1804],
    

ep 10:  46%|██████████████████████████████████▊                                        | 13/28 [00:34<00:39,  2.66s/it]

tensor([[-3.6648e-01, -2.0910e-01, -2.4405e-01,  ...,  1.1508e-01,
         -3.1906e-01,  1.4701e+04],
        [-3.2988e-01, -1.9750e-01, -3.1683e-01,  ...,  1.4667e-01,
         -2.5520e-01,  1.0367e+04],
        [-2.9215e-01, -2.5637e-01, -2.7438e-01,  ...,  1.5287e-01,
         -2.2617e-01,  1.6277e+04],
        ...,
        [-3.4199e-01, -2.1212e-01, -2.3939e-01,  ...,  7.9091e-02,
         -2.7641e-01,  1.3252e+04],
        [-2.6495e-01, -2.6889e-01, -2.0221e-01,  ...,  6.1020e-02,
         -2.0123e-01,  1.5739e+04],
        [-3.6519e-01, -2.3122e-01, -2.8495e-01,  ...,  1.1149e-01,
         -1.9146e-01,  1.2871e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1002],
        [ 0.2131],
        [ 0.0461],
        [ 0.0774],
        [ 0.1541],
        [ 0.1336],
        [ 0.0889],
        [ 0.1225],
        [ 0.0688],
        [ 0.2135],
        [ 0.1183],
        [ 0.0275],
        [ 0.0560],
        [ 0.2670],
        [ 0.1918],
        [ 0.1045],
        [ 0.0737],
     

ep 10:  50%|█████████████████████████████████████▌                                     | 14/28 [00:37<00:37,  2.66s/it]

tensor([[-3.2937e-01, -1.4645e-01, -2.8335e-01,  ...,  1.3363e-01,
         -2.5827e-01,  1.1906e+04],
        [-2.9856e-01, -3.1340e-01, -2.8324e-01,  ...,  2.0545e-01,
         -2.0023e-01,  1.7698e+04],
        [-3.5558e-01, -1.3614e-01, -2.4997e-01,  ...,  2.3521e-01,
         -2.5932e-01,  1.7501e+04],
        ...,
        [-4.3476e-01, -2.4391e-01, -3.8671e-01,  ...,  3.0905e-01,
         -2.1421e-01,  1.6781e+04],
        [-3.6981e-01, -3.1122e-01, -2.8601e-01,  ...,  1.4610e-01,
         -1.9858e-01,  1.2128e+04],
        [-4.1301e-01, -3.1866e-01, -2.7580e-01,  ...,  1.6043e-01,
         -2.2359e-01,  1.2626e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.1494],
        [0.2545],
        [0.1960],
        [0.1046],
        [0.0691],
        [0.0903],
        [0.1179],
        [0.0787],
        [0.1359],
        [0.1216],
        [0.1242],
        [0.1441],
        [0.0700],
        [0.1840],
        [0.0577],
        [0.1265],
        [0.1445],
        [0.1862],
    

ep 10:  54%|████████████████████████████████████████▏                                  | 15/28 [00:39<00:34,  2.65s/it]

tensor([[-3.4169e-01, -9.9232e-02, -3.7249e-01,  ...,  3.7404e-01,
         -1.6907e-01,  1.7793e+04],
        [-2.6685e-01, -3.0251e-01, -2.6052e-01,  ...,  1.1535e-01,
         -3.5282e-01,  1.4054e+04],
        [-3.4467e-01, -2.8231e-01, -2.6908e-01,  ...,  1.1465e-01,
         -1.8580e-01,  1.6197e+04],
        ...,
        [-3.7602e-01, -2.0120e-01, -3.2406e-01,  ...,  1.8356e-01,
         -1.8899e-01,  1.0564e+04],
        [-3.6242e-01, -2.8157e-01, -2.6693e-01,  ...,  1.6457e-01,
         -3.1214e-01,  1.7231e+04],
        [-4.8032e-01, -3.0412e-01, -2.8470e-01,  ...,  1.6406e-01,
         -2.2554e-01,  1.4969e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.0501],
        [0.2101],
        [0.1839],
        [0.2712],
        [0.0367],
        [0.1274],
        [0.1928],
        [0.1970],
        [0.1227],
        [0.2013],
        [0.0563],
        [0.1218],
        [0.0496],
        [0.0792],
        [0.1043],
        [0.1115],
        [0.1138],
        [0.0969],
    

ep 10:  57%|██████████████████████████████████████████▊                                | 16/28 [00:42<00:31,  2.65s/it]

tensor([[-3.3053e-01, -2.9479e-01, -2.9646e-01,  ...,  1.1262e-01,
         -1.9015e-01,  1.7758e+04],
        [-2.7272e-01, -1.8165e-01, -2.5444e-01,  ...,  1.5176e-01,
         -3.0181e-01,  1.0968e+04],
        [-3.3430e-01, -2.5984e-01, -2.6950e-01,  ...,  5.4039e-02,
         -2.8737e-01,  1.2837e+04],
        ...,
        [-3.0447e-01, -2.8756e-02, -5.9845e-01,  ...,  4.7398e-01,
         -1.2268e-01,  9.8858e+03],
        [-4.0161e-01, -2.1939e-01, -3.6434e-01,  ...,  2.3697e-01,
         -1.6834e-01,  9.9858e+03],
        [-4.3367e-01, -1.6017e-01, -4.0959e-01,  ...,  2.4805e-01,
         -1.7386e-01,  1.6409e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 1.0089e-01],
        [ 4.3376e-02],
        [ 1.1167e-01],
        [ 1.9502e-01],
        [ 1.7224e-01],
        [ 1.4987e-01],
        [ 3.1439e-02],
        [ 2.1948e-01],
        [ 2.4300e-01],
        [ 1.4802e-01],
        [ 1.3181e-01],
        [ 1.1229e-01],
        [ 5.3239e-02],
        [ 1.6844e-01],
      

ep 10:  61%|█████████████████████████████████████████████▌                             | 17/28 [00:45<00:29,  2.66s/it]

tensor([[-3.6204e-01, -2.8229e-01, -2.9786e-01,  ...,  1.1430e-01,
         -2.6426e-01,  1.6133e+04],
        [-4.0157e-01, -1.9299e-01, -3.1764e-01,  ...,  1.1570e-01,
         -3.2807e-01,  1.6273e+04],
        [-4.0534e-01, -2.9946e-01, -2.3584e-01,  ...,  1.3014e-01,
         -2.4468e-01,  9.3701e+03],
        ...,
        [-3.8103e-01, -2.3883e-01, -3.8590e-01,  ...,  2.3015e-01,
         -2.6414e-01,  1.2480e+04],
        [-3.8180e-01, -1.6480e-01, -2.5899e-01,  ...,  1.6884e-01,
         -2.1091e-01,  1.6002e+04],
        [-3.8254e-01, -2.2668e-01, -2.9977e-01,  ...,  3.5069e-01,
         -2.6992e-01,  1.6400e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1882],
        [ 0.1182],
        [ 0.1968],
        [ 0.2777],
        [ 0.2004],
        [ 0.0151],
        [ 0.2171],
        [ 0.0795],
        [ 0.1225],
        [ 0.1089],
        [ 0.1560],
        [ 0.1760],
        [ 0.1959],
        [ 0.1767],
        [ 0.0491],
        [ 0.1174],
        [ 0.0119],
     

ep 10:  64%|████████████████████████████████████████████████▏                          | 18/28 [00:47<00:26,  2.67s/it]

tensor([[-4.0726e-01, -2.4532e-01, -4.7755e-01,  ...,  3.8868e-01,
         -1.9176e-01,  1.3112e+04],
        [-3.6767e-01, -2.2676e-01, -2.9387e-01,  ...,  8.5994e-02,
         -2.6699e-01,  9.2810e+03],
        [-3.9370e-01, -2.5807e-01, -3.3574e-01,  ...,  9.2763e-02,
         -1.7573e-01,  1.4676e+04],
        ...,
        [-4.4206e-01, -9.2986e-02, -4.8497e-01,  ...,  3.4418e-01,
         -7.6963e-02,  1.0059e+04],
        [-2.8990e-01, -2.8739e-01, -2.4120e-01,  ...,  1.9121e-01,
         -1.9309e-01,  1.7630e+04],
        [-3.8286e-01, -3.6032e-01, -2.4154e-01,  ...,  9.1512e-02,
         -2.2298e-01,  1.2660e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1361],
        [ 0.0572],
        [ 0.1178],
        [ 0.0773],
        [ 0.1574],
        [ 0.1425],
        [ 0.1138],
        [ 0.1861],
        [ 0.2518],
        [ 0.0517],
        [ 0.0569],
        [ 0.1295],
        [ 0.2236],
        [ 0.1210],
        [ 0.1007],
        [ 0.1057],
        [ 0.0967],
     

ep 10:  68%|██████████████████████████████████████████████████▉                        | 19/28 [00:50<00:23,  2.67s/it]

tensor([[-3.6823e-01, -2.5636e-01, -2.8923e-01,  ...,  2.2207e-01,
         -2.6280e-01,  1.7777e+04],
        [-3.4629e-01, -2.0388e-01, -3.1545e-01,  ...,  1.9942e-01,
         -1.8436e-01,  1.7830e+04],
        [-2.9718e-01, -2.5863e-01, -1.9804e-01,  ...,  1.2711e-01,
         -2.6341e-01,  1.6514e+04],
        ...,
        [-3.1880e-01, -2.3801e-01, -2.7438e-01,  ...,  1.5946e-01,
         -2.3975e-01,  1.2570e+04],
        [-3.1203e-01, -1.7950e-01, -2.1350e-01,  ...,  1.7090e-01,
         -2.4303e-01,  1.0211e+04],
        [-3.8469e-01, -2.7792e-01, -2.2506e-01,  ...,  1.4637e-01,
         -2.1714e-01,  1.2862e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0718],
        [ 0.0677],
        [ 0.1811],
        [ 0.0310],
        [ 0.0341],
        [ 0.0545],
        [ 0.0600],
        [ 0.0739],
        [ 0.0015],
        [ 0.1087],
        [ 0.1406],
        [ 0.2000],
        [ 0.1931],
        [ 0.1159],
        [ 0.0944],
        [ 0.1936],
        [ 0.1508],
     

ep 10:  71%|█████████████████████████████████████████████████████▌                     | 20/28 [00:53<00:21,  2.66s/it]

tensor([[-3.7569e-01, -2.4126e-01, -3.4337e-01,  ...,  1.6244e-01,
         -2.4565e-01,  1.4996e+04],
        [-3.3744e-01, -2.0880e-01, -2.6536e-01,  ...,  1.4940e-01,
         -2.5503e-01,  9.1085e+03],
        [-2.9785e-01, -1.9677e-01, -2.1012e-01,  ...,  1.0743e-01,
         -3.4530e-01,  1.1560e+04],
        ...,
        [-2.7333e-01, -1.8306e-01, -2.4039e-01,  ...,  9.4023e-02,
         -2.3144e-01,  1.7938e+04],
        [-3.9363e-01, -3.1716e-01, -2.3258e-01,  ...,  5.6375e-02,
         -2.4253e-01,  1.2821e+04],
        [-2.2331e-01, -1.7701e-01, -2.1452e-01,  ...,  1.6958e-01,
         -2.7653e-01,  1.7756e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0804],
        [ 0.1501],
        [ 0.0743],
        [ 0.2294],
        [ 0.1381],
        [-0.0125],
        [ 0.2669],
        [ 0.0578],
        [ 0.1087],
        [ 0.0185],
        [ 0.1356],
        [ 0.2261],
        [ 0.1598],
        [ 0.2401],
        [ 0.2026],
        [ 0.2254],
        [ 0.1849],
     

ep 10:  75%|████████████████████████████████████████████████████████▎                  | 21/28 [00:55<00:18,  2.67s/it]

tensor([[-3.2999e-01, -2.5099e-01, -2.4826e-01,  ...,  1.3556e-01,
         -3.0062e-01,  1.7857e+04],
        [-4.0423e-01, -2.1440e-01, -2.5656e-01,  ...,  1.0007e-01,
         -2.1860e-01,  1.2480e+04],
        [-4.3486e-01, -2.8073e-01, -2.5811e-01,  ...,  3.5349e-02,
         -2.8360e-01,  1.5056e+04],
        ...,
        [-4.1310e-01, -2.3542e-01, -2.4912e-01,  ...,  1.0974e-01,
         -2.5079e-01,  1.7623e+04],
        [-3.5534e-01, -2.2710e-01, -2.6643e-01,  ...,  1.4792e-01,
         -1.8963e-01,  1.0337e+04],
        [-3.4595e-01, -2.2762e-01, -2.7537e-01,  ...,  1.8788e-01,
         -2.6112e-01,  1.6449e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[0.1350],
        [0.1699],
        [0.1665],
        [0.0452],
        [0.1695],
        [0.0157],
        [0.0681],
        [0.0333],
        [0.0402],
        [0.1308],
        [0.2300],
        [0.0052],
        [0.0407],
        [0.2270],
        [0.1888],
        [0.1054],
        [0.1947],
        [0.1972],
    

ep 10:  79%|██████████████████████████████████████████████████████████▉                | 22/28 [00:58<00:16,  2.67s/it]

tensor([[-3.7041e-01, -3.0064e-01, -2.6173e-01,  ...,  1.9359e-01,
         -1.6904e-01,  1.8038e+04],
        [-3.4401e-01, -3.1200e-01, -3.0935e-01,  ...,  4.4304e-02,
         -2.0337e-01,  1.0636e+04],
        [-4.0517e-01, -2.4901e-01, -2.8521e-01,  ...,  2.0705e-01,
         -1.9891e-01,  1.8019e+04],
        ...,
        [-3.9801e-01,  4.0682e-02, -6.5775e-01,  ...,  5.3318e-01,
         -1.2505e-01,  1.1134e+04],
        [-3.2805e-01, -2.2651e-01, -2.6661e-01,  ...,  1.8661e-01,
         -2.1290e-01,  1.8050e+04],
        [-2.8613e-01, -1.7168e-01, -2.6973e-01,  ...,  5.5392e-02,
         -2.2505e-01,  1.2572e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1623],
        [ 0.1405],
        [ 0.1663],
        [ 0.0856],
        [ 0.1723],
        [ 0.1976],
        [ 0.1335],
        [ 0.0947],
        [ 0.2132],
        [ 0.1371],
        [ 0.2049],
        [ 0.0603],
        [ 0.0206],
        [ 0.1935],
        [ 0.0723],
        [ 0.1019],
        [ 0.0410],
     

ep 10:  82%|█████████████████████████████████████████████████████████████▌             | 23/28 [01:01<00:13,  2.67s/it]

tensor([[-3.3802e-01, -2.4708e-01, -2.4564e-01,  ...,  1.2539e-01,
         -2.4984e-01,  1.7349e+04],
        [-3.8994e-01, -3.3908e-01, -2.4932e-01,  ...,  7.2639e-02,
         -1.9911e-01,  1.8058e+04],
        [-3.3150e-01, -2.1284e-01, -2.4758e-01,  ...,  2.2206e-01,
         -2.6302e-01,  1.7039e+04],
        ...,
        [-3.2707e-01, -2.6592e-01, -2.0380e-01,  ...,  1.2136e-01,
         -1.6707e-01,  1.0016e+04],
        [-3.8666e-01, -1.0770e-01, -3.5033e-01,  ...,  3.4674e-01,
         -1.9605e-01,  1.0308e+04],
        [-3.7181e-01, -2.4044e-01, -3.9166e-01,  ...,  2.0085e-01,
         -1.1146e-01,  9.7422e+03]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1536],
        [ 0.0520],
        [ 0.1543],
        [ 0.1454],
        [ 0.1136],
        [ 0.0261],
        [ 0.0895],
        [ 0.1452],
        [ 0.0799],
        [ 0.1004],
        [ 0.0992],
        [ 0.2804],
        [ 0.0962],
        [ 0.1377],
        [ 0.1675],
        [ 0.2057],
        [ 0.1567],
     

ep 10:  86%|████████████████████████████████████████████████████████████████▎          | 24/28 [01:03<00:10,  2.66s/it]

tensor([[-3.1510e-01, -2.6024e-01, -2.6087e-01,  ...,  2.2011e-01,
         -2.1986e-01,  1.5440e+04],
        [-3.8028e-01, -2.9210e-01, -2.2280e-01,  ...,  1.0941e-01,
         -2.6782e-01,  1.2503e+04],
        [-4.1398e-01, -1.9822e-01, -2.6480e-01,  ...,  6.3111e-02,
         -1.9066e-01,  1.0428e+04],
        ...,
        [-3.2403e-01, -3.2230e-01, -2.9101e-01,  ...,  1.3252e-01,
         -2.7626e-01,  1.2932e+04],
        [-3.5410e-01, -2.5532e-01, -2.8729e-01,  ...,  9.8469e-02,
         -2.7305e-01,  1.2605e+04],
        [-4.3832e-01, -1.5977e-01, -3.1844e-01,  ...,  2.1437e-01,
         -2.4763e-01,  1.2442e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.1599],
        [ 0.1044],
        [ 0.2342],
        [ 0.2083],
        [ 0.1507],
        [ 0.0672],
        [ 0.1686],
        [ 0.0501],
        [ 0.1616],
        [ 0.1652],
        [ 0.1925],
        [ 0.0908],
        [ 0.0899],
        [ 0.0885],
        [ 0.0821],
        [ 0.0929],
        [ 0.1349],
     

ep 10:  89%|██████████████████████████████████████████████████████████████████▉        | 25/28 [01:06<00:07,  2.66s/it]

tensor([[-2.7144e-01, -2.0806e-01, -3.2938e-01,  ...,  2.7738e-01,
         -3.0361e-01,  1.6738e+04],
        [-4.1313e-01, -2.3705e-01, -3.7843e-01,  ...,  2.4157e-01,
         -2.1135e-01,  1.8120e+04],
        [-3.9957e-01, -2.8424e-01, -2.0503e-01,  ...,  1.6325e-01,
         -1.7560e-01,  1.5254e+04],
        ...,
        [-3.4744e-01, -2.4599e-01, -2.8923e-01,  ...,  1.0845e-01,
         -2.7819e-01,  1.3173e+04],
        [-2.9563e-01, -3.4143e-01, -2.9219e-01,  ...,  1.2291e-01,
         -2.6646e-01,  1.2240e+04],
        [-3.5651e-01, -2.8056e-01, -2.3647e-01,  ...,  1.4962e-01,
         -2.3528e-01,  1.6315e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0643],
        [ 0.0823],
        [ 0.2350],
        [ 0.3373],
        [ 0.0735],
        [ 0.1196],
        [ 0.2571],
        [ 0.3463],
        [ 0.1779],
        [ 0.0933],
        [ 0.1794],
        [ 0.1413],
        [-0.0228],
        [ 0.3016],
        [ 0.2446],
        [ 0.1935],
        [ 0.1904],
     

ep 10:  93%|█████████████████████████████████████████████████████████████████████▋     | 26/28 [01:09<00:05,  2.66s/it]

tensor([[-3.0301e-01, -1.8538e-01, -2.1006e-01,  ...,  1.7466e-01,
         -2.6990e-01,  1.7926e+04],
        [-3.7628e-01, -2.1160e-01, -3.3304e-01,  ...,  1.8846e-01,
         -1.8618e-01,  9.9745e+03],
        [-3.5289e-01,  2.9603e-02, -6.5299e-01,  ...,  5.4676e-01,
         -1.2911e-01,  1.4840e+04],
        ...,
        [-3.4702e-01, -2.7168e-01, -2.5805e-01,  ...,  8.6747e-02,
         -2.1042e-01,  1.4832e+04],
        [-3.8156e-01, -2.7871e-01, -2.8138e-01,  ...,  1.0885e-01,
         -1.5609e-01,  1.0393e+04],
        [-4.1078e-01, -2.2713e-01, -2.5976e-01,  ...,  1.4149e-01,
         -1.5978e-01,  1.3272e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0504],
        [ 0.1297],
        [ 0.0741],
        [ 0.1172],
        [ 0.1353],
        [ 0.0508],
        [ 0.0909],
        [ 0.0614],
        [ 0.0367],
        [ 0.1259],
        [ 0.1648],
        [ 0.1093],
        [ 0.1136],
        [ 0.1516],
        [ 0.0692],
        [ 0.0420],
        [ 0.1508],
     

ep 10:  96%|████████████████████████████████████████████████████████████████████████▎  | 27/28 [01:11<00:02,  2.66s/it]

tensor([[-4.0407e-01, -2.6428e-01, -2.8870e-01,  ...,  2.1023e-01,
         -2.1916e-01,  1.6417e+04],
        [-3.6866e-01, -2.0793e-01, -3.1074e-01,  ...,  1.6657e-01,
         -2.3793e-01,  1.7132e+04],
        [-3.1105e-01, -3.3583e-01, -1.2759e-01,  ...,  1.7681e-01,
         -2.4575e-01,  1.2420e+04],
        ...,
        [-2.3377e-01, -2.8372e-01, -2.5461e-01,  ...,  2.3917e-02,
         -2.7008e-01,  8.3595e+03],
        [-2.6379e-01, -2.7067e-01, -3.0095e-01,  ...,  1.5841e-01,
         -2.4826e-01,  1.1796e+04],
        [-4.5510e-01, -2.7395e-01, -2.7608e-01,  ...,  1.0589e-01,
         -2.2151e-01,  1.5484e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[ 0.0897],
        [ 0.1080],
        [ 0.0507],
        [ 0.0318],
        [ 0.1522],
        [ 0.1048],
        [ 0.2640],
        [ 0.1135],
        [ 0.2609],
        [ 0.1513],
        [ 0.1977],
        [ 0.1944],
        [ 0.0751],
        [ 0.1205],
        [ 0.0348],
        [ 0.2800],
        [ 0.1237],
     

ep 10: 100%|███████████████████████████████████████████████████████████████████████████| 28/28 [01:14<00:00,  2.66s/it]
valid:   0%|                                                                                     | 0/4 [00:00<?, ?it/s]

tensor([[-3.5632e-01, -2.3109e-01, -2.6569e-01,  ...,  1.0418e-01,
         -1.9828e-01,  1.1123e+04],
        [-3.4392e-01, -2.5117e-01, -2.4104e-01,  ...,  1.1470e-01,
         -2.1081e-01,  1.0409e+04],
        [-3.8863e-01, -1.5658e-01, -3.1093e-01,  ...,  1.4145e-01,
         -1.9731e-01,  1.1577e+04],
        ...,
        [-2.8631e-01, -2.6224e-01, -2.6302e-01,  ...,  1.1051e-01,
         -3.1048e-01,  8.4470e+03],
        [-3.5969e-01, -1.2759e-01, -1.7561e-01,  ...,  1.4041e-01,
         -2.2655e-01,  1.0024e+04],
        [-3.0655e-01, -2.5246e-01, -2.5525e-01,  ...,  1.1972e-01,
         -2.6606e-01,  1.0771e+04]], device='cuda:0', grad_fn=<CatBackward>)
tensor([[-0.0083],
        [ 0.0548],
        [ 0.0819],
        [ 0.0703],
        [ 0.0860],
        [ 0.1333],
        [ 0.1748],
        [ 0.0644],
        [ 0.0489],
        [ 0.1560],
        [-0.0359],
        [ 0.1935],
        [ 0.2687],
        [ 0.1220],
        [ 0.1462],
        [ 0.2774],
        [ 0.2614],
     

valid:  25%|███████████████████▎                                                         | 1/4 [00:02<00:07,  2.60s/it]

tensor([[-3.2233e-01, -2.8638e-01, -2.5438e-01,  ...,  1.1127e-01,
         -2.4928e-01,  8.2773e+03],
        [-3.6531e-01, -1.8541e-01, -3.2476e-01,  ...,  9.8329e-02,
         -2.3989e-01,  8.2921e+03],
        [-3.7271e-01, -3.1051e-01, -2.6006e-01,  ...,  1.0789e-01,
         -1.8490e-01,  8.4220e+03],
        ...,
        [-3.7780e-01, -2.1708e-01, -3.6814e-01,  ...,  2.7568e-01,
         -1.1368e-01,  7.2709e+03],
        [-3.9939e-01, -2.8329e-01, -2.7683e-01,  ...,  1.3335e-01,
         -2.4340e-01,  7.3509e+03],
        [-3.1813e-01, -1.7252e-01, -3.3864e-01,  ...,  1.8896e-01,
         -2.7303e-01,  7.1148e+03]], device='cuda:0')
tensor([[0.1251],
        [0.1333],
        [0.1346],
        [0.1347],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [

valid:  50%|██████████████████████████████████████▌                                      | 2/4 [00:05<00:05,  2.60s/it]

tensor([[-2.6459e-01, -1.9816e-01, -2.3166e-01,  ...,  4.3366e-02,
         -2.4275e-01,  7.3657e+03],
        [-4.4351e-01, -1.6997e-01, -2.4599e-01,  ...,  5.0830e-02,
         -2.2045e-01,  7.4660e+03],
        [-3.6358e-01, -2.7964e-01, -2.1717e-01,  ...,  1.4754e-01,
         -2.0238e-01,  7.5556e+03],
        ...,
        [-3.0289e-01, -2.8608e-01, -3.0153e-01,  ...,  9.7022e-02,
         -2.0641e-01,  7.5523e+03],
        [-3.2801e-01, -2.4983e-01, -2.3538e-01,  ...,  1.0617e-01,
         -3.3416e-01,  7.9973e+03],
        [-3.5315e-01, -2.7097e-01, -2.6696e-01,  ...,  1.3142e-01,
         -2.4509e-01,  8.4248e+03]], device='cuda:0')
tensor([[0.1251],
        [0.1333],
        [0.1346],
        [0.1347],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [

valid:  75%|█████████████████████████████████████████████████████████▊                   | 3/4 [00:07<00:02,  2.60s/it]

tensor([[-4.3092e-01, -2.7025e-01, -3.0428e-01,  ...,  1.7351e-01,
         -2.1021e-01,  8.2736e+03],
        [-3.9687e-01, -1.6447e-01, -3.1085e-01,  ...,  2.0987e-01,
         -1.9773e-01,  8.4973e+03],
        [-4.0648e-01, -2.3350e-01, -3.5499e-01,  ...,  1.4228e-01,
         -2.3323e-01,  8.8352e+03],
        ...,
        [-2.5792e-01, -2.7998e-01, -2.9789e-01,  ...,  6.9041e-02,
         -1.5714e-01,  1.1430e+04],
        [-3.6105e-01, -3.1575e-01, -2.2100e-01,  ...,  1.0072e-01,
         -2.3743e-01,  1.1417e+04],
        [-3.4809e-01, -2.3759e-01, -2.0738e-01,  ...,  1.4380e-01,
         -2.2184e-01,  1.1349e+04]], device='cuda:0')
tensor([[0.1251],
        [0.1333],
        [0.1346],
        [0.1347],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [0.1348],
        [

valid: 100%|█████████████████████████████████████████████████████████████████████████████| 4/4 [00:08<00:00,  2.02s/it]

tensor([[-3.0299e-01, -2.0548e-01, -3.7163e-01,  ...,  2.2204e-01,
         -2.4157e-01,  1.1479e+04],
        [-5.2351e-01, -1.3666e-01, -4.0322e-01,  ...,  1.6678e-01,
         -2.3607e-01,  1.1660e+04],
        [-2.8602e-01, -2.1112e-01, -2.7816e-01,  ...,  2.0929e-01,
         -2.4698e-01,  1.1616e+04],
        ...,
        [-4.1360e-01, -3.0982e-01, -2.5829e-01,  ...,  8.7611e-02,
         -2.1257e-01,  1.1642e+04],
        [-4.2525e-01, -2.9543e-01, -3.0947e-01,  ...,  7.3900e-02,
         -2.2249e-01,  1.1782e+04],
        [-2.6573e-01, -2.9025e-01, -3.4377e-01,  ...,  1.3080e-01,
         -1.8334e-01,  1.1734e+04]], device='cuda:0')
tensor([[0.1251],
        [0.1333],
        [0.1346],
        [0.1347],
        [0.1348],
        [0.1348],
        [0.1348]], device='cuda:0')
valid acc 0.5376884422110553
Finished Training





## Record

In [9]:
records_p = data_dir + f'results/records-{num_epochs}.json'

with open(records_p, 'r') as f:
    records = json.loads(f.read())
records = [json.loads(record) for record in records]
records = {(r['hidden_size'], r['num_linear']) : r for r in records}

fig, axs = plt.subplots(len(hidden_sizes), len(num_linears), sharex=True, sharey=True, figsize=(12,8))
axs = axs if len(hidden_sizes) == 1 and len(num_linears) == 1 else axs[ihs, inl]
for ihs, hidden_size in enumerate(hidden_sizes):
    for inl, num_linear in enumerate(num_linears):
        r = records[(hidden_size, num_linear)]
        axs.plot([i for i in range(num_epochs)], r['acc_train'])
        axs.plot([i for i in range(0, num_epochs, 3)], r['acc_valid'], 'tab:orange')
        axs.set_title('hs={}, nl={} ({:.1f})'.format(hidden_size, num_linear, r['acc_valid'][-1]*100))

# for ax in axs.flat:
#     ax.set(xlabel='epochs', ylabel='accuracy')
#     # Hide x labels and tick labels for top plots and y ticks for right plots.
#     ax.label_outer()

fig.savefig(data_dir + f'results/plt-{num_epochs}.png')
fig.show()

FileNotFoundError: [Errno 2] No such file or directory: 'D:\\COMP5214 WORKSPACE\\project1\\COMP5214_Tradingresults/records-10.json'