<a href="https://colab.research.google.com/github/xSakix/AI_colab_notebooks/blob/master/reformer_pytorch_cuda.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install the notebook's dependencies into the Colab runtime.
!pip install torch
# reformer_pytorch is pinned to 0.12.7. --force-reinstall makes pip fetch the
# package and its dependencies again (the captured output shows torch being
# collected a second time).
!pip install reformer_pytorch==0.12.7 --force-reinstall 
# transformers is imported later for its optimizer/scheduler helpers.
!pip install transformers

Collecting reformer_pytorch==0.12.7
  Downloading https://files.pythonhosted.org/packages/c7/76/e16c3f0904011223e8c4a853d3b08a300db74c4a90a4a983f1a7d934fd63/reformer_pytorch-0.12.7.tar.gz
Collecting revtorch>=0.2.4
  Downloading https://files.pythonhosted.org/packages/7b/7f/6b2247e5ce4b8969dedfcaec064c59ce0417cddbe638bfa6169ff586eaea/revtorch-0.2.4.tar.gz
Collecting torch
[?25l  Downloading https://files.pythonhosted.org/packages/24/19/4804aea17cd136f1705a5e98a00618cb8f6ccc375ad8bfa437408e09d058/torch-1.4.0-cp36-cp36m-manylinux1_x86_64.whl (753.4MB)
[K     |████████████████████████████████| 753.4MB 22kB/s 
[?25hBuilding wheels for collected packages: reformer-pytorch, revtorch
  Building wheel for reformer-pytorch (setup.py) ... [?25l[?25hdone
  Created wheel for reformer-pytorch: filename=reformer_pytorch-0.12.7-cp36-none-any.whl size=8720 sha256=b1c45487c4369d4a96259978a3121f38836018059f163ce352a02ef38974df1f
  Stored in directory: /root/.cache/pip/wheels/61/b8/d4/a72dab74c922c6

In [2]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime → "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Thu Feb 27 12:44:43 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.48.02    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P0    27W / 250W |      0MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [3]:
# Mount Google Drive at /content/drive. Later cells read checkpoints and the
# training corpus from paths under '/content/drive/My Drive/'. The call asks
# for an authorization code (see the captured output below).
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [4]:
# load model file and epoch
import os
import re

MODEL_SAVE_DIR = '/content/drive/My Drive/model_saves'
# Checkpoints are written by the training loop as 'epoch-<step>.pt'.
CHECKPOINT_NAME = re.compile(r'epoch-(\d+)\.pt')


def find_latest_checkpoint(filenames):
  """Pick the checkpoint with the highest step number.

  Args:
    filenames: iterable of bare file names (no directory part).

  Returns:
    A ``(filename, step)`` tuple for the highest-numbered name of the exact
    form ``epoch-<digits>.pt``, or ``(None, 0)`` when no name matches.

  The step is compared as an integer. A plain string sort is not enough:
  'epoch-9900.pt' sorts after 'epoch-91700.pt' lexicographically even though
  it is the older checkpoint.
  """
  numbered = []
  for name in filenames:
    match = CHECKPOINT_NAME.fullmatch(name)
    if match:
      numbered.append((int(match.group(1)), name))
  if not numbered:
    return None, 0
  step, name = max(numbered)
  return name, step


files = []
if os.path.isdir(MODEL_SAVE_DIR):
  files = [f for f in os.listdir(MODEL_SAVE_DIR) if f.startswith('epoch')]
else:
  # A missing directory means there is nothing to resume from; say so instead
  # of failing with a bare FileNotFoundError.
  print('checkpoint directory not found:', MODEL_SAVE_DIR)

# Defaults describe a fresh run: no checkpoint to load, zero steps completed.
last_model_file = None
epochs_run = 0
latest_name, latest_step = find_latest_checkpoint(files)
if latest_name is not None:
  last_model_file = os.path.join(MODEL_SAVE_DIR, latest_name)
  epochs_run = latest_step
  print(last_model_file)
  print('number of epochs run:', epochs_run)


/content/drive/My Drive/model_saves/epoch-91700.pt
number of epochs run: 91700


In [0]:
from reformer_pytorch import ReformerLM

import random
import tqdm
import gzip
import numpy as np
import torch
import torch.optim as optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
import os
from transformers import BertTokenizer, AdamW, get_linear_schedule_with_warmup

# constants

# Upper bound of the training loop's step range.
NUM_BATCHES = int(1e5)
# Number of sequences per DataLoader batch.
BATCH_SIZE = 8
# Number of forward/backward passes accumulated before each optimizer step.
GRADIENT_ACCUMULATE_EVERY = 4
# Learning rate handed to the optimizer.
LEARNING_RATE = 3e-4
# Every this many steps the loop saves a checkpoint and evaluates one
# validation batch; the same value is reused as the scheduler's warmup length.
VALIDATE_EVERY  = 100
# Every this many steps the loop prints a generated sample.
GENERATE_EVERY  = 500
# Number of tokens sampled per generated sample.
GENERATE_LENGTH = 512
# Length of each dataset window and the model's max_seq_len.
SEQ_LEN = 4096

# helpers

def cycle(loader):
    """Yield the items of ``loader`` forever, re-iterating it whenever it runs out."""
    while True:
        yield from loader

def get_top_p(logits, top_p=0.9):
    """Apply nucleus (top-p) filtering to ``logits`` along the last dimension.

    For each row, tokens are ranked by logit. The smallest prefix whose
    cumulative softmax probability exceeds ``top_p`` is kept, and every other
    entry is set to ``-inf``. The highest-ranked token is always kept.

    Args:
        logits: floating-point tensor of unnormalised scores. May be 1-D
            (a single distribution) or have leading batch dimensions.
        top_p: cumulative-probability threshold.

    Returns:
        The same tensor object, modified in place, with filtered entries set
        to ``-inf``.
    """
    sorted_logits, sorted_indices = torch.sort(logits, descending=True, dim=-1)
    cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

    sorted_indices_to_remove = cumulative_probs > top_p
    # Shift the mask one place to the right so the first token that crosses the
    # threshold is kept, and never drop the top-ranked token.
    sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
    sorted_indices_to_remove[..., 0] = False

    # Scatter the mask from sorted order back to vocabulary order. Unlike fancy
    # indexing with the flattened sorted indices, this keeps each row's mask in
    # its own row, so batched input is filtered correctly.
    indices_to_remove = torch.zeros_like(sorted_indices_to_remove).scatter(
        -1, sorted_indices, sorted_indices_to_remove
    )
    logits[indices_to_remove] = float('-inf')
    return logits

def sample_next_token(logits, top_p=0.9, temperature = 1.0):
    """Draw one token id from the model output at the last sequence position.

    ``logits`` is indexed as ``logits[0, -1, :]``, so only the first batch
    element's final position is used. Those scores are divided by
    ``temperature``, filtered with ``get_top_p`` and turned into probabilities
    by softmax. One index is then drawn with ``torch.multinomial``.

    Returns:
        The tensor produced by ``torch.multinomial(probs, 1)``.
    """
    last_position = logits[0, -1, :]
    scaled = last_position / temperature
    nucleus = get_top_p(scaled, top_p=top_p)

    distribution = F.softmax(nucleus, dim=-1)
    return torch.multinomial(distribution, 1)

def decode_token(token):
    """Turn one integer code point into its single-character string via ``chr``."""
    character = chr(token)
    return str(character)

def decode_tokens(tokens):
    """Decode an iterable of code points into one string, one ``decode_token`` call per item."""
    return ''.join(decode_token(tok) for tok in tokens)

# instantiate model
# Build the ReformerLM language model. max_seq_len is tied to SEQ_LEN, the
# window length served by the dataset. num_tokens = 256 matches the byte-valued
# corpus (the data cell turns each byte of the gzip file into one token id).

model = ReformerLM(
    dim = 512,
    depth = 6,
    max_seq_len = SEQ_LEN,
    num_tokens = 256,
    heads = 8,
    bucket_size = 64,
    n_hashes = 8,
    ff_chunks = 10,
    lsh_dropout = 0.1,
    weight_tie = True,
    causal = True,
    use_full_attn = False # set this to true for comparison with full attention
)

# Earlier configuration kept for reference; it differs from the active one
# only in n_hashes = 4.
# model = ReformerLM(
#     dim = 512,
#     depth = 6,
#     max_seq_len = SEQ_LEN,
#     num_tokens = 256,
#     heads = 8,
#     bucket_size = 64,
#     n_hashes = 4,
#     ff_chunks = 10,
#     lsh_dropout = 0.1,
#     weight_tie = True,
#     causal = True,
#     use_full_attn = False # set this to true for comparison with full attention
# )

# Resume from the checkpoint found by the checkpoint-discovery cell, if any.
# NOTE(review): torch.load is called without map_location; the checkpoints are
# saved from a model that lives on the GPU, so this presumably needs a CUDA
# runtime to load. Confirm before running on CPU.
if last_model_file is not None:
  model.load_state_dict(torch.load(last_model_file ))

# Move the model's parameters to the GPU before the optimizer is created.
model.cuda()


# prepare byte-level training data (gzip-compressed corpus stored on Drive)

with gzip.open('/content/drive/My Drive/model_data/merged.gz') as file:
    raw_bytes = file.read()

# View the decompressed bytes directly as a uint8 array instead of building a
# Python list with one int object per byte, which is far slower and needs many
# times the memory. np.frombuffer over an immutable bytes object yields a
# read-only array, so copy it once to give torch.from_numpy a writable buffer
# to share.
X = np.frombuffer(raw_bytes, dtype=np.uint8).copy()
del raw_bytes  # the array owns its own copy now; release the raw buffer

# First 80% of the corpus is used for training, the remaining 20% for validation.
si = int(len(X)-len(X)*0.2)
trX, vaX = np.split(X, [si])
# The tensors are uint8; consumers that need class indices must cast
# (TextSamplerDataset already calls .long() on every window it serves).
data_train, data_val = torch.from_numpy(trX), torch.from_numpy(vaX)

class TextSamplerDataset(Dataset):
    """Dataset serving randomly positioned (input, target) windows of a token tensor.

    ``data`` is measured with ``data.size(0)`` and sliced along its first
    dimension, so it is presumably a 1-D tensor of token ids (confirm against
    the caller). Every item is a pair of ``seq_len``-long tensors in which the
    target is the input shifted one position to the right.
    """

    def __init__(self, data, seq_len):
        super().__init__()
        self.seq_len = seq_len
        self.data = data

    def __len__(self):
        # Nominal size: how many non-overlapping seq_len windows fit in the data.
        total_tokens = self.data.size(0)
        return total_tokens // self.seq_len

    def __getitem__(self, index):
        # `index` is ignored: every access draws a fresh random window.
        window = self.seq_len + 1  # one extra token so inputs and targets can be offset by one
        upper = self.data.size(0) - window
        start = torch.randint(0, upper, (1,))
        chunk = self.data[start: start + window].long()
        inputs, targets = chunk[:-1], chunk[1:]
        # Both halves are moved to the GPU before being returned.
        return inputs.cuda(), targets.cuda()

# Wrap each split in a sampler of SEQ_LEN-token windows.
train_dataset = TextSamplerDataset(data_train, SEQ_LEN)
val_dataset   = TextSamplerDataset(data_val, SEQ_LEN)
# cycle() restarts a DataLoader whenever it is exhausted, so the training loop
# can keep calling next(...) on these iterators indefinitely.
train_loader  = cycle(DataLoader(train_dataset, batch_size = BATCH_SIZE))
val_loader    = cycle(DataLoader(val_dataset, batch_size = BATCH_SIZE))

# Report the nominal number of windows in each split (see __len__).
print(len(train_dataset))
print(len(val_dataset))

# optimizer
# optimizer.load_state_dict(torch.load('optimizer.pt'))
# scheduler.load_state_dict(torch.load('scheduler.pt'))

# optim = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE,amsgrad=True)
# NOTE(review): this assignment rebinds `optim`, which the import cell bound to
# the torch.optim module; from here on `optim` is the SGD optimizer instance.
optim = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE,momentum=0.9,weight_decay=1e-3)

# Restore optimizer state saved by a previous run, if present.
if os.path.exists('/content/drive/My Drive/model_saves/optim.pt'):
  optim.load_state_dict(torch.load('/content/drive/My Drive/model_saves/optim.pt'))

#scheduler

# scheduler = torch.optim.lr_scheduler.StepLR(optim, step_size=VALIDATE_EVERY, gamma=0.1)

# Linear warmup over VALIDATE_EVERY steps followed by linear decay.
# NOTE(review): the training loop calls scheduler.step() once per iteration and
# runs at most NUM_BATCHES iterations, but num_training_steps below is
# len(train_dataset) // GRADIENT_ACCUMULATE_EVERY * NUM_BATCHES, which is far
# larger. The decay therefore barely progresses during a run; confirm this is
# intended.
scheduler = get_linear_schedule_with_warmup(
            optim,
            num_warmup_steps=VALIDATE_EVERY,
            num_training_steps=len(train_dataset) // GRADIENT_ACCUMULATE_EVERY * NUM_BATCHES
        )

# Restore scheduler state saved by a previous run, if present.
if os.path.exists('/content/drive/My Drive/model_saves/scheduler.pt'):
  scheduler.load_state_dict(torch.load('/content/drive/My Drive/model_saves/scheduler.pt'))

# training

def get_batch_loss(model, data):
    """Compute the mean cross-entropy of ``model`` on one ``(inputs, targets)`` batch.

    Args:
        model: callable mapping the inputs to per-position class scores.
        data: pair ``(inputs, targets)``.

    Returns:
        Scalar tensor holding the cross-entropy averaged over all positions.
    """
    inputs, targets = data
    logits = model(inputs)
    # F.cross_entropy expects the class dimension at index 1, so swap the last
    # two axes of the model output before scoring.
    class_first = logits.transpose(1, 2)
    return F.cross_entropy(class_first, targets, reduction='mean')

# Main training loop. Starts at `epochs_run` (the step parsed from the latest
# checkpoint name, or 0) and runs up to NUM_BATCHES optimizer steps.
for i in tqdm.tqdm(range(epochs_run, NUM_BATCHES), mininterval=10., desc='training'):
    model.train()

    # Accumulate gradients over several batches before a single optimizer step.
    # NOTE(review): the loss is not divided by GRADIENT_ACCUMULATE_EVERY, so the
    # accumulated gradient is a sum rather than a mean over the micro-batches.
    # Confirm this is intended.
    for __ in range(GRADIENT_ACCUMULATE_EVERY):
        loss = get_batch_loss(model, next(train_loader))
        loss.backward()

    # Only the loss of the last accumulated batch is reported.
    print(f'training loss: {loss.item()}')
    # Clip the total gradient norm to 0.5 before applying the update.
    torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
    optim.step()
    optim.zero_grad()
    scheduler.step()

    if i % VALIDATE_EVERY == 0:
        # Persist model, optimizer and scheduler state so a later session can resume.
        # NOTE(review): the checkpoint is named after step i and written after
        # that step's update, while a resumed run starts its range at the same
        # i, so the saved step is executed again on resume.
        torch.save(model.state_dict(), os.path.join('/content/drive/My Drive/model_saves', 'epoch-{}.pt'.format(i)))
        torch.save(optim.state_dict(),'/content/drive/My Drive/model_saves/optim.pt')
        torch.save(scheduler.state_dict(),'/content/drive/My Drive/model_saves/scheduler.pt')
        # Validation uses a single batch drawn from the validation loader.
        model.eval()
        with torch.no_grad():
            loss = get_batch_loss(model, next(val_loader))
            print(f'validation loss: {loss.item()}')

    if i % GENERATE_EVERY == 0:
        model.eval()
        with torch.no_grad():
            # Take one validation window as the prompt; the target half is discarded.
            inp, _ = random.choice(val_dataset)
            output_str = ''
            prime = decode_tokens(inp)

            # print(f'%s \n\n %s', (prime, '*' * 100))
            print(prime)
            print('*'*100)

            # Autoregressive sampling with a sliding window: each step drops the
            # oldest token and appends the sampled one, keeping the input
            # length constant.
            for _ in tqdm.tqdm(range(GENERATE_LENGTH), desc='generating'):
                logits = model(inp[None, :])
                next_token = sample_next_token(logits)
                output_str += decode_token(next_token)
                inp = torch.cat((inp[1:], next_token), dim=0)

            print(output_str)

55239
13809


training:   0%|          | 0/8300 [00:00<?, ?it/s]

training loss: 1.2249150276184082


training:   0%|          | 1/8300 [00:17<40:24:39, 17.53s/it]

validation loss: 1.4861607551574707


training:   0%|          | 2/8300 [00:33<39:09:59, 16.99s/it]

training loss: 0.6849892735481262


training:   0%|          | 3/8300 [00:48<38:16:50, 16.61s/it]

training loss: 0.6937302350997925


training:   0%|          | 4/8300 [01:04<37:39:17, 16.34s/it]

training loss: 0.7848326563835144


training:   0%|          | 5/8300 [01:20<37:12:05, 16.15s/it]

training loss: 0.6002750992774963


training:   0%|          | 6/8300 [01:36<36:53:03, 16.01s/it]

training loss: 0.5118744969367981


training:   0%|          | 7/8300 [01:51<36:39:45, 15.92s/it]

training loss: 0.8865579962730408


training:   0%|          | 8/8300 [02:07<36:30:36, 15.85s/it]

training loss: 0.7288135290145874


training:   0%|          | 9/8300 [02:23<36:24:26, 15.81s/it]

training loss: 0.6375588178634644


training:   0%|          | 10/8300 [02:38<36:20:21, 15.78s/it]

training loss: 0.6668575406074524


training:   0%|          | 11/8300 [02:54<36:17:05, 15.76s/it]

training loss: 0.7117567658424377


training:   0%|          | 12/8300 [03:10<36:14:51, 15.74s/it]

training loss: 0.4076291620731354


training:   0%|          | 13/8300 [03:26<36:12:59, 15.73s/it]

training loss: 0.7782996892929077


training:   0%|          | 14/8300 [03:41<36:11:59, 15.73s/it]

training loss: 0.9618884921073914


training:   0%|          | 15/8300 [03:57<36:11:15, 15.72s/it]

training loss: 1.0756224393844604


training:   0%|          | 16/8300 [04:13<36:10:41, 15.72s/it]

training loss: 0.38304707407951355


training:   0%|          | 17/8300 [04:28<36:10:07, 15.72s/it]

training loss: 0.9162852168083191


training:   0%|          | 18/8300 [04:44<36:09:30, 15.72s/it]

training loss: 0.5511187314987183


training:   0%|          | 19/8300 [05:00<36:09:11, 15.72s/it]

training loss: 1.0232781171798706


training:   0%|          | 20/8300 [05:16<36:08:57, 15.72s/it]

training loss: 1.260239601135254


training:   0%|          | 21/8300 [05:31<36:08:28, 15.72s/it]

training loss: 0.7248073816299438


training:   0%|          | 22/8300 [05:47<36:08:13, 15.72s/it]

training loss: 0.9685246348381042


training:   0%|          | 23/8300 [06:03<36:08:03, 15.72s/it]

training loss: 0.8044099807739258


training:   0%|          | 24/8300 [06:18<36:07:52, 15.72s/it]

training loss: 0.5855470895767212


training:   0%|          | 25/8300 [06:34<36:07:23, 15.72s/it]

training loss: 0.7784263491630554


training:   0%|          | 26/8300 [06:50<36:07:07, 15.72s/it]

training loss: 0.755690336227417


training:   0%|          | 27/8300 [07:06<36:06:56, 15.72s/it]

training loss: 0.3651016354560852


training:   0%|          | 28/8300 [07:21<36:06:48, 15.72s/it]

training loss: 0.7294648885726929


training:   0%|          | 29/8300 [07:37<36:06:32, 15.72s/it]

training loss: 0.8172018527984619


training:   0%|          | 30/8300 [07:53<36:06:29, 15.72s/it]

training loss: 0.5677509903907776


training:   0%|          | 31/8300 [08:08<36:06:09, 15.72s/it]

training loss: 1.1930545568466187


training:   0%|          | 32/8300 [08:24<36:05:54, 15.72s/it]

training loss: 0.43583789467811584


training:   0%|          | 33/8300 [08:40<36:05:44, 15.72s/it]

training loss: 0.594941258430481


training:   0%|          | 34/8300 [08:56<36:05:19, 15.72s/it]

training loss: 0.8572234511375427


training:   0%|          | 35/8300 [09:11<36:05:03, 15.72s/it]

training loss: 0.6710062026977539


training:   0%|          | 36/8300 [09:27<36:04:51, 15.72s/it]

training loss: 0.7820802330970764


training:   0%|          | 37/8300 [09:43<36:04:17, 15.72s/it]

training loss: 0.6684061884880066


training:   0%|          | 38/8300 [09:58<36:04:03, 15.72s/it]

training loss: 0.5301483869552612


training:   0%|          | 39/8300 [10:14<36:03:43, 15.72s/it]

training loss: 0.6407985687255859


training:   0%|          | 40/8300 [10:30<36:03:11, 15.71s/it]

training loss: 0.7347403764724731


training:   0%|          | 41/8300 [10:46<36:02:56, 15.71s/it]

training loss: 0.8768454194068909


training:   1%|          | 42/8300 [11:01<36:02:47, 15.71s/it]

training loss: 1.0648596286773682


training:   1%|          | 43/8300 [11:17<36:02:44, 15.72s/it]

training loss: 0.4728723168373108


training:   1%|          | 44/8300 [11:33<36:02:29, 15.72s/it]

training loss: 0.937859296798706


training:   1%|          | 45/8300 [11:48<36:02:11, 15.72s/it]

training loss: 0.923416256904602


training:   1%|          | 46/8300 [12:04<36:01:56, 15.72s/it]

training loss: 0.4787595868110657


training:   1%|          | 47/8300 [12:20<36:01:42, 15.72s/it]

training loss: 0.9806509613990784


training:   1%|          | 48/8300 [12:36<36:01:16, 15.71s/it]

training loss: 1.2488880157470703


training:   1%|          | 49/8300 [12:51<36:00:58, 15.71s/it]

training loss: 0.7471317052841187


training:   1%|          | 50/8300 [13:07<36:01:06, 15.72s/it]

training loss: 0.6157804727554321


training:   1%|          | 51/8300 [13:23<36:00:56, 15.72s/it]

training loss: 0.9698678255081177


training:   1%|          | 52/8300 [13:38<36:00:11, 15.71s/it]

training loss: 1.2055336236953735


training:   1%|          | 53/8300 [13:54<35:59:59, 15.71s/it]

training loss: 0.7620349526405334


training:   1%|          | 54/8300 [14:10<35:59:41, 15.71s/it]

training loss: 0.9646420478820801


training:   1%|          | 55/8300 [14:26<35:59:11, 15.71s/it]

training loss: 1.0037593841552734


training:   1%|          | 56/8300 [14:41<35:59:04, 15.71s/it]

training loss: 1.3167306184768677


training:   1%|          | 57/8300 [14:57<35:59:06, 15.72s/it]

training loss: 1.0115092992782593


training:   1%|          | 58/8300 [15:13<35:58:40, 15.71s/it]

training loss: 0.4502311050891876


training:   1%|          | 59/8300 [15:28<35:58:04, 15.71s/it]

training loss: 0.7635403871536255


training:   1%|          | 60/8300 [15:44<35:58:07, 15.71s/it]

training loss: 1.0185675621032715


training:   1%|          | 61/8300 [16:00<35:57:45, 15.71s/it]

training loss: 0.9069865345954895


training:   1%|          | 62/8300 [16:16<35:57:37, 15.71s/it]

training loss: 0.6075513958930969


training:   1%|          | 63/8300 [16:31<35:57:13, 15.71s/it]

training loss: 0.9554219841957092


training:   1%|          | 64/8300 [16:47<35:57:06, 15.71s/it]

training loss: 0.716283917427063


training:   1%|          | 65/8300 [17:03<35:57:12, 15.72s/it]

training loss: 0.6439031362533569


training:   1%|          | 66/8300 [17:18<35:57:11, 15.72s/it]

training loss: 0.4995405673980713


training:   1%|          | 67/8300 [17:34<35:57:16, 15.72s/it]

training loss: 0.9713668823242188


training:   1%|          | 68/8300 [17:50<35:56:55, 15.72s/it]

training loss: 0.7548618912696838


training:   1%|          | 69/8300 [18:06<35:56:43, 15.72s/it]

training loss: 0.8388000130653381


training:   1%|          | 70/8300 [18:21<35:56:24, 15.72s/it]

training loss: 0.7597427368164062


training:   1%|          | 71/8300 [18:37<35:56:23, 15.72s/it]

training loss: 0.7142685651779175


training:   1%|          | 72/8300 [18:53<35:56:20, 15.72s/it]

training loss: 1.0985956192016602


training:   1%|          | 73/8300 [19:09<35:56:06, 15.72s/it]

training loss: 1.1930549144744873


training:   1%|          | 74/8300 [19:24<35:55:53, 15.72s/it]

training loss: 0.6284727454185486


training:   1%|          | 75/8300 [19:40<35:56:02, 15.73s/it]

training loss: 0.6169877648353577


training:   1%|          | 76/8300 [19:56<35:56:00, 15.73s/it]

training loss: 0.8913049697875977


training:   1%|          | 77/8300 [20:11<35:55:48, 15.73s/it]

training loss: 0.5905500054359436


training:   1%|          | 78/8300 [20:27<35:54:59, 15.73s/it]

training loss: 0.5552098155021667


training:   1%|          | 79/8300 [20:43<35:54:32, 15.72s/it]

training loss: 0.5325811505317688


training:   1%|          | 80/8300 [20:59<35:53:58, 15.72s/it]

training loss: 0.8463853597640991


training:   1%|          | 81/8300 [21:14<35:54:03, 15.72s/it]

training loss: 1.0422114133834839


training:   1%|          | 82/8300 [21:30<35:53:50, 15.73s/it]

training loss: 0.7128340601921082


training:   1%|          | 83/8300 [21:46<35:53:35, 15.73s/it]

training loss: 0.9124480485916138


training:   1%|          | 84/8300 [22:02<35:53:24, 15.73s/it]

training loss: 0.5281621217727661


training:   1%|          | 85/8300 [22:17<35:52:51, 15.72s/it]

training loss: 0.8818513751029968


training:   1%|          | 86/8300 [22:33<35:52:20, 15.72s/it]

training loss: 0.734624981880188


training:   1%|          | 87/8300 [22:49<35:52:22, 15.72s/it]

training loss: 0.5635988712310791


training:   1%|          | 88/8300 [23:04<35:51:58, 15.72s/it]

training loss: 0.8672187328338623


training:   1%|          | 89/8300 [23:20<35:51:37, 15.72s/it]

training loss: 0.9038235545158386


training:   1%|          | 90/8300 [23:36<35:51:39, 15.72s/it]

training loss: 0.8799406886100769


training:   1%|          | 91/8300 [23:52<35:51:26, 15.72s/it]

training loss: 0.9376764297485352


training:   1%|          | 92/8300 [24:07<35:51:26, 15.73s/it]

training loss: 0.8912452459335327


training:   1%|          | 93/8300 [24:23<35:50:52, 15.72s/it]

training loss: 0.8551300168037415


training:   1%|          | 94/8300 [24:39<35:50:50, 15.73s/it]

training loss: 1.1049634218215942


training:   1%|          | 95/8300 [24:54<35:50:27, 15.73s/it]

training loss: 0.594721794128418


training:   1%|          | 96/8300 [25:10<35:49:45, 15.72s/it]

training loss: 0.6690248847007751


training:   1%|          | 97/8300 [25:26<35:49:24, 15.72s/it]

training loss: 0.8142314553260803


training:   1%|          | 98/8300 [25:42<35:49:22, 15.72s/it]

training loss: 0.8123964667320251


training:   1%|          | 99/8300 [25:57<35:48:47, 15.72s/it]

training loss: 1.0450741052627563


training:   1%|          | 100/8300 [26:13<35:47:56, 15.72s/it]

training loss: 0.6209537386894226
training loss: 0.8980163335800171


training:   1%|          | 101/8300 [26:30<36:40:10, 16.10s/it]

validation loss: 1.5022865533828735


training:   1%|          | 102/8300 [26:46<36:24:15, 15.99s/it]

training loss: 0.7378277778625488


training:   1%|          | 103/8300 [27:01<36:12:39, 15.90s/it]

training loss: 0.9482948780059814


training:   1%|▏         | 104/8300 [27:17<36:04:42, 15.85s/it]

training loss: 0.9395590424537659


training:   1%|▏         | 105/8300 [27:33<35:58:47, 15.81s/it]

training loss: 0.7347180843353271


training:   1%|▏         | 106/8300 [27:49<35:54:29, 15.78s/it]

training loss: 0.7602542042732239


training:   1%|▏         | 107/8300 [28:04<35:51:38, 15.76s/it]

training loss: 0.7299529314041138


training:   1%|▏         | 108/8300 [28:20<35:49:17, 15.74s/it]

training loss: 0.8557836413383484


training:   1%|▏         | 109/8300 [28:36<35:48:03, 15.73s/it]

training loss: 0.6291079521179199


training:   1%|▏         | 110/8300 [28:51<35:46:51, 15.73s/it]

training loss: 0.4638326168060303


training:   1%|▏         | 111/8300 [29:07<35:45:43, 15.72s/it]

training loss: 0.5695165395736694


training:   1%|▏         | 112/8300 [29:23<35:45:35, 15.72s/it]

training loss: 0.8621046543121338


training:   1%|▏         | 113/8300 [29:39<35:45:48, 15.73s/it]

training loss: 0.8842261433601379


training:   1%|▏         | 114/8300 [29:54<35:45:42, 15.73s/it]

training loss: 0.6032541990280151


training:   1%|▏         | 115/8300 [30:10<35:45:41, 15.73s/it]

training loss: 1.033623456954956


training:   1%|▏         | 116/8300 [30:26<35:44:56, 15.73s/it]

training loss: 1.004426121711731


training:   1%|▏         | 117/8300 [30:42<35:44:56, 15.73s/it]

training loss: 0.9963636994361877


training:   1%|▏         | 118/8300 [30:57<35:44:50, 15.73s/it]

training loss: 0.7138082981109619


training:   1%|▏         | 119/8300 [31:13<35:44:41, 15.73s/it]

training loss: 0.6433459520339966


training:   1%|▏         | 120/8300 [31:29<35:43:40, 15.72s/it]

training loss: 0.8849911093711853


training:   1%|▏         | 121/8300 [31:44<35:43:17, 15.72s/it]

training loss: 0.8204758167266846


training:   1%|▏         | 122/8300 [32:00<35:42:49, 15.72s/it]

training loss: 0.5654773712158203


training:   1%|▏         | 123/8300 [32:16<35:42:06, 15.72s/it]

training loss: 0.5843532085418701


training:   1%|▏         | 124/8300 [32:32<35:41:25, 15.72s/it]

training loss: 1.185461163520813


training:   2%|▏         | 125/8300 [32:47<35:41:09, 15.71s/it]

training loss: 0.8557437062263489


training:   2%|▏         | 126/8300 [33:03<35:40:54, 15.71s/it]

training loss: 0.4096786379814148


training:   2%|▏         | 127/8300 [33:19<35:40:28, 15.71s/it]

training loss: 0.8616188168525696


training:   2%|▏         | 128/8300 [33:34<35:40:14, 15.71s/it]

training loss: 0.6227094531059265


training:   2%|▏         | 129/8300 [33:50<35:39:45, 15.71s/it]

training loss: 0.9657022953033447


training:   2%|▏         | 130/8300 [34:06<35:39:28, 15.71s/it]

training loss: 1.1557400226593018


training:   2%|▏         | 131/8300 [34:22<35:38:48, 15.71s/it]

training loss: 0.43184950947761536


training:   2%|▏         | 132/8300 [34:37<35:38:34, 15.71s/it]

training loss: 1.053123950958252


training:   2%|▏         | 133/8300 [34:53<35:38:30, 15.71s/it]

training loss: 0.9462933540344238


training:   2%|▏         | 134/8300 [35:09<35:38:12, 15.71s/it]

training loss: 0.5601442456245422


training:   2%|▏         | 135/8300 [35:24<35:37:56, 15.71s/it]

training loss: 1.081283688545227


training:   2%|▏         | 136/8300 [35:40<35:37:57, 15.71s/it]

training loss: 0.7157155871391296


training:   2%|▏         | 137/8300 [35:56<35:37:48, 15.71s/it]

training loss: 0.8138782978057861


training:   2%|▏         | 138/8300 [36:12<35:37:23, 15.71s/it]

training loss: 0.8870688676834106


training:   2%|▏         | 139/8300 [36:27<35:36:46, 15.71s/it]

training loss: 0.8815799951553345


training:   2%|▏         | 140/8300 [36:43<35:36:28, 15.71s/it]

training loss: 0.3354714512825012


training:   2%|▏         | 141/8300 [36:59<35:36:28, 15.71s/it]

training loss: 0.9077674150466919


training:   2%|▏         | 142/8300 [37:14<35:36:29, 15.71s/it]

training loss: 0.612396776676178


training:   2%|▏         | 143/8300 [37:30<35:36:08, 15.71s/it]

training loss: 1.103859782218933


training:   2%|▏         | 144/8300 [37:46<35:35:41, 15.71s/it]

training loss: 0.7388031482696533


training:   2%|▏         | 145/8300 [38:02<35:35:07, 15.71s/it]

training loss: 0.828839898109436


training:   2%|▏         | 146/8300 [38:17<35:34:48, 15.71s/it]

training loss: 0.9826757311820984


training:   2%|▏         | 147/8300 [38:33<35:34:23, 15.71s/it]

training loss: 0.8677147030830383


training:   2%|▏         | 148/8300 [38:49<35:34:18, 15.71s/it]

training loss: 1.108553409576416


training:   2%|▏         | 149/8300 [39:04<35:34:21, 15.71s/it]

training loss: 0.8364342451095581


training:   2%|▏         | 150/8300 [39:20<35:33:41, 15.71s/it]

training loss: 0.9124817848205566


training:   2%|▏         | 151/8300 [39:36<35:33:35, 15.71s/it]

training loss: 1.0341438055038452


training:   2%|▏         | 152/8300 [39:51<35:33:48, 15.71s/it]

training loss: 0.7667931318283081


training:   2%|▏         | 153/8300 [40:07<35:33:36, 15.71s/it]

training loss: 0.2636316120624542


training:   2%|▏         | 154/8300 [40:23<35:33:20, 15.71s/it]

training loss: 0.822865903377533


training:   2%|▏         | 155/8300 [40:39<35:32:54, 15.71s/it]

training loss: 0.9698744416236877


training:   2%|▏         | 156/8300 [40:54<35:32:56, 15.71s/it]

training loss: 0.6980571746826172


training:   2%|▏         | 157/8300 [41:10<35:32:39, 15.71s/it]

training loss: 0.9065432548522949


training:   2%|▏         | 158/8300 [41:26<35:32:22, 15.71s/it]

training loss: 0.9446389675140381


training:   2%|▏         | 159/8300 [41:41<35:32:24, 15.72s/it]

training loss: 0.8424679040908813


training:   2%|▏         | 160/8300 [41:57<35:31:43, 15.71s/it]

training loss: 0.9463882446289062


training:   2%|▏         | 161/8300 [42:13<35:31:22, 15.71s/it]

training loss: 0.3498489558696747


training:   2%|▏         | 162/8300 [42:29<35:31:00, 15.71s/it]

training loss: 0.910919725894928


training:   2%|▏         | 163/8300 [42:44<35:30:42, 15.71s/it]

training loss: 0.5704671144485474


training:   2%|▏         | 164/8300 [43:00<35:34:39, 15.74s/it]

training loss: 0.8375095725059509


training:   2%|▏         | 165/8300 [43:16<35:33:22, 15.73s/it]

training loss: 0.8850129246711731


training:   2%|▏         | 166/8300 [43:32<35:32:09, 15.73s/it]

training loss: 0.874906063079834


training:   2%|▏         | 167/8300 [43:47<35:31:14, 15.72s/it]

training loss: 0.5911039113998413


training:   2%|▏         | 168/8300 [44:03<35:30:22, 15.72s/it]

training loss: 0.5824595093727112


training:   2%|▏         | 169/8300 [44:19<35:29:30, 15.71s/it]

training loss: 0.6665460467338562


training:   2%|▏         | 170/8300 [44:34<35:28:44, 15.71s/it]

training loss: 0.6267786622047424


training:   2%|▏         | 171/8300 [44:50<35:28:14, 15.71s/it]

training loss: 0.6789223551750183


training:   2%|▏         | 172/8300 [45:06<35:28:02, 15.71s/it]

training loss: 0.6340318322181702


training:   2%|▏         | 173/8300 [45:22<35:27:41, 15.71s/it]

training loss: 0.9227337837219238


training:   2%|▏         | 174/8300 [45:37<35:27:32, 15.71s/it]

training loss: 0.7510270476341248


training:   2%|▏         | 175/8300 [45:53<35:27:25, 15.71s/it]

training loss: 0.6092850565910339


training:   2%|▏         | 176/8300 [46:09<35:26:57, 15.71s/it]

training loss: 0.885021984577179


training:   2%|▏         | 177/8300 [46:24<35:26:29, 15.71s/it]

training loss: 0.6793492436408997


training:   2%|▏         | 178/8300 [46:40<35:26:17, 15.71s/it]

training loss: 0.9396175146102905


training:   2%|▏         | 179/8300 [46:56<35:26:04, 15.71s/it]

training loss: 0.821271538734436


training:   2%|▏         | 180/8300 [47:11<35:25:37, 15.71s/it]

training loss: 0.9834834933280945


training:   2%|▏         | 181/8300 [47:27<35:25:25, 15.71s/it]

training loss: 0.43861550092697144


training:   2%|▏         | 182/8300 [47:43<35:25:23, 15.71s/it]

training loss: 0.593093752861023


training:   2%|▏         | 183/8300 [47:59<35:25:17, 15.71s/it]

training loss: 1.0677735805511475


training:   2%|▏         | 184/8300 [48:14<35:24:45, 15.71s/it]

training loss: 0.694446325302124


training:   2%|▏         | 185/8300 [48:30<35:24:13, 15.71s/it]

training loss: 1.0073506832122803


training:   2%|▏         | 186/8300 [48:46<35:23:59, 15.71s/it]

training loss: 0.91812664270401


training:   2%|▏         | 187/8300 [49:01<35:23:57, 15.71s/it]

training loss: 0.6207252144813538


training:   2%|▏         | 188/8300 [49:17<35:23:48, 15.71s/it]

training loss: 0.8498258590698242


training:   2%|▏         | 189/8300 [49:33<35:23:28, 15.71s/it]

training loss: 1.1773035526275635


training:   2%|▏         | 190/8300 [49:49<35:23:20, 15.71s/it]

training loss: 0.7788406610488892


training:   2%|▏         | 191/8300 [50:04<35:23:11, 15.71s/it]

training loss: 0.7681586742401123


training:   2%|▏         | 192/8300 [50:20<35:22:53, 15.71s/it]

training loss: 0.7993062734603882


training:   2%|▏         | 193/8300 [50:36<35:22:25, 15.71s/it]

training loss: 0.5741716623306274


training:   2%|▏         | 194/8300 [50:51<35:21:49, 15.71s/it]

training loss: 0.8892903327941895


training:   2%|▏         | 195/8300 [51:07<35:21:21, 15.70s/it]

training loss: 1.3831465244293213


training:   2%|▏         | 196/8300 [51:23<35:21:21, 15.71s/it]

training loss: 0.39372381567955017


training:   2%|▏         | 197/8300 [51:39<35:21:03, 15.71s/it]

training loss: 1.1991409063339233


training:   2%|▏         | 198/8300 [51:54<35:20:54, 15.71s/it]

training loss: 0.8500961065292358


training:   2%|▏         | 199/8300 [52:10<35:20:54, 15.71s/it]

training loss: 0.9080965518951416


training:   2%|▏         | 200/8300 [52:26<35:20:21, 15.71s/it]

training loss: 0.8550552129745483
training loss: 1.2090572118759155


training:   2%|▏         | 201/8300 [52:43<36:11:40, 16.09s/it]

validation loss: 1.5164642333984375


training:   2%|▏         | 202/8300 [52:58<35:56:44, 15.98s/it]

training loss: 0.9421722888946533


training:   2%|▏         | 203/8300 [53:14<35:45:25, 15.90s/it]

training loss: 0.7616108059883118


training:   2%|▏         | 204/8300 [53:30<35:37:26, 15.84s/it]

training loss: 0.46173393726348877


training:   2%|▏         | 205/8300 [53:45<35:31:57, 15.80s/it]

training loss: 0.9669522047042847


training:   2%|▏         | 206/8300 [54:01<35:28:09, 15.78s/it]

training loss: 0.8962777256965637


training:   2%|▏         | 207/8300 [54:17<35:25:13, 15.76s/it]

training loss: 0.9002383947372437


training:   3%|▎         | 208/8300 [54:33<35:22:55, 15.74s/it]

training loss: 0.7617343068122864


training:   3%|▎         | 209/8300 [54:48<35:21:22, 15.73s/it]

training loss: 0.8541524410247803


training:   3%|▎         | 210/8300 [55:04<35:20:42, 15.73s/it]

training loss: 0.8811604976654053


training:   3%|▎         | 211/8300 [55:20<35:19:46, 15.72s/it]

training loss: 0.5328391790390015


training:   3%|▎         | 212/8300 [55:35<35:19:03, 15.72s/it]

training loss: 0.5597106218338013


training:   3%|▎         | 213/8300 [55:51<35:18:50, 15.72s/it]

training loss: 0.6313471794128418


training:   3%|▎         | 214/8300 [56:07<35:18:15, 15.72s/it]

training loss: 0.8135298490524292


training:   3%|▎         | 215/8300 [56:23<35:17:46, 15.72s/it]

training loss: 0.7035989165306091


training:   3%|▎         | 216/8300 [56:38<35:17:10, 15.71s/it]

training loss: 0.554455041885376


training:   3%|▎         | 217/8300 [56:54<35:16:39, 15.71s/it]

training loss: 0.859707772731781


training:   3%|▎         | 218/8300 [57:10<35:16:31, 15.71s/it]

training loss: 0.7854492664337158


training:   3%|▎         | 219/8300 [57:25<35:16:19, 15.71s/it]

training loss: 0.8819106817245483


training:   3%|▎         | 220/8300 [57:41<35:16:13, 15.71s/it]

training loss: 0.6793104410171509


training:   3%|▎         | 221/8300 [57:57<35:15:50, 15.71s/it]

training loss: 0.5909790396690369


training:   3%|▎         | 222/8300 [58:13<35:15:33, 15.71s/it]

training loss: 1.1306419372558594


training:   3%|▎         | 223/8300 [58:28<35:15:01, 15.71s/it]

training loss: 0.7004112005233765


training:   3%|▎         | 224/8300 [58:44<35:14:55, 15.71s/it]

training loss: 0.8499839901924133


training:   3%|▎         | 225/8300 [59:00<35:14:44, 15.71s/it]

training loss: 0.8864858746528625


training:   3%|▎         | 226/8300 [59:15<35:14:32, 15.71s/it]

training loss: 0.8548098206520081


training:   3%|▎         | 227/8300 [59:31<35:13:54, 15.71s/it]

training loss: 0.6842716932296753


training:   3%|▎         | 228/8300 [59:47<35:13:38, 15.71s/it]

training loss: 1.075819492340088


training:   3%|▎         | 229/8300 [1:00:03<35:14:35, 15.72s/it]

training loss: 0.6084113717079163


training:   3%|▎         | 230/8300 [1:00:18<35:14:46, 15.72s/it]

training loss: 0.4938189685344696


training:   3%|▎         | 231/8300 [1:00:34<35:14:42, 15.72s/it]

training loss: 0.842510998249054


training:   3%|▎         | 232/8300 [1:00:50<35:14:39, 15.73s/it]

training loss: 1.0215981006622314


training:   3%|▎         | 233/8300 [1:01:06<35:14:28, 15.73s/it]

training loss: 0.685804009437561


training:   3%|▎         | 234/8300 [1:01:21<35:14:25, 15.73s/it]

training loss: 0.45926862955093384


training:   3%|▎         | 235/8300 [1:01:37<35:14:15, 15.73s/it]

training loss: 0.4336090385913849


training:   3%|▎         | 236/8300 [1:01:53<35:14:09, 15.73s/it]

training loss: 0.3316363990306854


training:   3%|▎         | 237/8300 [1:02:08<35:13:14, 15.73s/it]

training loss: 0.8485552668571472


training:   3%|▎         | 238/8300 [1:02:24<35:12:02, 15.72s/it]

training loss: 0.8981664776802063


training:   3%|▎         | 239/8300 [1:02:40<35:11:04, 15.71s/it]

training loss: 0.9006552696228027


training:   3%|▎         | 240/8300 [1:02:56<35:10:15, 15.71s/it]

training loss: 0.8583605289459229


training:   3%|▎         | 241/8300 [1:03:11<35:09:49, 15.71s/it]

training loss: 1.0107288360595703


training:   3%|▎         | 242/8300 [1:03:27<35:09:07, 15.70s/it]

training loss: 0.437996506690979


training:   3%|▎         | 243/8300 [1:03:43<35:08:28, 15.70s/it]

training loss: 0.5444987416267395


training:   3%|▎         | 244/8300 [1:03:58<35:08:09, 15.70s/it]

training loss: 0.6122779846191406


training:   3%|▎         | 245/8300 [1:04:14<35:07:55, 15.70s/it]

training loss: 0.805488646030426


training:   3%|▎         | 246/8300 [1:04:30<35:07:27, 15.70s/it]

training loss: 1.1059768199920654


training:   3%|▎         | 247/8300 [1:04:45<35:07:09, 15.70s/it]

training loss: 0.8862074017524719


training:   3%|▎         | 248/8300 [1:05:01<35:06:49, 15.70s/it]

training loss: 0.7406718730926514


training:   3%|▎         | 249/8300 [1:05:17<35:06:57, 15.70s/it]

training loss: 0.7287371754646301


training:   3%|▎         | 250/8300 [1:05:33<35:06:38, 15.70s/it]

training loss: 0.7817270755767822


training:   3%|▎         | 251/8300 [1:05:48<35:06:21, 15.70s/it]

training loss: 0.4842916429042816


training:   3%|▎         | 252/8300 [1:06:04<35:06:21, 15.70s/it]

training loss: 1.0983808040618896


training:   3%|▎         | 253/8300 [1:06:20<35:05:54, 15.70s/it]

training loss: 1.1708860397338867


training:   3%|▎         | 254/8300 [1:06:35<35:05:23, 15.70s/it]

training loss: 0.9769850969314575


training:   3%|▎         | 255/8300 [1:06:51<35:05:08, 15.70s/it]

training loss: 0.9360462427139282


training:   3%|▎         | 256/8300 [1:07:07<35:04:37, 15.70s/it]

training loss: 0.7859474420547485


training:   3%|▎         | 257/8300 [1:07:22<35:04:27, 15.70s/it]

training loss: 0.31616953015327454


training:   3%|▎         | 258/8300 [1:07:38<35:04:08, 15.70s/it]

training loss: 0.6387068629264832


training:   3%|▎         | 259/8300 [1:07:54<35:04:02, 15.70s/it]

training loss: 0.8005681037902832


training:   3%|▎         | 260/8300 [1:08:10<35:03:56, 15.70s/it]

training loss: 0.8483447432518005


training:   3%|▎         | 261/8300 [1:08:25<35:03:36, 15.70s/it]

training loss: 1.3928890228271484


training:   3%|▎         | 262/8300 [1:08:41<35:03:19, 15.70s/it]

training loss: 0.8043291568756104


training:   3%|▎         | 263/8300 [1:08:57<35:02:55, 15.70s/it]

training loss: 0.7767072319984436


training:   3%|▎         | 264/8300 [1:09:12<35:02:42, 15.70s/it]

training loss: 0.6621623635292053


training:   3%|▎         | 265/8300 [1:09:28<35:02:24, 15.70s/it]

training loss: 0.6981132626533508


training:   3%|▎         | 266/8300 [1:09:44<35:02:12, 15.70s/it]

training loss: 0.8681368827819824


training:   3%|▎         | 267/8300 [1:09:59<35:01:46, 15.70s/it]

training loss: 0.5969119668006897


training:   3%|▎         | 268/8300 [1:10:15<35:01:33, 15.70s/it]

training loss: 0.7931405901908875


training:   3%|▎         | 269/8300 [1:10:31<35:01:14, 15.70s/it]

training loss: 0.959831953048706


training:   3%|▎         | 270/8300 [1:10:47<35:00:53, 15.70s/it]

training loss: 0.8587437868118286


training:   3%|▎         | 271/8300 [1:11:02<35:00:33, 15.70s/it]

training loss: 0.7451072335243225


training:   3%|▎         | 272/8300 [1:11:18<35:00:38, 15.70s/it]

training loss: 0.9057670831680298


training:   3%|▎         | 273/8300 [1:11:34<35:00:10, 15.70s/it]

training loss: 0.592373788356781


training:   3%|▎         | 274/8300 [1:11:49<34:59:46, 15.70s/it]

training loss: 0.8514890670776367


training:   3%|▎         | 275/8300 [1:12:05<34:59:36, 15.70s/it]

training loss: 0.9002567529678345


training:   3%|▎         | 276/8300 [1:12:21<34:59:24, 15.70s/it]

training loss: 0.7727985382080078


training:   3%|▎         | 277/8300 [1:12:36<34:59:15, 15.70s/it]

training loss: 0.8024770617485046


training:   3%|▎         | 278/8300 [1:12:52<34:58:50, 15.70s/it]

training loss: 0.7098200917243958


training:   3%|▎         | 279/8300 [1:13:08<34:58:28, 15.70s/it]

training loss: 0.7558266520500183


training:   3%|▎         | 280/8300 [1:13:23<34:58:34, 15.70s/it]

training loss: 1.0092930793762207


training:   3%|▎         | 281/8300 [1:13:39<34:58:28, 15.70s/it]

training loss: 0.7948055267333984


training:   3%|▎         | 282/8300 [1:13:55<34:58:20, 15.70s/it]

training loss: 0.9643129110336304


training:   3%|▎         | 283/8300 [1:14:11<34:57:48, 15.70s/it]

training loss: 0.9451292753219604


training:   3%|▎         | 284/8300 [1:14:26<34:57:26, 15.70s/it]

training loss: 0.7546929121017456


training:   3%|▎         | 285/8300 [1:14:42<34:57:08, 15.70s/it]

training loss: 0.8855746388435364


training:   3%|▎         | 286/8300 [1:14:58<34:56:53, 15.70s/it]

training loss: 0.8957440853118896


training:   3%|▎         | 287/8300 [1:15:13<34:56:45, 15.70s/it]

training loss: 0.9061127305030823


training:   3%|▎         | 288/8300 [1:15:29<34:56:05, 15.70s/it]

training loss: 1.0655696392059326


training:   3%|▎         | 289/8300 [1:15:45<34:55:54, 15.70s/it]

training loss: 1.2179150581359863


training:   3%|▎         | 290/8300 [1:16:00<34:55:42, 15.70s/it]

training loss: 0.9422067403793335


training:   4%|▎         | 291/8300 [1:16:16<34:56:34, 15.71s/it]

training loss: 0.647497296333313


training:   4%|▎         | 292/8300 [1:16:32<34:55:55, 15.70s/it]

training loss: 0.7903118133544922


training:   4%|▎         | 293/8300 [1:16:48<34:55:54, 15.71s/it]

training loss: 0.8599121570587158


training:   4%|▎         | 294/8300 [1:17:03<34:55:37, 15.71s/it]

training loss: 0.4932776093482971


training:   4%|▎         | 295/8300 [1:17:19<34:55:38, 15.71s/it]

training loss: 0.8135281801223755


training:   4%|▎         | 296/8300 [1:17:35<34:55:08, 15.71s/it]

training loss: 0.872758150100708


training:   4%|▎         | 297/8300 [1:17:50<34:54:19, 15.70s/it]

training loss: 0.5079907774925232


training:   4%|▎         | 298/8300 [1:18:06<34:54:07, 15.70s/it]

training loss: 0.6560518145561218


training:   4%|▎         | 299/8300 [1:18:22<34:53:33, 15.70s/it]

training loss: 0.7518842816352844


training:   4%|▎         | 300/8300 [1:18:38<34:53:31, 15.70s/it]

training loss: 0.7652415037155151
training loss: 0.7024452686309814



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.505059003829956
okonca, ze by bol takymto stretnutim
pocteny.Vodici, ktori od februara do konca marca 2016 zaplatili pokutu za
prekrocenie rychlosti obmedzenej na 50 km/h, dostavaju od magistratu
pisomne vysvetlenie.
Bolo zistene, ze pri merani automatizovanym technickym prostriedkom
pouzivanym bez obsluhy pri dohladu na bezpecnost premavky na pozemnych
komunikaciach v tunelovom komplexe Blanka, a to v obdobi od 8. februara
2016 do 31. marca 2016, sa vyskytli technicke problemy, ktore mohli mat
vplyv na presnost urcenia vysledku merania rychlosti vozidla
prechadzajuceho tymto usekom, pise sa v listoch.
Magistrat pokuty za nespravne identifikovane prekrocenie najvyssej
dovolenej rychlosti stornoval. Podla hovorcu magistratu Vita Hofmana islo
o 3 796 pripadov.
Magistrat uz zmenil dodavatela aj prevadzkovatela systemu merania.Vyhlasenie lekarky bolo sucastou aktualizovanych informacii
o zdravotnom stave 68-rocnej politicky. Stalo sa tak po tom, co
v uplyn


generating:   0%|          | 1/512 [00:00<01:59,  4.27it/s][A
generating:   0%|          | 2/512 [00:00<02:00,  4.25it/s][A
generating:   1%|          | 3/512 [00:00<01:59,  4.26it/s][A
generating:   1%|          | 4/512 [00:00<01:59,  4.25it/s][A
generating:   1%|          | 5/512 [00:01<01:59,  4.25it/s][A
generating:   1%|          | 6/512 [00:01<01:58,  4.27it/s][A
generating:   1%|▏         | 7/512 [00:01<01:57,  4.29it/s][A
generating:   2%|▏         | 8/512 [00:01<01:56,  4.31it/s][A
generating:   2%|▏         | 9/512 [00:02<01:56,  4.31it/s][A
generating:   2%|▏         | 10/512 [00:02<01:56,  4.30it/s][A
generating:   2%|▏         | 11/512 [00:02<01:57,  4.28it/s][A
generating:   2%|▏         | 12/512 [00:02<01:56,  4.30it/s][A
generating:   3%|▎         | 13/512 [00:03<01:56,  4.27it/s][A
generating:   3%|▎         | 14/512 [00:03<01:57,  4.25it/s][A
generating:   3%|▎         | 15/512 [00:03<01:57,  4.23it/s][A
generating:   3%|▎         | 16/512 [00:03<01:57

rojekty R2 CESR
Rusi ziskusan ulohy pritom ukonceni sucasnej unie dosiahla podla sefa Kremisonavit s Kazerskom, pretoze
znamenava, ktory sa rozprava na urovni minuly ministrom zisku rozvinu lepsieho
programu a otazky ci na svoju cenu odmieta. Keby a podla nej mocele pre
kolektivnej opolitickych statov vlady na tejto bona bol velmi
nemusi pomer domaceho kroku.
Stalinoveho programu Slovenskej vyzvov. Karolina okolo
31,5 miliona eur pre uprave velmi rozsireniu zaregistrovany obsahuje predovsetkym
emokr


training:   4%|▎         | 302/8300 [1:21:11<91:40:08, 41.26s/it] 

training loss: 0.950536847114563


training:   4%|▎         | 303/8300 [1:21:26<74:37:41, 33.60s/it]

training loss: 0.7626466751098633


training:   4%|▎         | 304/8300 [1:21:42<62:41:34, 28.23s/it]

training loss: 0.903112530708313


training:   4%|▎         | 305/8300 [1:21:58<54:20:31, 24.47s/it]

training loss: 0.7152228355407715


training:   4%|▎         | 306/8300 [1:22:13<48:29:53, 21.84s/it]

training loss: 0.6150272488594055


training:   4%|▎         | 307/8300 [1:22:29<44:24:13, 20.00s/it]

training loss: 0.8875287175178528


training:   4%|▎         | 308/8300 [1:22:45<41:32:11, 18.71s/it]

training loss: 0.7147281765937805


training:   4%|▎         | 309/8300 [1:23:01<39:31:41, 17.81s/it]

training loss: 0.8467770218849182


training:   4%|▎         | 310/8300 [1:23:16<38:07:16, 17.18s/it]

training loss: 0.6882596611976624


training:   4%|▎         | 311/8300 [1:23:32<37:07:56, 16.73s/it]

training loss: 0.6798403859138489


training:   4%|▍         | 312/8300 [1:23:48<36:26:43, 16.43s/it]

training loss: 0.9737892150878906


training:   4%|▍         | 313/8300 [1:24:03<35:57:46, 16.21s/it]

training loss: 0.6271206736564636


training:   4%|▍         | 314/8300 [1:24:19<35:37:25, 16.06s/it]

training loss: 0.4367903470993042


training:   4%|▍         | 315/8300 [1:24:35<35:23:08, 15.95s/it]

training loss: 0.5668256282806396


training:   4%|▍         | 316/8300 [1:24:51<35:13:19, 15.88s/it]

training loss: 0.5005013942718506


training:   4%|▍         | 317/8300 [1:25:06<35:06:25, 15.83s/it]

training loss: 0.8414378762245178


training:   4%|▍         | 318/8300 [1:25:22<35:01:35, 15.80s/it]

training loss: 0.8843469023704529


training:   4%|▍         | 319/8300 [1:25:38<34:58:09, 15.77s/it]

training loss: 0.7654115557670593


training:   4%|▍         | 320/8300 [1:25:53<34:55:16, 15.75s/it]

training loss: 1.0309834480285645


training:   4%|▍         | 321/8300 [1:26:09<34:53:20, 15.74s/it]

training loss: 0.9506800770759583


training:   4%|▍         | 322/8300 [1:26:25<34:51:51, 15.73s/it]

training loss: 0.8704672455787659


training:   4%|▍         | 323/8300 [1:26:41<34:50:49, 15.73s/it]

training loss: 0.46068117022514343


training:   4%|▍         | 324/8300 [1:26:56<34:50:09, 15.72s/it]

training loss: 0.7125493288040161


training:   4%|▍         | 325/8300 [1:27:12<34:49:27, 15.72s/it]

training loss: 0.9256832003593445


training:   4%|▍         | 326/8300 [1:27:28<34:48:43, 15.72s/it]

training loss: 0.8843719363212585


training:   4%|▍         | 327/8300 [1:27:43<34:48:31, 15.72s/it]

training loss: 0.972518801689148


training:   4%|▍         | 328/8300 [1:27:59<34:48:18, 15.72s/it]

training loss: 0.8793512582778931


training:   4%|▍         | 329/8300 [1:28:15<34:47:40, 15.71s/it]

training loss: 0.9210268259048462


training:   4%|▍         | 330/8300 [1:28:30<34:47:17, 15.71s/it]

training loss: 0.8919522762298584


training:   4%|▍         | 331/8300 [1:28:46<34:46:49, 15.71s/it]

training loss: 0.3547253906726837


training:   4%|▍         | 332/8300 [1:29:02<34:46:41, 15.71s/it]

training loss: 0.47609177231788635


training:   4%|▍         | 333/8300 [1:29:18<34:46:34, 15.71s/it]

training loss: 0.4812106490135193


training:   4%|▍         | 334/8300 [1:29:33<34:46:19, 15.71s/it]

training loss: 0.46555304527282715


training:   4%|▍         | 335/8300 [1:29:49<34:45:45, 15.71s/it]

training loss: 0.9636221528053284


training:   4%|▍         | 336/8300 [1:30:05<34:45:10, 15.71s/it]

training loss: 0.9414942860603333


training:   4%|▍         | 337/8300 [1:30:20<34:44:52, 15.71s/it]

training loss: 0.8802095651626587


training:   4%|▍         | 338/8300 [1:30:36<34:44:58, 15.71s/it]

training loss: 0.8692494034767151


training:   4%|▍         | 339/8300 [1:30:52<34:44:34, 15.71s/it]

training loss: 0.3611365854740143


training:   4%|▍         | 340/8300 [1:31:08<34:44:14, 15.71s/it]

training loss: 0.6315792202949524


training:   4%|▍         | 341/8300 [1:31:23<34:44:23, 15.71s/it]

training loss: 0.979721188545227


training:   4%|▍         | 342/8300 [1:31:39<34:43:48, 15.71s/it]

training loss: 0.9393699169158936


training:   4%|▍         | 343/8300 [1:31:55<34:43:27, 15.71s/it]

training loss: 0.740157961845398


training:   4%|▍         | 344/8300 [1:32:10<34:43:16, 15.71s/it]

training loss: 1.0923149585723877


training:   4%|▍         | 345/8300 [1:32:26<34:42:38, 15.71s/it]

training loss: 0.88334721326828


training:   4%|▍         | 346/8300 [1:32:42<34:42:16, 15.71s/it]

training loss: 0.7741389274597168


training:   4%|▍         | 347/8300 [1:32:58<34:41:49, 15.71s/it]

training loss: 0.9630725383758545


training:   4%|▍         | 348/8300 [1:33:13<34:41:33, 15.71s/it]

training loss: 0.7788257002830505


training:   4%|▍         | 349/8300 [1:33:29<34:41:20, 15.71s/it]

training loss: 0.7879199981689453


training:   4%|▍         | 350/8300 [1:33:45<34:41:03, 15.71s/it]

training loss: 0.548690915107727


training:   4%|▍         | 351/8300 [1:34:00<34:40:53, 15.71s/it]

training loss: 0.5265634059906006


training:   4%|▍         | 352/8300 [1:34:16<34:41:03, 15.71s/it]

training loss: 0.7938710451126099


training:   4%|▍         | 353/8300 [1:34:32<34:40:23, 15.71s/it]

training loss: 0.15118126571178436


training:   4%|▍         | 354/8300 [1:34:48<34:39:56, 15.71s/it]

training loss: 0.6002213358879089


training:   4%|▍         | 355/8300 [1:35:03<34:39:50, 15.71s/it]

training loss: 0.8853843808174133


training:   4%|▍         | 356/8300 [1:35:19<34:39:35, 15.71s/it]

training loss: 0.66493821144104


training:   4%|▍         | 357/8300 [1:35:35<34:39:12, 15.71s/it]

training loss: 0.8053900599479675


training:   4%|▍         | 358/8300 [1:35:50<34:38:52, 15.71s/it]

training loss: 0.7599908113479614


training:   4%|▍         | 359/8300 [1:36:06<34:38:42, 15.71s/it]

training loss: 0.7788062691688538


training:   4%|▍         | 360/8300 [1:36:22<34:38:09, 15.70s/it]

training loss: 0.5709474086761475


training:   4%|▍         | 361/8300 [1:36:37<34:37:49, 15.70s/it]

training loss: 0.719133734703064


training:   4%|▍         | 362/8300 [1:36:53<34:37:10, 15.70s/it]

training loss: 0.8443097472190857


training:   4%|▍         | 363/8300 [1:37:09<34:36:48, 15.70s/it]

training loss: 0.7250791192054749


training:   4%|▍         | 364/8300 [1:37:25<34:36:17, 15.70s/it]

training loss: 1.0050421953201294


training:   4%|▍         | 365/8300 [1:37:40<34:36:12, 15.70s/it]

training loss: 0.7694593071937561


training:   4%|▍         | 366/8300 [1:37:56<34:36:04, 15.70s/it]

training loss: 0.8401050567626953


training:   4%|▍         | 367/8300 [1:38:12<34:35:34, 15.70s/it]

training loss: 0.8237268924713135


training:   4%|▍         | 368/8300 [1:38:27<34:35:00, 15.70s/it]

training loss: 0.8471243977546692


training:   4%|▍         | 369/8300 [1:38:43<34:34:47, 15.70s/it]

training loss: 0.9469144940376282


training:   4%|▍         | 370/8300 [1:38:59<34:34:26, 15.70s/it]

training loss: 0.7765670418739319


training:   4%|▍         | 371/8300 [1:39:14<34:34:23, 15.70s/it]

training loss: 0.9039227366447449


training:   4%|▍         | 372/8300 [1:39:30<34:34:26, 15.70s/it]

training loss: 0.4957300126552582


training:   4%|▍         | 373/8300 [1:39:46<34:34:04, 15.70s/it]

training loss: 0.8150646090507507


training:   5%|▍         | 374/8300 [1:40:02<34:33:47, 15.70s/it]

training loss: 1.0857996940612793


training:   5%|▍         | 375/8300 [1:40:17<34:33:23, 15.70s/it]

training loss: 0.7992272973060608


training:   5%|▍         | 376/8300 [1:40:33<34:33:08, 15.70s/it]

training loss: 0.2978670001029968


training:   5%|▍         | 377/8300 [1:40:49<34:32:55, 15.70s/it]

training loss: 0.7984341979026794


training:   5%|▍         | 378/8300 [1:41:04<34:32:45, 15.70s/it]

training loss: 0.6595163345336914


training:   5%|▍         | 379/8300 [1:41:20<34:32:29, 15.70s/it]

training loss: 0.9789591431617737


training:   5%|▍         | 380/8300 [1:41:36<34:32:26, 15.70s/it]

training loss: 0.7264145612716675


training:   5%|▍         | 381/8300 [1:41:51<34:32:00, 15.70s/it]

training loss: 0.6310174465179443


training:   5%|▍         | 382/8300 [1:42:07<34:32:00, 15.70s/it]

training loss: 0.7589550018310547


training:   5%|▍         | 383/8300 [1:42:23<34:31:33, 15.70s/it]

training loss: 0.6585452556610107


training:   5%|▍         | 384/8300 [1:42:39<34:31:38, 15.70s/it]

training loss: 0.7497791647911072


training:   5%|▍         | 385/8300 [1:42:54<34:31:25, 15.70s/it]

training loss: 0.5958964824676514


training:   5%|▍         | 386/8300 [1:43:10<34:30:57, 15.70s/it]

training loss: 0.88697749376297


training:   5%|▍         | 387/8300 [1:43:26<34:30:43, 15.70s/it]

training loss: 0.9136306047439575


training:   5%|▍         | 388/8300 [1:43:41<34:30:33, 15.70s/it]

training loss: 1.0952374935150146


training:   5%|▍         | 389/8300 [1:43:57<34:30:13, 15.70s/it]

training loss: 0.8571276068687439


training:   5%|▍         | 390/8300 [1:44:13<34:29:52, 15.70s/it]

training loss: 0.8287917375564575


training:   5%|▍         | 391/8300 [1:44:28<34:29:37, 15.70s/it]

training loss: 1.1859872341156006


training:   5%|▍         | 392/8300 [1:44:44<34:29:43, 15.70s/it]

training loss: 0.6927528977394104


training:   5%|▍         | 393/8300 [1:45:00<34:29:16, 15.70s/it]

training loss: 0.7207473516464233


training:   5%|▍         | 394/8300 [1:45:16<34:28:52, 15.70s/it]

training loss: 0.7149381041526794


training:   5%|▍         | 395/8300 [1:45:31<34:28:36, 15.70s/it]

training loss: 1.0081815719604492


training:   5%|▍         | 396/8300 [1:45:47<34:28:26, 15.70s/it]

training loss: 0.9812570810317993


training:   5%|▍         | 397/8300 [1:46:03<34:28:11, 15.70s/it]

training loss: 0.5878923535346985


training:   5%|▍         | 398/8300 [1:46:18<34:27:41, 15.70s/it]

training loss: 0.6495197415351868


training:   5%|▍         | 399/8300 [1:46:34<34:27:12, 15.70s/it]

training loss: 0.7279319167137146


training:   5%|▍         | 400/8300 [1:46:50<34:30:49, 15.73s/it]

training loss: 0.6787500381469727
training loss: 0.8436026573181152


training:   5%|▍         | 401/8300 [1:47:07<35:19:35, 16.10s/it]

validation loss: 1.592887043952942


training:   5%|▍         | 402/8300 [1:47:22<35:03:54, 15.98s/it]

training loss: 1.0571181774139404


training:   5%|▍         | 403/8300 [1:47:38<34:52:36, 15.90s/it]

training loss: 0.6205961108207703


training:   5%|▍         | 404/8300 [1:47:54<34:44:30, 15.84s/it]

training loss: 0.7814300656318665


training:   5%|▍         | 405/8300 [1:48:10<34:38:44, 15.80s/it]

training loss: 0.3246244490146637


training:   5%|▍         | 406/8300 [1:48:25<34:34:27, 15.77s/it]

training loss: 0.7817984819412231


training:   5%|▍         | 407/8300 [1:48:41<34:31:26, 15.75s/it]

training loss: 1.0523455142974854


training:   5%|▍         | 408/8300 [1:48:57<34:29:18, 15.73s/it]

training loss: 0.9564831256866455


training:   5%|▍         | 409/8300 [1:49:12<34:27:56, 15.72s/it]

training loss: 0.6390331983566284


training:   5%|▍         | 410/8300 [1:49:28<34:26:43, 15.72s/it]

training loss: 0.8799302577972412


training:   5%|▍         | 411/8300 [1:49:44<34:26:14, 15.71s/it]

training loss: 0.7316021919250488


training:   5%|▍         | 412/8300 [1:50:00<34:25:28, 15.71s/it]

training loss: 0.7473230361938477


training:   5%|▍         | 413/8300 [1:50:15<34:25:01, 15.71s/it]

training loss: 1.092909574508667


training:   5%|▍         | 414/8300 [1:50:31<34:24:09, 15.70s/it]

training loss: 0.6189516186714172


training:   5%|▌         | 415/8300 [1:50:47<34:23:44, 15.70s/it]

training loss: 1.2772679328918457


training:   5%|▌         | 416/8300 [1:51:02<34:23:22, 15.70s/it]

training loss: 1.1244909763336182


training:   5%|▌         | 417/8300 [1:51:18<34:23:01, 15.70s/it]

training loss: 0.760075569152832


training:   5%|▌         | 418/8300 [1:51:34<34:22:46, 15.70s/it]

training loss: 0.7612721920013428


training:   5%|▌         | 419/8300 [1:51:49<34:22:51, 15.71s/it]

training loss: 0.6933184266090393


training:   5%|▌         | 420/8300 [1:52:05<34:22:33, 15.70s/it]

training loss: 1.1720517873764038


training:   5%|▌         | 421/8300 [1:52:21<34:22:14, 15.70s/it]

training loss: 0.4452448785305023


training:   5%|▌         | 422/8300 [1:52:37<34:21:48, 15.70s/it]

training loss: 0.8643278479576111


training:   5%|▌         | 423/8300 [1:52:52<34:21:40, 15.70s/it]

training loss: 0.5082607865333557


training:   5%|▌         | 424/8300 [1:53:08<34:21:28, 15.70s/it]

training loss: 0.5900271534919739


training:   5%|▌         | 425/8300 [1:53:24<34:21:17, 15.71s/it]

training loss: 0.49506497383117676


training:   5%|▌         | 426/8300 [1:53:39<34:20:43, 15.70s/it]

training loss: 0.581659734249115


training:   5%|▌         | 427/8300 [1:53:55<34:20:38, 15.70s/it]

training loss: 0.4506869316101074


training:   5%|▌         | 428/8300 [1:54:11<34:20:41, 15.71s/it]

training loss: 1.134061336517334


training:   5%|▌         | 429/8300 [1:54:26<34:19:45, 15.70s/it]

training loss: 0.7828132510185242


training:   5%|▌         | 430/8300 [1:54:42<34:19:27, 15.70s/it]

training loss: 1.0463933944702148


training:   5%|▌         | 431/8300 [1:54:58<34:19:26, 15.70s/it]

training loss: 0.7007606029510498


training:   5%|▌         | 432/8300 [1:55:14<34:19:27, 15.71s/it]

training loss: 0.4189611077308655


training:   5%|▌         | 433/8300 [1:55:29<34:19:23, 15.71s/it]

training loss: 0.877302885055542


training:   5%|▌         | 434/8300 [1:55:45<34:19:18, 15.71s/it]

training loss: 0.8428149223327637


training:   5%|▌         | 435/8300 [1:56:01<34:18:53, 15.71s/it]

training loss: 0.7393738031387329


training:   5%|▌         | 436/8300 [1:56:16<34:18:25, 15.71s/it]

training loss: 0.7018154263496399


training:   5%|▌         | 437/8300 [1:56:32<34:18:08, 15.71s/it]

training loss: 1.121989369392395


training:   5%|▌         | 438/8300 [1:56:48<34:18:03, 15.71s/it]

training loss: 0.6694937944412231


training:   5%|▌         | 439/8300 [1:57:04<34:17:53, 15.71s/it]

training loss: 0.9159865975379944


training:   5%|▌         | 440/8300 [1:57:19<34:17:27, 15.71s/it]

training loss: 1.1650543212890625


training:   5%|▌         | 441/8300 [1:57:35<34:17:02, 15.70s/it]

training loss: 0.6375371813774109


training:   5%|▌         | 442/8300 [1:57:51<34:16:51, 15.71s/it]

training loss: 0.8368911147117615


training:   5%|▌         | 443/8300 [1:58:06<34:16:49, 15.71s/it]

training loss: 0.900881826877594


training:   5%|▌         | 444/8300 [1:58:22<34:16:28, 15.71s/it]

training loss: 1.0627944469451904


training:   5%|▌         | 445/8300 [1:58:38<34:16:19, 15.71s/it]

training loss: 0.9091440439224243


training:   5%|▌         | 446/8300 [1:58:53<34:15:57, 15.71s/it]

training loss: 1.0962826013565063


training:   5%|▌         | 447/8300 [1:59:09<34:15:38, 15.71s/it]

training loss: 1.2733702659606934


training:   5%|▌         | 448/8300 [1:59:25<34:15:28, 15.71s/it]

training loss: 0.7597411274909973


training:   5%|▌         | 449/8300 [1:59:41<34:15:10, 15.71s/it]

training loss: 0.8416672348976135


training:   5%|▌         | 450/8300 [1:59:56<34:14:59, 15.71s/it]

training loss: 0.6284093260765076


training:   5%|▌         | 451/8300 [2:00:12<34:14:48, 15.71s/it]

training loss: 0.787979245185852


training:   5%|▌         | 452/8300 [2:00:28<34:14:26, 15.71s/it]

training loss: 1.1620837450027466


training:   5%|▌         | 453/8300 [2:00:43<34:13:45, 15.70s/it]

training loss: 0.7959295511245728


training:   5%|▌         | 454/8300 [2:00:59<34:13:30, 15.70s/it]

training loss: 0.6993532180786133


training:   5%|▌         | 455/8300 [2:01:15<34:13:16, 15.70s/it]

training loss: 0.6380179524421692


training:   5%|▌         | 456/8300 [2:01:31<34:12:44, 15.70s/it]

training loss: 0.7259894013404846


training:   6%|▌         | 457/8300 [2:01:46<34:12:20, 15.70s/it]

training loss: 0.44675779342651367


training:   6%|▌         | 458/8300 [2:02:02<34:12:18, 15.70s/it]

training loss: 0.6617438793182373


training:   6%|▌         | 459/8300 [2:02:18<34:12:51, 15.71s/it]

training loss: 1.0992735624313354


training:   6%|▌         | 460/8300 [2:02:33<34:12:20, 15.71s/it]

training loss: 0.9568508863449097


training:   6%|▌         | 461/8300 [2:02:49<34:11:40, 15.70s/it]

training loss: 1.0758368968963623


training:   6%|▌         | 462/8300 [2:03:05<34:11:27, 15.70s/it]

training loss: 0.9419045448303223


training:   6%|▌         | 463/8300 [2:03:20<34:10:55, 15.70s/it]

training loss: 0.9028981328010559


training:   6%|▌         | 464/8300 [2:03:36<34:10:44, 15.70s/it]

training loss: 0.6254451870918274


training:   6%|▌         | 465/8300 [2:03:52<34:10:30, 15.70s/it]

training loss: 0.7501960396766663


training:   6%|▌         | 466/8300 [2:04:08<34:10:05, 15.70s/it]

training loss: 0.7369458079338074


training:   6%|▌         | 467/8300 [2:04:23<34:09:35, 15.70s/it]

training loss: 0.674571692943573


training:   6%|▌         | 468/8300 [2:04:39<34:08:55, 15.70s/it]

training loss: 0.9303427934646606


training:   6%|▌         | 469/8300 [2:04:55<34:08:20, 15.69s/it]

training loss: 0.7019068598747253


training:   6%|▌         | 470/8300 [2:05:10<34:08:39, 15.70s/it]

training loss: 1.0651650428771973


training:   6%|▌         | 471/8300 [2:05:26<34:08:13, 15.70s/it]

training loss: 0.9008319973945618


training:   6%|▌         | 472/8300 [2:05:42<34:07:54, 15.70s/it]

training loss: 0.9174713492393494


training:   6%|▌         | 473/8300 [2:05:57<34:07:34, 15.70s/it]

training loss: 0.8606660962104797


training:   6%|▌         | 474/8300 [2:06:13<34:07:08, 15.69s/it]

training loss: 0.8540983200073242


training:   6%|▌         | 475/8300 [2:06:29<34:07:04, 15.70s/it]

training loss: 1.1288161277770996


training:   6%|▌         | 476/8300 [2:06:45<34:07:02, 15.70s/it]

training loss: 0.8089954257011414


training:   6%|▌         | 477/8300 [2:07:00<34:06:54, 15.70s/it]

training loss: 1.1513900756835938


training:   6%|▌         | 478/8300 [2:07:16<34:06:38, 15.70s/it]

training loss: 0.5660330057144165


training:   6%|▌         | 479/8300 [2:07:32<34:06:21, 15.70s/it]

training loss: 0.4356735646724701


training:   6%|▌         | 480/8300 [2:07:47<34:06:09, 15.70s/it]

training loss: 0.9387140274047852


training:   6%|▌         | 481/8300 [2:08:03<34:05:59, 15.70s/it]

training loss: 0.7717302441596985


training:   6%|▌         | 482/8300 [2:08:19<34:05:28, 15.70s/it]

training loss: 0.5563887357711792


training:   6%|▌         | 483/8300 [2:08:34<34:05:19, 15.70s/it]

training loss: 1.0228002071380615


training:   6%|▌         | 484/8300 [2:08:50<34:05:11, 15.70s/it]

training loss: 0.49784064292907715


training:   6%|▌         | 485/8300 [2:09:06<34:05:12, 15.70s/it]

training loss: 0.7298679947853088


training:   6%|▌         | 486/8300 [2:09:22<34:04:41, 15.70s/it]

training loss: 0.6147449016571045


training:   6%|▌         | 487/8300 [2:09:37<34:04:28, 15.70s/it]

training loss: 0.558466911315918


training:   6%|▌         | 488/8300 [2:09:53<34:04:03, 15.70s/it]

training loss: 0.6877837181091309


training:   6%|▌         | 489/8300 [2:10:09<34:04:12, 15.70s/it]

training loss: 1.062200903892517


training:   6%|▌         | 490/8300 [2:10:24<34:03:49, 15.70s/it]

training loss: 0.8638724088668823


training:   6%|▌         | 491/8300 [2:10:40<34:03:23, 15.70s/it]

training loss: 0.3965279161930084


training:   6%|▌         | 492/8300 [2:10:56<34:03:05, 15.70s/it]

training loss: 1.0933692455291748


training:   6%|▌         | 493/8300 [2:11:11<34:02:40, 15.70s/it]

training loss: 0.759749174118042


training:   6%|▌         | 494/8300 [2:11:27<34:02:16, 15.70s/it]

training loss: 0.6335517168045044


training:   6%|▌         | 495/8300 [2:11:43<34:02:17, 15.70s/it]

training loss: 0.4888974130153656


training:   6%|▌         | 496/8300 [2:11:58<34:01:44, 15.70s/it]

training loss: 0.9113889336585999


training:   6%|▌         | 497/8300 [2:12:14<34:01:44, 15.70s/it]

training loss: 0.880459189414978


training:   6%|▌         | 498/8300 [2:12:30<34:01:01, 15.70s/it]

training loss: 0.6256631016731262


training:   6%|▌         | 499/8300 [2:12:46<34:00:44, 15.70s/it]

training loss: 0.8007513880729675


training:   6%|▌         | 500/8300 [2:13:01<34:00:38, 15.70s/it]

training loss: 0.598896861076355
training loss: 0.998451828956604


training:   6%|▌         | 501/8300 [2:13:18<34:50:45, 16.08s/it]

validation loss: 1.5140372514724731


training:   6%|▌         | 502/8300 [2:13:34<34:36:25, 15.98s/it]

training loss: 0.6180271506309509


training:   6%|▌         | 503/8300 [2:13:50<34:25:22, 15.89s/it]

training loss: 1.052009105682373


training:   6%|▌         | 504/8300 [2:14:05<34:17:40, 15.84s/it]

training loss: 0.5343267917633057


training:   6%|▌         | 505/8300 [2:14:21<34:12:08, 15.80s/it]

training loss: 0.6582867503166199


training:   6%|▌         | 506/8300 [2:14:37<34:07:53, 15.77s/it]

training loss: 0.9718869924545288


training:   6%|▌         | 507/8300 [2:14:52<34:05:01, 15.75s/it]

training loss: 0.5057703256607056


training:   6%|▌         | 508/8300 [2:15:08<34:03:17, 15.73s/it]

training loss: 0.3895756006240845


training:   6%|▌         | 509/8300 [2:15:24<34:01:42, 15.72s/it]

training loss: 0.5436476469039917


training:   6%|▌         | 510/8300 [2:15:40<34:00:37, 15.72s/it]

training loss: 0.35723626613616943


training:   6%|▌         | 511/8300 [2:15:55<33:59:50, 15.71s/it]

training loss: 0.7400758862495422


training:   6%|▌         | 512/8300 [2:16:11<33:58:52, 15.71s/it]

training loss: 0.6575961112976074


training:   6%|▌         | 513/8300 [2:16:27<33:58:11, 15.70s/it]

training loss: 0.6937393546104431


training:   6%|▌         | 514/8300 [2:16:42<33:57:27, 15.70s/it]

training loss: 0.7482770085334778


training:   6%|▌         | 515/8300 [2:16:58<33:57:10, 15.70s/it]

training loss: 0.5921810865402222


training:   6%|▌         | 516/8300 [2:17:14<33:57:12, 15.70s/it]

training loss: 1.0990917682647705


training:   6%|▌         | 517/8300 [2:17:30<33:57:08, 15.70s/it]

training loss: 0.557336688041687


training:   6%|▌         | 518/8300 [2:17:45<33:56:44, 15.70s/it]

training loss: 0.7051987051963806


training:   6%|▋         | 519/8300 [2:18:01<33:56:16, 15.70s/it]

training loss: 0.8909579515457153


training:   6%|▋         | 520/8300 [2:18:17<33:56:09, 15.70s/it]

training loss: 0.6702014207839966


training:   6%|▋         | 521/8300 [2:18:32<33:55:44, 15.70s/it]

training loss: 0.8939547538757324


training:   6%|▋         | 522/8300 [2:18:48<33:55:37, 15.70s/it]

training loss: 1.2046163082122803


training:   6%|▋         | 523/8300 [2:19:04<33:55:26, 15.70s/it]

training loss: 0.9599307775497437


training:   6%|▋         | 524/8300 [2:19:19<33:55:04, 15.70s/it]

training loss: 0.5066095590591431


training:   6%|▋         | 525/8300 [2:19:35<33:54:44, 15.70s/it]

training loss: 0.898768961429596


training:   6%|▋         | 526/8300 [2:19:51<33:54:10, 15.70s/it]

training loss: 0.838777482509613


training:   6%|▋         | 527/8300 [2:20:07<33:54:12, 15.70s/it]

training loss: 1.0088645219802856


training:   6%|▋         | 528/8300 [2:20:22<33:53:44, 15.70s/it]

training loss: 0.6494571566581726


training:   6%|▋         | 529/8300 [2:20:38<33:53:44, 15.70s/it]

training loss: 0.879729151725769


training:   6%|▋         | 530/8300 [2:20:54<33:53:36, 15.70s/it]

training loss: 0.29636308550834656


training:   6%|▋         | 531/8300 [2:21:09<33:53:26, 15.70s/it]

training loss: 0.47367745637893677


training:   6%|▋         | 532/8300 [2:21:25<33:53:15, 15.70s/it]

training loss: 0.6300216317176819


training:   6%|▋         | 533/8300 [2:21:41<33:52:56, 15.70s/it]

training loss: 0.3572203814983368


training:   6%|▋         | 534/8300 [2:21:56<33:52:52, 15.71s/it]

training loss: 0.5967122316360474


training:   6%|▋         | 535/8300 [2:22:12<33:52:37, 15.71s/it]

training loss: 0.9201610088348389


training:   6%|▋         | 536/8300 [2:22:28<33:52:47, 15.71s/it]

training loss: 0.4570868909358978


training:   6%|▋         | 537/8300 [2:22:44<33:52:28, 15.71s/it]

training loss: 1.0621651411056519


training:   6%|▋         | 538/8300 [2:22:59<33:52:12, 15.71s/it]

training loss: 0.7917473912239075


training:   6%|▋         | 539/8300 [2:23:15<33:51:38, 15.71s/it]

training loss: 0.686250627040863


training:   7%|▋         | 540/8300 [2:23:31<33:51:10, 15.70s/it]

training loss: 0.5143970251083374


training:   7%|▋         | 541/8300 [2:23:46<33:50:44, 15.70s/it]

training loss: 1.1083049774169922


training:   7%|▋         | 542/8300 [2:24:02<33:50:39, 15.71s/it]

training loss: 0.6212708950042725


training:   7%|▋         | 543/8300 [2:24:18<33:50:34, 15.71s/it]

training loss: 0.6057673096656799


training:   7%|▋         | 544/8300 [2:24:34<33:50:01, 15.70s/it]

training loss: 0.9414055943489075


training:   7%|▋         | 545/8300 [2:24:49<33:49:36, 15.70s/it]

training loss: 0.9390817284584045


training:   7%|▋         | 546/8300 [2:25:05<33:49:51, 15.71s/it]

training loss: 0.6830762028694153


training:   7%|▋         | 547/8300 [2:25:21<33:49:27, 15.71s/it]

training loss: 0.42807286977767944


training:   7%|▋         | 548/8300 [2:25:36<33:49:14, 15.71s/it]

training loss: 0.7700023055076599


training:   7%|▋         | 549/8300 [2:25:52<33:48:39, 15.70s/it]

training loss: 0.9052293300628662


training:   7%|▋         | 550/8300 [2:26:08<33:48:49, 15.71s/it]

training loss: 0.8364328145980835


training:   7%|▋         | 551/8300 [2:26:23<33:48:26, 15.71s/it]

training loss: 0.8143635392189026


training:   7%|▋         | 552/8300 [2:26:39<33:48:04, 15.71s/it]

training loss: 0.989484429359436


training:   7%|▋         | 553/8300 [2:26:55<33:47:55, 15.71s/it]

training loss: 0.7485776543617249


training:   7%|▋         | 554/8300 [2:27:11<33:47:33, 15.71s/it]

training loss: 1.1817930936813354


training:   7%|▋         | 555/8300 [2:27:26<33:47:13, 15.70s/it]

training loss: 0.8280414342880249


training:   7%|▋         | 556/8300 [2:27:42<33:46:46, 15.70s/it]

training loss: 0.8134676814079285


training:   7%|▋         | 557/8300 [2:27:58<33:46:21, 15.70s/it]

training loss: 0.7526811957359314


training:   7%|▋         | 558/8300 [2:28:13<33:45:50, 15.70s/it]

training loss: 0.9131920337677002


training:   7%|▋         | 559/8300 [2:28:29<33:45:48, 15.70s/it]

training loss: 0.9139742851257324


training:   7%|▋         | 560/8300 [2:28:45<33:45:22, 15.70s/it]

training loss: 0.833137571811676


training:   7%|▋         | 561/8300 [2:29:00<33:45:23, 15.70s/it]

training loss: 0.7998099327087402


training:   7%|▋         | 562/8300 [2:29:16<33:45:01, 15.70s/it]

training loss: 0.6633114218711853


training:   7%|▋         | 563/8300 [2:29:32<33:44:56, 15.70s/it]

training loss: 1.205353021621704


training:   7%|▋         | 564/8300 [2:29:48<33:44:39, 15.70s/it]

training loss: 1.1791280508041382


training:   7%|▋         | 565/8300 [2:30:03<33:44:29, 15.70s/it]

training loss: 0.2902566194534302


training:   7%|▋         | 566/8300 [2:30:19<33:44:08, 15.70s/it]

training loss: 0.9462946057319641


training:   7%|▋         | 567/8300 [2:30:35<33:43:48, 15.70s/it]

training loss: 0.7411024570465088


training:   7%|▋         | 568/8300 [2:30:50<33:43:26, 15.70s/it]

training loss: 0.674537181854248


training:   7%|▋         | 569/8300 [2:31:06<33:43:33, 15.70s/it]

training loss: 0.8970931768417358


training:   7%|▋         | 570/8300 [2:31:22<33:43:28, 15.71s/it]

training loss: 0.6766681671142578


training:   7%|▋         | 571/8300 [2:31:38<33:43:21, 15.71s/it]

training loss: 0.7929567098617554


training:   7%|▋         | 572/8300 [2:31:53<33:42:47, 15.70s/it]

training loss: 0.7121745347976685


training:   7%|▋         | 573/8300 [2:32:09<33:42:38, 15.71s/it]

training loss: 0.6404822468757629


training:   7%|▋         | 574/8300 [2:32:25<33:42:09, 15.70s/it]

training loss: 0.5918853878974915


training:   7%|▋         | 575/8300 [2:32:40<33:41:57, 15.70s/it]

training loss: 1.093450903892517


training:   7%|▋         | 576/8300 [2:32:56<33:41:21, 15.70s/it]

training loss: 0.7646723985671997


training:   7%|▋         | 577/8300 [2:33:12<33:41:05, 15.70s/it]

training loss: 0.30441340804100037


training:   7%|▋         | 578/8300 [2:33:27<33:40:51, 15.70s/it]

training loss: 0.5375566482543945


training:   7%|▋         | 579/8300 [2:33:43<33:40:46, 15.70s/it]

training loss: 0.5245155096054077


training:   7%|▋         | 580/8300 [2:33:59<33:40:25, 15.70s/it]

training loss: 0.971532940864563


training:   7%|▋         | 581/8300 [2:34:15<33:40:08, 15.70s/it]

training loss: 0.830303966999054


training:   7%|▋         | 582/8300 [2:34:30<33:40:00, 15.70s/it]

training loss: 0.657800555229187


training:   7%|▋         | 583/8300 [2:34:46<33:39:38, 15.70s/it]

training loss: 0.7674970030784607


training:   7%|▋         | 584/8300 [2:35:02<33:39:48, 15.71s/it]

training loss: 0.493252694606781


training:   7%|▋         | 585/8300 [2:35:17<33:39:47, 15.71s/it]

training loss: 0.6224771738052368


training:   7%|▋         | 586/8300 [2:35:33<33:39:43, 15.71s/it]

training loss: 0.9436562061309814


training:   7%|▋         | 587/8300 [2:35:49<33:39:02, 15.71s/it]

training loss: 0.6196538209915161


training:   7%|▋         | 588/8300 [2:36:05<33:38:53, 15.71s/it]

training loss: 0.8059438467025757


training:   7%|▋         | 589/8300 [2:36:20<33:38:52, 15.71s/it]

training loss: 0.8274769186973572


training:   7%|▋         | 590/8300 [2:36:36<33:38:44, 15.71s/it]

training loss: 0.4566364586353302


training:   7%|▋         | 591/8300 [2:36:52<33:38:07, 15.71s/it]

training loss: 0.7517569065093994


training:   7%|▋         | 592/8300 [2:37:07<33:37:55, 15.71s/it]

training loss: 0.7435191869735718


training:   7%|▋         | 593/8300 [2:37:23<33:37:42, 15.71s/it]

training loss: 0.6349989175796509


training:   7%|▋         | 594/8300 [2:37:39<33:37:34, 15.71s/it]

training loss: 0.8993900418281555


training:   7%|▋         | 595/8300 [2:37:54<33:37:02, 15.71s/it]

training loss: 0.3916221857070923


training:   7%|▋         | 596/8300 [2:38:10<33:36:37, 15.71s/it]

training loss: 0.7308503985404968


training:   7%|▋         | 597/8300 [2:38:26<33:36:06, 15.70s/it]

training loss: 0.9744468927383423


training:   7%|▋         | 598/8300 [2:38:42<33:35:51, 15.70s/it]

training loss: 0.6722382307052612


training:   7%|▋         | 599/8300 [2:38:57<33:35:45, 15.71s/it]

training loss: 0.7803821563720703


training:   7%|▋         | 600/8300 [2:39:13<33:35:24, 15.70s/it]

training loss: 0.5948107242584229
training loss: 0.7118897438049316


training:   7%|▋         | 601/8300 [2:39:30<34:24:08, 16.09s/it]

validation loss: 1.5152584314346313


training:   7%|▋         | 602/8300 [2:39:46<34:09:39, 15.98s/it]

training loss: 0.7867711782455444


training:   7%|▋         | 603/8300 [2:40:01<33:59:24, 15.90s/it]

training loss: 0.6488112807273865


training:   7%|▋         | 604/8300 [2:40:17<33:52:01, 15.84s/it]

training loss: 1.0414339303970337


training:   7%|▋         | 605/8300 [2:40:33<33:46:11, 15.80s/it]

training loss: 0.9684621691703796


training:   7%|▋         | 606/8300 [2:40:49<33:42:14, 15.77s/it]

training loss: 0.6228499412536621


training:   7%|▋         | 607/8300 [2:41:04<33:39:25, 15.75s/it]

training loss: 0.802297830581665


training:   7%|▋         | 608/8300 [2:41:20<33:37:35, 15.74s/it]

training loss: 0.4728226065635681


training:   7%|▋         | 609/8300 [2:41:36<33:36:50, 15.73s/it]

training loss: 0.6477209329605103


training:   7%|▋         | 610/8300 [2:41:51<33:35:27, 15.73s/it]

training loss: 0.7278491854667664


training:   7%|▋         | 611/8300 [2:42:07<33:34:46, 15.72s/it]

training loss: 0.882845938205719


training:   7%|▋         | 612/8300 [2:42:23<33:33:38, 15.72s/it]

training loss: 1.181351661682129


training:   7%|▋         | 613/8300 [2:42:38<33:33:02, 15.71s/it]

training loss: 0.6504636406898499


training:   7%|▋         | 614/8300 [2:42:54<33:32:33, 15.71s/it]

training loss: 0.9256566166877747


training:   7%|▋         | 615/8300 [2:43:10<33:32:05, 15.71s/it]

training loss: 0.7957782745361328


training:   7%|▋         | 616/8300 [2:43:26<33:31:26, 15.71s/it]

training loss: 0.6870883107185364


training:   7%|▋         | 617/8300 [2:43:41<33:31:17, 15.71s/it]

training loss: 0.6548132300376892


training:   7%|▋         | 618/8300 [2:43:57<33:30:51, 15.71s/it]

training loss: 0.9818069338798523


training:   7%|▋         | 619/8300 [2:44:13<33:30:14, 15.70s/it]

training loss: 0.5230731964111328


training:   7%|▋         | 620/8300 [2:44:28<33:30:06, 15.70s/it]

training loss: 0.868002712726593


training:   7%|▋         | 621/8300 [2:44:44<33:30:01, 15.71s/it]

training loss: 0.624931812286377


training:   7%|▋         | 622/8300 [2:45:00<33:29:56, 15.71s/it]

training loss: 0.6117410659790039


training:   8%|▊         | 623/8300 [2:45:16<33:29:41, 15.71s/it]

training loss: 0.960537314414978


training:   8%|▊         | 624/8300 [2:45:31<33:29:27, 15.71s/it]

training loss: 0.5512666702270508


training:   8%|▊         | 625/8300 [2:45:47<33:29:24, 15.71s/it]

training loss: 0.7823353409767151


training:   8%|▊         | 626/8300 [2:46:03<33:29:12, 15.71s/it]

training loss: 0.8628818988800049


training:   8%|▊         | 627/8300 [2:46:18<33:28:50, 15.71s/it]

training loss: 0.7409696578979492


training:   8%|▊         | 628/8300 [2:46:34<33:28:23, 15.71s/it]

training loss: 0.9900012612342834


training:   8%|▊         | 629/8300 [2:46:50<33:28:32, 15.71s/it]

training loss: 0.6021760106086731


training:   8%|▊         | 630/8300 [2:47:06<33:28:23, 15.71s/it]

training loss: 0.5741057395935059


training:   8%|▊         | 631/8300 [2:47:21<33:27:52, 15.71s/it]

training loss: 0.9938443303108215


training:   8%|▊         | 632/8300 [2:47:37<33:27:30, 15.71s/it]

training loss: 0.7039567828178406


training:   8%|▊         | 633/8300 [2:47:53<33:27:09, 15.71s/it]

training loss: 1.1246198415756226


training:   8%|▊         | 634/8300 [2:48:08<33:26:59, 15.71s/it]

training loss: 0.713094174861908


training:   8%|▊         | 635/8300 [2:48:24<33:26:20, 15.71s/it]

training loss: 0.7750738859176636


training:   8%|▊         | 636/8300 [2:48:40<33:26:04, 15.71s/it]

training loss: 0.6616119146347046


training:   8%|▊         | 637/8300 [2:48:55<33:25:49, 15.71s/it]

training loss: 0.6748597621917725


training:   8%|▊         | 638/8300 [2:49:11<33:25:33, 15.71s/it]

training loss: 1.0923833847045898


training:   8%|▊         | 639/8300 [2:49:27<33:25:35, 15.71s/it]

training loss: 0.6430767178535461


training:   8%|▊         | 640/8300 [2:49:43<33:25:42, 15.71s/it]

training loss: 0.7293815016746521


training:   8%|▊         | 641/8300 [2:49:58<33:25:30, 15.71s/it]

training loss: 0.7600004076957703


training:   8%|▊         | 642/8300 [2:50:14<33:25:05, 15.71s/it]

training loss: 0.858004093170166


training:   8%|▊         | 643/8300 [2:50:30<33:24:34, 15.71s/it]

training loss: 1.0191075801849365


training:   8%|▊         | 644/8300 [2:50:45<33:24:09, 15.71s/it]

training loss: 0.6978217363357544


training:   8%|▊         | 645/8300 [2:51:01<33:23:42, 15.71s/it]

training loss: 1.1447261571884155


training:   8%|▊         | 646/8300 [2:51:17<33:23:21, 15.70s/it]

training loss: 0.600492000579834


training:   8%|▊         | 647/8300 [2:51:33<33:22:54, 15.70s/it]

training loss: 0.7468650937080383


training:   8%|▊         | 648/8300 [2:51:48<33:22:57, 15.71s/it]

training loss: 0.4590317904949188


training:   8%|▊         | 649/8300 [2:52:04<33:23:04, 15.71s/it]

training loss: 0.7040138244628906


training:   8%|▊         | 650/8300 [2:52:20<33:22:25, 15.71s/it]

training loss: 0.768528163433075


training:   8%|▊         | 651/8300 [2:52:35<33:22:14, 15.71s/it]

training loss: 0.7622219324111938


training:   8%|▊         | 652/8300 [2:52:51<33:21:49, 15.70s/it]

training loss: 0.8407727479934692


training:   8%|▊         | 653/8300 [2:53:07<33:21:39, 15.71s/it]

training loss: 0.783033013343811


training:   8%|▊         | 654/8300 [2:53:22<33:21:17, 15.70s/it]

training loss: 0.6458157300949097


training:   8%|▊         | 655/8300 [2:53:38<33:21:23, 15.71s/it]

training loss: 0.6799373030662537


training:   8%|▊         | 656/8300 [2:53:54<33:20:58, 15.71s/it]

training loss: 0.538463830947876


training:   8%|▊         | 657/8300 [2:54:10<33:21:12, 15.71s/it]

training loss: 0.7572190165519714


training:   8%|▊         | 658/8300 [2:54:25<33:20:35, 15.71s/it]

training loss: 0.7245705127716064


training:   8%|▊         | 659/8300 [2:54:41<33:20:30, 15.71s/it]

training loss: 0.5923210382461548


training:   8%|▊         | 660/8300 [2:54:57<33:20:00, 15.71s/it]

training loss: 0.8232468962669373


training:   8%|▊         | 661/8300 [2:55:12<33:19:42, 15.71s/it]

training loss: 0.449546754360199


training:   8%|▊         | 662/8300 [2:55:28<33:19:19, 15.71s/it]

training loss: 0.6718094348907471


training:   8%|▊         | 663/8300 [2:55:44<33:19:39, 15.71s/it]

training loss: 0.697993278503418


training:   8%|▊         | 664/8300 [2:56:00<33:19:10, 15.71s/it]

training loss: 0.5236429572105408


training:   8%|▊         | 665/8300 [2:56:15<33:18:39, 15.71s/it]

training loss: 0.304782897233963


training:   8%|▊         | 666/8300 [2:56:31<33:18:09, 15.70s/it]

training loss: 0.8202404379844666


training:   8%|▊         | 667/8300 [2:56:47<33:17:56, 15.71s/it]

training loss: 0.8522794842720032


training:   8%|▊         | 668/8300 [2:57:02<33:18:12, 15.71s/it]

training loss: 0.579917848110199


training:   8%|▊         | 669/8300 [2:57:18<33:18:18, 15.71s/it]

training loss: 0.7105635404586792


training:   8%|▊         | 670/8300 [2:57:34<33:18:02, 15.71s/it]

training loss: 1.1879594326019287


training:   8%|▊         | 671/8300 [2:57:50<33:17:55, 15.71s/it]

training loss: 0.43231725692749023


training:   8%|▊         | 672/8300 [2:58:05<33:17:42, 15.71s/it]

training loss: 0.9411157965660095


training:   8%|▊         | 673/8300 [2:58:21<33:17:20, 15.71s/it]

training loss: 0.9526355266571045


training:   8%|▊         | 674/8300 [2:58:37<33:16:46, 15.71s/it]

training loss: 0.9141981601715088


training:   8%|▊         | 675/8300 [2:58:52<33:16:31, 15.71s/it]

training loss: 0.6029448509216309


training:   8%|▊         | 676/8300 [2:59:08<33:16:21, 15.71s/it]

training loss: 0.9466440677642822


training:   8%|▊         | 677/8300 [2:59:24<33:15:56, 15.71s/it]

training loss: 0.5610784292221069


training:   8%|▊         | 678/8300 [2:59:39<33:15:57, 15.71s/it]

training loss: 1.2187089920043945


training:   8%|▊         | 679/8300 [2:59:55<33:16:00, 15.71s/it]

training loss: 0.8538336157798767


training:   8%|▊         | 680/8300 [3:00:11<33:15:30, 15.71s/it]

training loss: 0.8111722469329834


training:   8%|▊         | 681/8300 [3:00:27<33:14:56, 15.71s/it]

training loss: 0.6859180927276611


training:   8%|▊         | 682/8300 [3:00:42<33:14:40, 15.71s/it]

training loss: 0.8748751282691956


training:   8%|▊         | 683/8300 [3:00:58<33:14:19, 15.71s/it]

training loss: 0.895008385181427


training:   8%|▊         | 684/8300 [3:01:14<33:13:56, 15.71s/it]

training loss: 0.6652472019195557


training:   8%|▊         | 685/8300 [3:01:29<33:13:54, 15.71s/it]

training loss: 1.304484486579895


training:   8%|▊         | 686/8300 [3:01:45<33:13:39, 15.71s/it]

training loss: 0.5663145780563354


training:   8%|▊         | 687/8300 [3:02:01<33:13:16, 15.71s/it]

training loss: 0.5896127820014954


training:   8%|▊         | 688/8300 [3:02:17<33:16:17, 15.74s/it]

training loss: 0.4968924820423126


training:   8%|▊         | 689/8300 [3:02:32<33:14:39, 15.72s/it]

training loss: 0.49363839626312256


training:   8%|▊         | 690/8300 [3:02:48<33:13:44, 15.72s/it]

training loss: 0.8164718151092529


training:   8%|▊         | 691/8300 [3:03:04<33:12:45, 15.71s/it]

training loss: 0.7656480073928833


training:   8%|▊         | 692/8300 [3:03:19<33:12:03, 15.71s/it]

training loss: 0.6046932339668274


training:   8%|▊         | 693/8300 [3:03:35<33:11:39, 15.71s/it]

training loss: 1.137967824935913


training:   8%|▊         | 694/8300 [3:03:51<33:10:59, 15.71s/it]

training loss: 1.0414650440216064


training:   8%|▊         | 695/8300 [3:04:07<33:10:45, 15.71s/it]

training loss: 0.7741129398345947


training:   8%|▊         | 696/8300 [3:04:22<33:10:02, 15.70s/it]

training loss: 0.7714828252792358


training:   8%|▊         | 697/8300 [3:04:38<33:10:05, 15.71s/it]

training loss: 0.8773293495178223


training:   8%|▊         | 698/8300 [3:04:54<33:09:39, 15.70s/it]

training loss: 0.7807479500770569


training:   8%|▊         | 699/8300 [3:05:09<33:09:34, 15.71s/it]

training loss: 0.7975338697433472


training:   8%|▊         | 700/8300 [3:05:25<33:09:41, 15.71s/it]

training loss: 1.0767656564712524
training loss: 0.8906106352806091


training:   8%|▊         | 701/8300 [3:05:42<33:58:01, 16.09s/it]

validation loss: 1.478755235671997


training:   8%|▊         | 702/8300 [3:05:58<33:43:30, 15.98s/it]

training loss: 0.8176884651184082


training:   8%|▊         | 703/8300 [3:06:14<33:32:50, 15.90s/it]

training loss: 0.4696662724018097


training:   8%|▊         | 704/8300 [3:06:29<33:24:57, 15.84s/it]

training loss: 0.6003651022911072


training:   8%|▊         | 705/8300 [3:06:45<33:19:45, 15.80s/it]

training loss: 0.8170728087425232


training:   9%|▊         | 706/8300 [3:07:01<33:16:04, 15.77s/it]

training loss: 0.5314735174179077


training:   9%|▊         | 707/8300 [3:07:16<33:13:19, 15.75s/it]

training loss: 0.5754473209381104


training:   9%|▊         | 708/8300 [3:07:32<33:11:13, 15.74s/it]

training loss: 0.569981575012207


training:   9%|▊         | 709/8300 [3:07:48<33:10:02, 15.73s/it]

training loss: 0.2516459822654724


training:   9%|▊         | 710/8300 [3:08:03<33:09:06, 15.72s/it]

training loss: 0.9298018217086792


training:   9%|▊         | 711/8300 [3:08:19<33:08:14, 15.72s/it]

training loss: 0.8241459727287292


training:   9%|▊         | 712/8300 [3:08:35<33:07:18, 15.71s/it]

training loss: 0.8138511776924133


training:   9%|▊         | 713/8300 [3:08:51<33:06:48, 15.71s/it]

training loss: 0.6717325448989868


training:   9%|▊         | 714/8300 [3:09:06<33:06:23, 15.71s/it]

training loss: 0.578887939453125


training:   9%|▊         | 715/8300 [3:09:22<33:05:33, 15.71s/it]

training loss: 0.7514249086380005


training:   9%|▊         | 716/8300 [3:09:38<33:05:20, 15.71s/it]

training loss: 0.7265381813049316


training:   9%|▊         | 717/8300 [3:09:53<33:04:56, 15.71s/it]

training loss: 0.5291265845298767


training:   9%|▊         | 718/8300 [3:10:09<33:04:47, 15.71s/it]

training loss: 0.8052645921707153


training:   9%|▊         | 719/8300 [3:10:25<33:04:24, 15.71s/it]

training loss: 0.990828275680542


training:   9%|▊         | 720/8300 [3:10:41<33:04:20, 15.71s/it]

training loss: 0.6682748794555664


training:   9%|▊         | 721/8300 [3:10:56<33:03:56, 15.71s/it]

training loss: 0.724325954914093


training:   9%|▊         | 722/8300 [3:11:12<33:03:39, 15.71s/it]

training loss: 1.153120994567871


training:   9%|▊         | 723/8300 [3:11:28<33:03:06, 15.70s/it]

training loss: 0.5688132643699646


training:   9%|▊         | 724/8300 [3:11:43<33:03:05, 15.71s/it]

training loss: 0.4988793432712555


training:   9%|▊         | 725/8300 [3:11:59<33:02:42, 15.70s/it]

training loss: 0.8842964768409729


training:   9%|▊         | 726/8300 [3:12:15<33:02:20, 15.70s/it]

training loss: 0.4226856231689453


training:   9%|▉         | 727/8300 [3:12:30<33:01:57, 15.70s/it]

training loss: 0.8783926963806152


training:   9%|▉         | 728/8300 [3:12:46<33:01:50, 15.70s/it]

training loss: 0.6042082905769348


training:   9%|▉         | 729/8300 [3:13:02<33:01:51, 15.71s/it]

training loss: 0.6987910866737366


training:   9%|▉         | 730/8300 [3:13:18<33:01:34, 15.71s/it]

training loss: 1.102146029472351


training:   9%|▉         | 731/8300 [3:13:33<33:01:07, 15.70s/it]

training loss: 0.7621975541114807


training:   9%|▉         | 732/8300 [3:13:49<33:00:55, 15.71s/it]

training loss: 0.672805666923523


training:   9%|▉         | 733/8300 [3:14:05<33:00:58, 15.71s/it]

training loss: 0.7243293523788452


training:   9%|▉         | 734/8300 [3:14:20<33:00:23, 15.70s/it]

training loss: 0.8969091176986694


training:   9%|▉         | 735/8300 [3:14:36<32:59:59, 15.70s/it]

training loss: 0.5914223790168762


training:   9%|▉         | 736/8300 [3:14:52<32:59:41, 15.70s/it]

training loss: 0.892091691493988


training:   9%|▉         | 737/8300 [3:15:08<32:59:38, 15.71s/it]

training loss: 0.8626709580421448


training:   9%|▉         | 738/8300 [3:15:23<32:59:14, 15.70s/it]

training loss: 0.843079686164856


training:   9%|▉         | 739/8300 [3:15:39<32:58:53, 15.70s/it]

training loss: 0.6158653497695923


training:   9%|▉         | 740/8300 [3:15:55<32:58:30, 15.70s/it]

training loss: 0.9009673595428467


training:   9%|▉         | 741/8300 [3:16:10<32:58:24, 15.70s/it]

training loss: 1.0383023023605347


training:   9%|▉         | 742/8300 [3:16:26<32:58:37, 15.71s/it]

training loss: 0.8076854944229126


training:   9%|▉         | 743/8300 [3:16:42<32:58:14, 15.71s/it]

training loss: 0.6365648508071899


training:   9%|▉         | 744/8300 [3:16:57<32:57:41, 15.70s/it]

training loss: 0.7671521902084351


training:   9%|▉         | 745/8300 [3:17:13<32:57:23, 15.70s/it]

training loss: 0.8651536107063293


training:   9%|▉         | 746/8300 [3:17:29<32:57:04, 15.70s/it]

training loss: 0.641469419002533


training:   9%|▉         | 747/8300 [3:17:45<32:56:49, 15.70s/it]

training loss: 0.5577396750450134


training:   9%|▉         | 748/8300 [3:18:00<32:56:45, 15.71s/it]

training loss: 0.38151228427886963


training:   9%|▉         | 749/8300 [3:18:16<32:56:11, 15.70s/it]

training loss: 0.5445911288261414


training:   9%|▉         | 750/8300 [3:18:32<32:56:00, 15.70s/it]

training loss: 1.2858483791351318


training:   9%|▉         | 751/8300 [3:18:47<32:55:36, 15.70s/it]

training loss: 1.0066388845443726


training:   9%|▉         | 752/8300 [3:19:03<32:55:38, 15.70s/it]

training loss: 0.5087456107139587


training:   9%|▉         | 753/8300 [3:19:19<32:55:27, 15.71s/it]

training loss: 0.5200842022895813


training:   9%|▉         | 754/8300 [3:19:34<32:55:07, 15.70s/it]

training loss: 1.026544213294983


training:   9%|▉         | 755/8300 [3:19:50<32:54:38, 15.70s/it]

training loss: 0.7553051114082336


training:   9%|▉         | 756/8300 [3:20:06<32:54:46, 15.71s/it]

training loss: 1.0603902339935303


training:   9%|▉         | 757/8300 [3:20:22<32:54:31, 15.71s/it]

training loss: 0.5785320997238159


training:   9%|▉         | 758/8300 [3:20:37<32:54:10, 15.71s/it]

training loss: 0.7794067859649658


training:   9%|▉         | 759/8300 [3:20:53<32:53:45, 15.70s/it]

training loss: 0.8890703320503235


training:   9%|▉         | 760/8300 [3:21:09<32:53:49, 15.71s/it]

training loss: 0.6362093687057495


training:   9%|▉         | 761/8300 [3:21:24<32:53:33, 15.71s/it]

training loss: 0.7562962770462036


training:   9%|▉         | 762/8300 [3:21:40<32:53:09, 15.71s/it]

training loss: 1.0310566425323486


training:   9%|▉         | 763/8300 [3:21:56<32:52:49, 15.71s/it]

training loss: 0.4589175879955292


training:   9%|▉         | 764/8300 [3:22:12<32:52:34, 15.71s/it]

training loss: 0.4963394105434418


training:   9%|▉         | 765/8300 [3:22:27<32:51:55, 15.70s/it]

training loss: 0.774699866771698


training:   9%|▉         | 766/8300 [3:22:43<32:51:29, 15.70s/it]

training loss: 0.5475276708602905


training:   9%|▉         | 767/8300 [3:22:59<32:51:36, 15.70s/it]

training loss: 0.6711187958717346


training:   9%|▉         | 768/8300 [3:23:14<32:51:33, 15.71s/it]

training loss: 1.022742748260498


training:   9%|▉         | 769/8300 [3:23:30<32:51:08, 15.70s/it]

training loss: 0.34082749485969543


training:   9%|▉         | 770/8300 [3:23:46<32:50:44, 15.70s/it]

training loss: 0.9240216016769409


training:   9%|▉         | 771/8300 [3:24:01<32:50:47, 15.71s/it]

training loss: 0.6736278533935547


training:   9%|▉         | 772/8300 [3:24:17<32:50:39, 15.71s/it]

training loss: 0.4436275362968445


training:   9%|▉         | 773/8300 [3:24:33<32:50:21, 15.71s/it]

training loss: 0.940226674079895


training:   9%|▉         | 774/8300 [3:24:49<32:49:48, 15.70s/it]

training loss: 1.081408977508545


training:   9%|▉         | 775/8300 [3:25:04<32:49:45, 15.71s/it]

training loss: 0.7980507016181946


training:   9%|▉         | 776/8300 [3:25:20<32:49:38, 15.71s/it]

training loss: 0.7489517331123352


training:   9%|▉         | 777/8300 [3:25:36<32:49:17, 15.71s/it]

training loss: 1.0964843034744263


training:   9%|▉         | 778/8300 [3:25:51<32:48:48, 15.70s/it]

training loss: 1.2285023927688599


training:   9%|▉         | 779/8300 [3:26:07<32:48:38, 15.71s/it]

training loss: 1.0371665954589844


training:   9%|▉         | 780/8300 [3:26:23<32:48:00, 15.70s/it]

training loss: 0.6364005208015442


training:   9%|▉         | 781/8300 [3:26:38<32:47:40, 15.70s/it]

training loss: 0.5974222421646118


training:   9%|▉         | 782/8300 [3:26:54<32:47:32, 15.70s/it]

training loss: 0.817891538143158


training:   9%|▉         | 783/8300 [3:27:10<32:47:23, 15.70s/it]

training loss: 0.7552186250686646


training:   9%|▉         | 784/8300 [3:27:26<32:47:09, 15.70s/it]

training loss: 0.5818475484848022


training:   9%|▉         | 785/8300 [3:27:41<32:46:33, 15.70s/it]

training loss: 0.6868459582328796


training:   9%|▉         | 786/8300 [3:27:57<32:46:32, 15.70s/it]

training loss: 0.9320915341377258


training:   9%|▉         | 787/8300 [3:28:13<32:46:29, 15.70s/it]

training loss: 0.6887820959091187


training:   9%|▉         | 788/8300 [3:28:28<32:46:08, 15.70s/it]

training loss: 1.1474155187606812


training:  10%|▉         | 789/8300 [3:28:44<32:46:04, 15.71s/it]

training loss: 0.8595126271247864


training:  10%|▉         | 790/8300 [3:29:00<32:45:40, 15.70s/it]

training loss: 0.9459142684936523


training:  10%|▉         | 791/8300 [3:29:16<32:45:11, 15.70s/it]

training loss: 0.8995482921600342


training:  10%|▉         | 792/8300 [3:29:31<32:44:58, 15.70s/it]

training loss: 0.7813800573348999


training:  10%|▉         | 793/8300 [3:29:47<32:44:37, 15.70s/it]

training loss: 0.671129584312439


training:  10%|▉         | 794/8300 [3:30:03<32:44:33, 15.70s/it]

training loss: 0.7784387469291687


training:  10%|▉         | 795/8300 [3:30:18<32:44:15, 15.70s/it]

training loss: 0.7101441621780396


training:  10%|▉         | 796/8300 [3:30:34<32:44:07, 15.70s/it]

training loss: 0.32165980339050293


training:  10%|▉         | 797/8300 [3:30:50<32:43:40, 15.70s/it]

training loss: 0.7929014563560486


training:  10%|▉         | 798/8300 [3:31:05<32:43:46, 15.71s/it]

training loss: 1.1691322326660156


training:  10%|▉         | 799/8300 [3:31:21<32:43:31, 15.71s/it]

training loss: 0.6849414110183716


training:  10%|▉         | 800/8300 [3:31:37<32:43:20, 15.71s/it]

training loss: 0.8462380170822144
training loss: 0.6183061599731445



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.5198259353637695
yvojom jednotlivych projektov.
Bezne hotely za 150 milionov dolarov
Dalsim z prikladov plytvania je podla neho 150 milionov dolarov,
ktore pracovnici Pentagonu minuli za ubytovanie v luxusnych hoteloch
v case, ked investicne projekty v Afganistane realizovali.
Ministerstvo obrany v reakcii uviedlo, ze Sopkove obvinenia su prehnane a
co sa tyka hotelov, tak mali iba beznu kvalitu. Konkretny prinos
kritizovanych projektov vsak nepreukazalo.
Pentagon posobi v Afganistane od roku 2001, ked USA zacali spolu so
spojencami utok s cielom zvrhnut vladu Talibanu, co sa vzapati
podarilo. Subezne s bojovou misiou prebiehala snaha o rekonstrukciu
krajiny, na ktorej bolo podla dostupnych udajov vyclenenych vyse
100 miliard dolarov.
Citajte viac
Americania
predlzia svoju vojensku pritomnost v Afganistane
USA
v Afganistane zabili vysokeho velitela siete Al-Kajda
NATO
predlzilo misiu v Afganistane
Dva
roky ho nikto nevidel. Je vodca Talibanu mrtv


generating:   0%|          | 1/512 [00:00<02:05,  4.06it/s][A
generating:   0%|          | 2/512 [00:00<02:05,  4.06it/s][A
generating:   1%|          | 3/512 [00:00<02:05,  4.06it/s][A
generating:   1%|          | 4/512 [00:00<02:03,  4.13it/s][A
generating:   1%|          | 5/512 [00:01<02:01,  4.18it/s][A
generating:   1%|          | 6/512 [00:01<02:00,  4.20it/s][A
generating:   1%|▏         | 7/512 [00:01<01:59,  4.23it/s][A
generating:   2%|▏         | 8/512 [00:01<02:00,  4.20it/s][A
generating:   2%|▏         | 9/512 [00:02<02:00,  4.19it/s][A
generating:   2%|▏         | 10/512 [00:02<01:59,  4.21it/s][A
generating:   2%|▏         | 11/512 [00:02<01:58,  4.23it/s][A
generating:   2%|▏         | 12/512 [00:02<01:58,  4.20it/s][A
generating:   3%|▎         | 13/512 [00:03<01:57,  4.24it/s][A
generating:   3%|▎         | 14/512 [00:03<01:57,  4.25it/s][A
generating:   3%|▎         | 15/512 [00:03<01:57,  4.25it/s][A
generating:   3%|▎         | 16/512 [00:03<02:01

at
skolamov s velkemu jatuna cena argumentu. Takptoviem.
Tuklo sa stal snemovala, ze autorska policia statov a socialne klimingu sposobodil,
ze toto vystrelovat, povedala, ze v roku
2019, kedy sa podla ktoreho bolo nikoho pohybu totiz klesol tento rok urobil americky budu mat
v Stredoeuropskej komisie. To sto
eska privazera sa zastaralo odohodnut, ale niekdajsieho cinsky dolarit, ktore bude od
Nastupel pre Trump v meste Twitter.com/Ryan Putina od severovych cien sa o dva
proti ich budovania. Tomu vy


training:  10%|▉         | 802/8300 [3:34:11<86:18:16, 41.44s/it] 

training loss: 0.7676504850387573


training:  10%|▉         | 803/8300 [3:34:27<70:13:00, 33.72s/it]

training loss: 0.7595049142837524


training:  10%|▉         | 804/8300 [3:34:42<58:57:19, 28.31s/it]

training loss: 1.0273947715759277


training:  10%|▉         | 805/8300 [3:34:58<51:04:13, 24.53s/it]

training loss: 0.5820215940475464


training:  10%|▉         | 806/8300 [3:35:14<45:33:30, 21.89s/it]

training loss: 0.9323669075965881


training:  10%|▉         | 807/8300 [3:35:29<41:41:28, 20.03s/it]

training loss: 1.005977749824524


training:  10%|▉         | 808/8300 [3:35:45<38:58:55, 18.73s/it]

training loss: 0.9127581715583801


training:  10%|▉         | 809/8300 [3:36:01<37:05:07, 17.82s/it]

training loss: 0.9272361397743225


training:  10%|▉         | 810/8300 [3:36:16<35:45:35, 17.19s/it]

training loss: 1.0658941268920898


training:  10%|▉         | 811/8300 [3:36:32<34:49:33, 16.74s/it]

training loss: 0.6364244222640991


training:  10%|▉         | 812/8300 [3:36:48<34:10:42, 16.43s/it]

training loss: 0.6761449575424194


training:  10%|▉         | 813/8300 [3:37:04<33:43:15, 16.21s/it]

training loss: 0.8961235284805298


training:  10%|▉         | 814/8300 [3:37:19<33:23:47, 16.06s/it]

training loss: 0.6455237865447998


training:  10%|▉         | 815/8300 [3:37:35<33:10:39, 15.96s/it]

training loss: 0.7599573731422424


training:  10%|▉         | 816/8300 [3:37:51<33:01:00, 15.88s/it]

training loss: 0.701980710029602


training:  10%|▉         | 817/8300 [3:38:06<32:54:16, 15.83s/it]

training loss: 0.9719274640083313


training:  10%|▉         | 818/8300 [3:38:22<32:49:28, 15.79s/it]

training loss: 0.9000122547149658


training:  10%|▉         | 819/8300 [3:38:38<32:46:21, 15.77s/it]

training loss: 0.9678628444671631


training:  10%|▉         | 820/8300 [3:38:54<32:43:27, 15.75s/it]

training loss: 0.5930153727531433


training:  10%|▉         | 821/8300 [3:39:09<32:41:52, 15.74s/it]

training loss: 1.0716572999954224


training:  10%|▉         | 822/8300 [3:39:25<32:40:38, 15.73s/it]

training loss: 0.44984591007232666


training:  10%|▉         | 823/8300 [3:39:41<32:39:27, 15.72s/it]

training loss: 0.8623334765434265


training:  10%|▉         | 824/8300 [3:39:56<32:38:36, 15.72s/it]

training loss: 0.8193870186805725


training:  10%|▉         | 825/8300 [3:40:12<32:37:46, 15.71s/it]

training loss: 0.6668732166290283


training:  10%|▉         | 826/8300 [3:40:28<32:37:19, 15.71s/it]

training loss: 0.3522351384162903


training:  10%|▉         | 827/8300 [3:40:43<32:36:32, 15.71s/it]

training loss: 0.9461793899536133


training:  10%|▉         | 828/8300 [3:40:59<32:35:53, 15.71s/it]

training loss: 0.8733184337615967


training:  10%|▉         | 829/8300 [3:41:15<32:35:40, 15.71s/it]

training loss: 0.4095429480075836


training:  10%|█         | 830/8300 [3:41:31<32:35:38, 15.71s/it]

training loss: 0.649789571762085


training:  10%|█         | 831/8300 [3:41:46<32:35:16, 15.71s/it]

training loss: 0.6971277594566345


training:  10%|█         | 832/8300 [3:42:02<32:34:50, 15.71s/it]

training loss: 0.9927440285682678


training:  10%|█         | 833/8300 [3:42:18<32:35:25, 15.71s/it]

training loss: 0.7321910262107849


training:  10%|█         | 834/8300 [3:42:33<32:35:24, 15.71s/it]

training loss: 0.8100520372390747


training:  10%|█         | 835/8300 [3:42:49<32:35:28, 15.72s/it]

training loss: 1.052443265914917


training:  10%|█         | 836/8300 [3:43:05<32:35:45, 15.72s/it]

training loss: 0.6016420125961304


training:  10%|█         | 837/8300 [3:43:21<32:35:22, 15.72s/it]

training loss: 0.9463754296302795


training:  10%|█         | 838/8300 [3:43:36<32:35:16, 15.72s/it]

training loss: 0.755560576915741


training:  10%|█         | 839/8300 [3:43:52<32:35:07, 15.72s/it]

training loss: 0.7844785451889038


training:  10%|█         | 840/8300 [3:44:08<32:34:58, 15.72s/it]

training loss: 0.7172748446464539


training:  10%|█         | 841/8300 [3:44:24<32:34:46, 15.72s/it]

training loss: 0.27102312445640564


training:  10%|█         | 842/8300 [3:44:39<32:34:38, 15.73s/it]

training loss: 0.7219119071960449


training:  10%|█         | 843/8300 [3:44:55<32:34:07, 15.72s/it]

training loss: 0.6966711282730103


training:  10%|█         | 844/8300 [3:45:11<32:34:00, 15.72s/it]

training loss: 1.1235911846160889


training:  10%|█         | 845/8300 [3:45:26<32:33:46, 15.72s/it]

training loss: 0.7980802655220032


training:  10%|█         | 846/8300 [3:45:42<32:33:29, 15.72s/it]

training loss: 0.9692875742912292


training:  10%|█         | 847/8300 [3:45:58<32:32:54, 15.72s/it]

training loss: 0.47800618410110474


training:  10%|█         | 848/8300 [3:46:14<32:32:10, 15.72s/it]

training loss: 0.7772244811058044


training:  10%|█         | 849/8300 [3:46:29<32:32:10, 15.72s/it]

training loss: 0.9323434233665466


training:  10%|█         | 850/8300 [3:46:45<32:31:32, 15.72s/it]

training loss: 0.6382444500923157


training:  10%|█         | 851/8300 [3:47:01<32:30:58, 15.71s/it]

training loss: 0.5995804071426392


training:  10%|█         | 852/8300 [3:47:16<32:30:34, 15.71s/it]

training loss: 1.1968107223510742


training:  10%|█         | 853/8300 [3:47:32<32:30:15, 15.71s/it]

training loss: 0.7588163018226624


training:  10%|█         | 854/8300 [3:47:48<32:30:06, 15.71s/it]

training loss: 0.5590786933898926


training:  10%|█         | 855/8300 [3:48:04<32:29:41, 15.71s/it]

training loss: 0.8622581958770752


training:  10%|█         | 856/8300 [3:48:19<32:29:23, 15.71s/it]

training loss: 0.9027646780014038


training:  10%|█         | 857/8300 [3:48:35<32:29:07, 15.71s/it]

training loss: 1.043105125427246


training:  10%|█         | 858/8300 [3:48:51<32:29:05, 15.71s/it]

training loss: 0.8053853511810303


training:  10%|█         | 859/8300 [3:49:06<32:28:37, 15.71s/it]

training loss: 0.43534907698631287


training:  10%|█         | 860/8300 [3:49:22<32:28:38, 15.71s/it]

training loss: 0.6526774168014526


training:  10%|█         | 861/8300 [3:49:38<32:28:40, 15.72s/it]

training loss: 0.7979229688644409


training:  10%|█         | 862/8300 [3:49:54<32:28:08, 15.72s/it]

training loss: 0.766638994216919


training:  10%|█         | 863/8300 [3:50:09<32:28:00, 15.72s/it]

training loss: 0.8483238816261292


training:  10%|█         | 864/8300 [3:50:25<32:27:57, 15.72s/it]

training loss: 0.9544053673744202


training:  10%|█         | 865/8300 [3:50:41<32:27:45, 15.72s/it]

training loss: 0.6342233419418335


training:  10%|█         | 866/8300 [3:50:56<32:27:36, 15.72s/it]

training loss: 0.8857307434082031


training:  10%|█         | 867/8300 [3:51:12<32:27:22, 15.72s/it]

training loss: 0.4826125502586365


training:  10%|█         | 868/8300 [3:51:28<32:27:14, 15.72s/it]

training loss: 0.8860148191452026


training:  10%|█         | 869/8300 [3:51:44<32:27:09, 15.72s/it]

training loss: 1.0242433547973633


training:  10%|█         | 870/8300 [3:51:59<32:26:36, 15.72s/it]

training loss: 1.023853063583374


training:  10%|█         | 871/8300 [3:52:15<32:25:58, 15.72s/it]

training loss: 0.9165832996368408


training:  11%|█         | 872/8300 [3:52:31<32:25:28, 15.71s/it]

training loss: 0.7908670902252197


training:  11%|█         | 873/8300 [3:52:46<32:24:49, 15.71s/it]

training loss: 0.5634293556213379


training:  11%|█         | 874/8300 [3:53:02<32:24:41, 15.71s/it]

training loss: 0.8531063795089722


training:  11%|█         | 875/8300 [3:53:18<32:24:29, 15.71s/it]

training loss: 0.7954533100128174


training:  11%|█         | 876/8300 [3:53:34<32:24:31, 15.72s/it]

training loss: 0.7885934710502625


training:  11%|█         | 877/8300 [3:53:49<32:24:01, 15.71s/it]

training loss: 0.9856286644935608


training:  11%|█         | 878/8300 [3:54:05<32:24:05, 15.72s/it]

training loss: 0.3048946261405945


training:  11%|█         | 879/8300 [3:54:21<32:23:33, 15.71s/it]

training loss: 0.7922990322113037


training:  11%|█         | 880/8300 [3:54:36<32:23:31, 15.72s/it]

training loss: 0.9408237338066101


training:  11%|█         | 881/8300 [3:54:52<32:23:14, 15.72s/it]

training loss: 0.6804340481758118


training:  11%|█         | 882/8300 [3:55:08<32:23:00, 15.72s/it]

training loss: 0.9200270175933838


training:  11%|█         | 883/8300 [3:55:24<32:23:00, 15.72s/it]

training loss: 0.5930160880088806


training:  11%|█         | 884/8300 [3:55:39<32:22:42, 15.72s/it]

training loss: 0.6756925582885742


training:  11%|█         | 885/8300 [3:55:55<32:22:36, 15.72s/it]

training loss: 0.8489891886711121


training:  11%|█         | 886/8300 [3:56:11<32:22:08, 15.72s/it]

training loss: 0.8955510258674622


training:  11%|█         | 887/8300 [3:56:27<32:21:49, 15.72s/it]

training loss: 0.8091815114021301


training:  11%|█         | 888/8300 [3:56:42<32:21:31, 15.72s/it]

training loss: 0.7208127379417419


training:  11%|█         | 889/8300 [3:56:58<32:21:17, 15.72s/it]

training loss: 0.6480879187583923


training:  11%|█         | 890/8300 [3:57:14<32:20:46, 15.71s/it]

training loss: 0.8236005306243896


training:  11%|█         | 891/8300 [3:57:29<32:20:34, 15.72s/it]

training loss: 0.8683830499649048


training:  11%|█         | 892/8300 [3:57:45<32:20:17, 15.72s/it]

training loss: 0.6614165306091309


training:  11%|█         | 893/8300 [3:58:01<32:19:54, 15.71s/it]

training loss: 1.180759310722351


training:  11%|█         | 894/8300 [3:58:16<32:19:28, 15.71s/it]

training loss: 0.6266728639602661


training:  11%|█         | 895/8300 [3:58:32<32:20:13, 15.72s/it]

training loss: 0.7882309556007385


training:  11%|█         | 896/8300 [3:58:48<32:19:33, 15.72s/it]

training loss: 0.7971887588500977


training:  11%|█         | 897/8300 [3:59:04<32:19:03, 15.72s/it]

training loss: 0.7798535823822021


training:  11%|█         | 898/8300 [3:59:19<32:18:47, 15.72s/it]

training loss: 0.47268664836883545


training:  11%|█         | 899/8300 [3:59:35<32:18:13, 15.71s/it]

training loss: 0.5366133451461792


training:  11%|█         | 900/8300 [3:59:51<32:17:31, 15.71s/it]

training loss: 0.9165128469467163
training loss: 0.7223961353302002


training:  11%|█         | 901/8300 [4:00:08<33:04:35, 16.09s/it]

validation loss: 1.494273066520691


training:  11%|█         | 902/8300 [4:00:24<32:51:17, 15.99s/it]

training loss: 1.1244301795959473


training:  11%|█         | 903/8300 [4:00:39<32:40:40, 15.90s/it]

training loss: 1.2585347890853882


training:  11%|█         | 904/8300 [4:00:55<32:33:10, 15.85s/it]

training loss: 0.46074041724205017


training:  11%|█         | 905/8300 [4:01:11<32:28:06, 15.81s/it]

training loss: 0.8550482988357544


training:  11%|█         | 906/8300 [4:01:26<32:24:57, 15.78s/it]

training loss: 0.6890712380409241


training:  11%|█         | 907/8300 [4:01:42<32:21:56, 15.76s/it]

training loss: 0.5693202614784241


training:  11%|█         | 908/8300 [4:01:58<32:19:58, 15.75s/it]

training loss: 0.5590862035751343


training:  11%|█         | 909/8300 [4:02:14<32:18:45, 15.74s/it]

training loss: 0.6945297718048096


training:  11%|█         | 910/8300 [4:02:29<32:17:55, 15.73s/it]

training loss: 0.6373333930969238


training:  11%|█         | 911/8300 [4:02:45<32:16:44, 15.73s/it]

training loss: 0.8124681115150452


training:  11%|█         | 912/8300 [4:03:01<32:16:02, 15.72s/it]

training loss: 0.8466005325317383


training:  11%|█         | 913/8300 [4:03:16<32:15:26, 15.72s/it]

training loss: 1.1153161525726318


training:  11%|█         | 914/8300 [4:03:32<32:15:07, 15.72s/it]

training loss: 1.2932178974151611


training:  11%|█         | 915/8300 [4:03:48<32:14:42, 15.72s/it]

training loss: 0.7786167860031128


training:  11%|█         | 916/8300 [4:04:04<32:14:16, 15.72s/it]

training loss: 0.6429492235183716


training:  11%|█         | 917/8300 [4:04:19<32:13:57, 15.72s/it]

training loss: 0.8847657442092896


training:  11%|█         | 918/8300 [4:04:35<32:13:37, 15.72s/it]

training loss: 0.905633270740509


training:  11%|█         | 919/8300 [4:04:51<32:13:09, 15.71s/it]

training loss: 0.6683273911476135


training:  11%|█         | 920/8300 [4:05:06<32:13:05, 15.72s/it]

training loss: 0.9005843997001648


training:  11%|█         | 921/8300 [4:05:22<32:12:48, 15.72s/it]

training loss: 0.6950243711471558


training:  11%|█         | 922/8300 [4:05:38<32:12:33, 15.72s/it]

training loss: 0.745751142501831


training:  11%|█         | 923/8300 [4:05:54<32:11:52, 15.71s/it]

training loss: 0.6541450619697571


training:  11%|█         | 924/8300 [4:06:09<32:11:49, 15.71s/it]

training loss: 0.9506661891937256


training:  11%|█         | 925/8300 [4:06:25<32:11:20, 15.71s/it]

training loss: 0.7594103813171387


training:  11%|█         | 926/8300 [4:06:41<32:14:55, 15.74s/it]

training loss: 0.6075665950775146


training:  11%|█         | 927/8300 [4:06:56<32:13:13, 15.73s/it]

training loss: 0.631252646446228


training:  11%|█         | 928/8300 [4:07:12<32:12:12, 15.73s/it]

training loss: 1.0335613489151


training:  11%|█         | 929/8300 [4:07:28<32:11:42, 15.72s/it]

training loss: 1.1206250190734863


training:  11%|█         | 930/8300 [4:07:44<32:11:06, 15.72s/it]

training loss: 0.7521581649780273


training:  11%|█         | 931/8300 [4:07:59<32:10:38, 15.72s/it]

training loss: 0.6371448636054993


training:  11%|█         | 932/8300 [4:08:15<32:09:51, 15.72s/it]

training loss: 0.9768211841583252


training:  11%|█         | 933/8300 [4:08:31<32:09:16, 15.71s/it]

training loss: 0.46923989057540894


training:  11%|█▏        | 934/8300 [4:08:46<32:08:57, 15.71s/it]

training loss: 0.6317936778068542


training:  11%|█▏        | 935/8300 [4:09:02<32:08:31, 15.71s/it]

training loss: 0.865283191204071


training:  11%|█▏        | 936/8300 [4:09:18<32:08:12, 15.71s/it]

training loss: 0.7352436184883118


training:  11%|█▏        | 937/8300 [4:09:34<32:08:06, 15.71s/it]

training loss: 1.0121201276779175


training:  11%|█▏        | 938/8300 [4:09:49<32:07:44, 15.71s/it]

training loss: 0.3440772294998169


training:  11%|█▏        | 939/8300 [4:10:05<32:07:36, 15.71s/it]

training loss: 0.7309818267822266


training:  11%|█▏        | 940/8300 [4:10:21<32:07:08, 15.71s/it]

training loss: 0.886253833770752


training:  11%|█▏        | 941/8300 [4:10:36<32:06:53, 15.71s/it]

training loss: 0.9676172137260437


training:  11%|█▏        | 942/8300 [4:10:52<32:06:24, 15.71s/it]

training loss: 0.8538554310798645


training:  11%|█▏        | 943/8300 [4:11:08<32:06:16, 15.71s/it]

training loss: 0.7260270714759827


training:  11%|█▏        | 944/8300 [4:11:24<32:06:09, 15.71s/it]

training loss: 0.5808035731315613


training:  11%|█▏        | 945/8300 [4:11:39<32:05:41, 15.71s/it]

training loss: 0.5548800230026245


training:  11%|█▏        | 946/8300 [4:11:55<32:05:20, 15.71s/it]

training loss: 0.898100733757019


training:  11%|█▏        | 947/8300 [4:12:11<32:05:05, 15.71s/it]

training loss: 0.5443575382232666


training:  11%|█▏        | 948/8300 [4:12:26<32:04:46, 15.71s/it]

training loss: 0.9691612720489502


training:  11%|█▏        | 949/8300 [4:12:42<32:04:28, 15.71s/it]

training loss: 0.6714222431182861


training:  11%|█▏        | 950/8300 [4:12:58<32:04:34, 15.71s/it]

training loss: 0.4668627083301544


training:  11%|█▏        | 951/8300 [4:13:14<32:04:16, 15.71s/it]

training loss: 1.0349910259246826


training:  11%|█▏        | 952/8300 [4:13:29<32:04:15, 15.71s/it]

training loss: 0.7429545521736145


training:  11%|█▏        | 953/8300 [4:13:45<32:03:54, 15.71s/it]

training loss: 0.8097779750823975


training:  11%|█▏        | 954/8300 [4:14:01<32:03:31, 15.71s/it]

training loss: 0.6884850263595581


training:  12%|█▏        | 955/8300 [4:14:16<32:03:05, 15.71s/it]

training loss: 0.6481908559799194


training:  12%|█▏        | 956/8300 [4:14:32<32:02:41, 15.71s/it]

training loss: 0.5170092582702637


training:  12%|█▏        | 957/8300 [4:14:48<32:02:55, 15.71s/it]

training loss: 0.8223567008972168


training:  12%|█▏        | 958/8300 [4:15:04<32:02:27, 15.71s/it]

training loss: 0.7846460342407227


training:  12%|█▏        | 959/8300 [4:15:19<32:01:58, 15.71s/it]

training loss: 0.7185147404670715


training:  12%|█▏        | 960/8300 [4:15:35<32:01:40, 15.71s/it]

training loss: 0.9219322800636292


training:  12%|█▏        | 961/8300 [4:15:51<32:01:26, 15.71s/it]

training loss: 0.8777837753295898


training:  12%|█▏        | 962/8300 [4:16:06<32:01:11, 15.71s/it]

training loss: 0.7355200052261353


training:  12%|█▏        | 963/8300 [4:16:22<32:00:49, 15.71s/it]

training loss: 0.7087690830230713


training:  12%|█▏        | 964/8300 [4:16:38<32:00:47, 15.71s/it]

training loss: 0.5065323710441589


training:  12%|█▏        | 965/8300 [4:16:53<32:00:22, 15.71s/it]

training loss: 1.0373648405075073


training:  12%|█▏        | 966/8300 [4:17:09<32:00:18, 15.71s/it]

training loss: 0.8570155501365662


training:  12%|█▏        | 967/8300 [4:17:25<32:00:15, 15.71s/it]

training loss: 0.7640236020088196


training:  12%|█▏        | 968/8300 [4:17:41<31:59:44, 15.71s/it]

training loss: 0.7029377222061157


training:  12%|█▏        | 969/8300 [4:17:56<31:59:19, 15.71s/it]

training loss: 0.7703854441642761


training:  12%|█▏        | 970/8300 [4:18:12<31:59:09, 15.71s/it]

training loss: 0.5287368297576904


training:  12%|█▏        | 971/8300 [4:18:28<31:59:25, 15.71s/it]

training loss: 0.845012903213501


training:  12%|█▏        | 972/8300 [4:18:43<31:58:44, 15.71s/it]

training loss: 0.8805484175682068


training:  12%|█▏        | 973/8300 [4:18:59<31:58:44, 15.71s/it]

training loss: 0.5256516933441162


training:  12%|█▏        | 974/8300 [4:19:15<31:58:30, 15.71s/it]

training loss: 0.7710055708885193


training:  12%|█▏        | 975/8300 [4:19:31<31:57:58, 15.71s/it]

training loss: 1.1677863597869873


training:  12%|█▏        | 976/8300 [4:19:46<31:57:24, 15.71s/it]

training loss: 1.0652493238449097


training:  12%|█▏        | 977/8300 [4:20:02<31:57:07, 15.71s/it]

training loss: 0.5654165744781494


training:  12%|█▏        | 978/8300 [4:20:18<31:56:54, 15.71s/it]

training loss: 1.1972533464431763


training:  12%|█▏        | 979/8300 [4:20:33<31:56:34, 15.71s/it]

training loss: 0.8677335977554321


training:  12%|█▏        | 980/8300 [4:20:49<31:56:12, 15.71s/it]

training loss: 0.8470238447189331


training:  12%|█▏        | 981/8300 [4:21:05<31:56:13, 15.71s/it]

training loss: 0.5922444462776184


training:  12%|█▏        | 982/8300 [4:21:21<31:56:01, 15.71s/it]

training loss: 0.5456188917160034


training:  12%|█▏        | 983/8300 [4:21:36<31:55:51, 15.71s/it]

training loss: 0.5900300145149231


training:  12%|█▏        | 984/8300 [4:21:52<31:55:10, 15.71s/it]

training loss: 0.5643415451049805


training:  12%|█▏        | 985/8300 [4:22:08<31:54:57, 15.71s/it]

training loss: 0.6572137475013733


training:  12%|█▏        | 986/8300 [4:22:23<31:54:32, 15.71s/it]

training loss: 0.7523581981658936


training:  12%|█▏        | 987/8300 [4:22:39<31:54:10, 15.71s/it]

training loss: 0.6324693560600281


training:  12%|█▏        | 988/8300 [4:22:55<31:54:01, 15.71s/it]

training loss: 0.8276487588882446


training:  12%|█▏        | 989/8300 [4:23:10<31:53:50, 15.71s/it]

training loss: 0.9384766817092896


training:  12%|█▏        | 990/8300 [4:23:26<31:53:52, 15.71s/it]

training loss: 0.7706326246261597


training:  12%|█▏        | 991/8300 [4:23:42<31:53:46, 15.71s/it]

training loss: 0.6602840423583984


training:  12%|█▏        | 992/8300 [4:23:58<31:53:23, 15.71s/it]

training loss: 0.7036154866218567


training:  12%|█▏        | 993/8300 [4:24:13<31:53:22, 15.71s/it]

training loss: 0.8050838708877563


training:  12%|█▏        | 994/8300 [4:24:29<31:52:54, 15.71s/it]

training loss: 0.8296599984169006


training:  12%|█▏        | 995/8300 [4:24:45<31:52:43, 15.71s/it]

training loss: 0.9284281134605408


training:  12%|█▏        | 996/8300 [4:25:00<31:52:32, 15.71s/it]

training loss: 1.0573756694793701


training:  12%|█▏        | 997/8300 [4:25:16<31:52:06, 15.71s/it]

training loss: 0.9961508512496948


training:  12%|█▏        | 998/8300 [4:25:32<31:51:56, 15.71s/it]

training loss: 0.7713031768798828


training:  12%|█▏        | 999/8300 [4:25:48<31:51:30, 15.71s/it]

training loss: 0.6973240375518799


training:  12%|█▏        | 1000/8300 [4:26:03<31:50:59, 15.71s/it]

training loss: 0.7388222217559814
training loss: 0.8069525957107544


training:  12%|█▏        | 1001/8300 [4:26:20<32:36:52, 16.09s/it]

validation loss: 1.4871151447296143


training:  12%|█▏        | 1002/8300 [4:26:36<32:23:34, 15.98s/it]

training loss: 0.9021236896514893


training:  12%|█▏        | 1003/8300 [4:26:52<32:13:15, 15.90s/it]

training loss: 1.2014527320861816


training:  12%|█▏        | 1004/8300 [4:27:07<32:05:47, 15.84s/it]

training loss: 0.4873160421848297


training:  12%|█▏        | 1005/8300 [4:27:23<32:00:51, 15.80s/it]

training loss: 1.0446062088012695


training:  12%|█▏        | 1006/8300 [4:27:39<31:57:32, 15.77s/it]

training loss: 0.6130998134613037


training:  12%|█▏        | 1007/8300 [4:27:55<31:54:53, 15.75s/it]

training loss: 0.7127419114112854


training:  12%|█▏        | 1008/8300 [4:28:10<31:53:00, 15.74s/it]

training loss: 0.6905192732810974


training:  12%|█▏        | 1009/8300 [4:28:26<31:51:20, 15.73s/it]

training loss: 0.4433252811431885


training:  12%|█▏        | 1010/8300 [4:28:42<31:50:02, 15.72s/it]

training loss: 0.7488996386528015


training:  12%|█▏        | 1011/8300 [4:28:57<31:49:29, 15.72s/it]

training loss: 1.0559667348861694


training:  12%|█▏        | 1012/8300 [4:29:13<31:48:59, 15.72s/it]

training loss: 0.8098814487457275


training:  12%|█▏        | 1013/8300 [4:29:29<31:48:31, 15.71s/it]

training loss: 0.6199465394020081


training:  12%|█▏        | 1014/8300 [4:29:44<31:48:12, 15.71s/it]

training loss: 0.9822864532470703


training:  12%|█▏        | 1015/8300 [4:30:00<31:47:58, 15.71s/it]

training loss: 0.45758283138275146


training:  12%|█▏        | 1016/8300 [4:30:16<31:47:40, 15.71s/it]

training loss: 0.9686211943626404


training:  12%|█▏        | 1017/8300 [4:30:32<31:47:14, 15.71s/it]

training loss: 0.7935013175010681


training:  12%|█▏        | 1018/8300 [4:30:47<31:46:41, 15.71s/it]

training loss: 0.9191261529922485


training:  12%|█▏        | 1019/8300 [4:31:03<31:46:26, 15.71s/it]

training loss: 0.9054420590400696


training:  12%|█▏        | 1020/8300 [4:31:19<31:46:16, 15.71s/it]

training loss: 0.7352917790412903


training:  12%|█▏        | 1021/8300 [4:31:34<31:46:11, 15.71s/it]

training loss: 1.0051653385162354


training:  12%|█▏        | 1022/8300 [4:31:50<31:45:32, 15.71s/it]

training loss: 0.3534615635871887


training:  12%|█▏        | 1023/8300 [4:32:06<31:45:28, 15.71s/it]

training loss: 0.6271733045578003


training:  12%|█▏        | 1024/8300 [4:32:22<31:44:58, 15.71s/it]

training loss: 0.834036648273468


training:  12%|█▏        | 1025/8300 [4:32:37<31:44:38, 15.71s/it]

training loss: 0.9152848720550537


training:  12%|█▏        | 1026/8300 [4:32:53<31:44:00, 15.71s/it]

training loss: 0.7619704604148865


training:  12%|█▏        | 1027/8300 [4:33:09<31:43:56, 15.71s/it]

training loss: 0.860793948173523


training:  12%|█▏        | 1028/8300 [4:33:24<31:43:48, 15.71s/it]

training loss: 1.1595189571380615


training:  12%|█▏        | 1029/8300 [4:33:40<31:43:45, 15.71s/it]

training loss: 0.7330213785171509


training:  12%|█▏        | 1030/8300 [4:33:56<31:43:12, 15.71s/it]

training loss: 0.8795535564422607


training:  12%|█▏        | 1031/8300 [4:34:12<31:42:52, 15.71s/it]

training loss: 0.7147600054740906


training:  12%|█▏        | 1032/8300 [4:34:27<31:42:33, 15.71s/it]

training loss: 0.5156841278076172


training:  12%|█▏        | 1033/8300 [4:34:43<31:42:01, 15.70s/it]

training loss: 1.1459615230560303


training:  12%|█▏        | 1034/8300 [4:34:59<31:41:39, 15.70s/it]

training loss: 0.8585113883018494


training:  12%|█▏        | 1035/8300 [4:35:14<31:41:37, 15.71s/it]

training loss: 0.9137292504310608


training:  12%|█▏        | 1036/8300 [4:35:30<31:41:47, 15.71s/it]

training loss: 1.1859644651412964


training:  12%|█▏        | 1037/8300 [4:35:46<31:41:10, 15.71s/it]

training loss: 0.8453478217124939


training:  13%|█▎        | 1038/8300 [4:36:01<31:40:57, 15.71s/it]

training loss: 0.7216466665267944


training:  13%|█▎        | 1039/8300 [4:36:17<31:40:31, 15.70s/it]

training loss: 0.6021060943603516


training:  13%|█▎        | 1040/8300 [4:36:33<31:40:27, 15.71s/it]

training loss: 0.6139160394668579


training:  13%|█▎        | 1041/8300 [4:36:49<31:39:53, 15.70s/it]

training loss: 0.7207551598548889


training:  13%|█▎        | 1042/8300 [4:37:04<31:39:40, 15.70s/it]

training loss: 0.9760488271713257


training:  13%|█▎        | 1043/8300 [4:37:20<31:39:28, 15.70s/it]

training loss: 0.9703161716461182


training:  13%|█▎        | 1044/8300 [4:37:36<31:39:55, 15.71s/it]

training loss: 0.6823721528053284


training:  13%|█▎        | 1045/8300 [4:37:51<31:39:21, 15.71s/it]

training loss: 0.8833732604980469


training:  13%|█▎        | 1046/8300 [4:38:07<31:38:47, 15.71s/it]

training loss: 0.8667355179786682


training:  13%|█▎        | 1047/8300 [4:38:23<31:38:31, 15.71s/it]

training loss: 0.8738633394241333


training:  13%|█▎        | 1048/8300 [4:38:39<31:38:22, 15.71s/it]

training loss: 0.569298505783081


training:  13%|█▎        | 1049/8300 [4:38:54<31:37:55, 15.70s/it]

training loss: 0.9613350629806519


training:  13%|█▎        | 1050/8300 [4:39:10<31:37:47, 15.71s/it]

training loss: 0.9925888776779175


training:  13%|█▎        | 1051/8300 [4:39:26<31:37:51, 15.71s/it]

training loss: 1.0308067798614502


training:  13%|█▎        | 1052/8300 [4:39:41<31:37:20, 15.71s/it]

training loss: 0.7739803791046143


training:  13%|█▎        | 1053/8300 [4:39:57<31:37:16, 15.71s/it]

training loss: 1.112414002418518


training:  13%|█▎        | 1054/8300 [4:40:13<31:36:45, 15.71s/it]

training loss: 0.7513262629508972


training:  13%|█▎        | 1055/8300 [4:40:28<31:36:47, 15.71s/it]

training loss: 0.8692049384117126


training:  13%|█▎        | 1056/8300 [4:40:44<31:36:36, 15.71s/it]

training loss: 0.7086730599403381


training:  13%|█▎        | 1057/8300 [4:41:00<31:36:26, 15.71s/it]

training loss: 1.1958022117614746


training:  13%|█▎        | 1058/8300 [4:41:16<31:36:05, 15.71s/it]

training loss: 0.7696067094802856


training:  13%|█▎        | 1059/8300 [4:41:31<31:36:14, 15.71s/it]

training loss: 0.7128508687019348


training:  13%|█▎        | 1060/8300 [4:41:47<31:35:42, 15.71s/it]

training loss: 0.9543119072914124


training:  13%|█▎        | 1061/8300 [4:42:03<31:35:09, 15.71s/it]

training loss: 1.3675731420516968


training:  13%|█▎        | 1062/8300 [4:42:18<31:34:36, 15.71s/it]

training loss: 0.9578741192817688


training:  13%|█▎        | 1063/8300 [4:42:34<31:34:29, 15.71s/it]

training loss: 0.9694619178771973


training:  13%|█▎        | 1064/8300 [4:42:50<31:34:13, 15.71s/it]

training loss: 0.5525768995285034


training:  13%|█▎        | 1065/8300 [4:43:06<31:33:57, 15.71s/it]

training loss: 0.6338501572608948


training:  13%|█▎        | 1066/8300 [4:43:21<31:33:49, 15.71s/it]

training loss: 0.7352051734924316


training:  13%|█▎        | 1067/8300 [4:43:37<31:33:48, 15.71s/it]

training loss: 0.7296200394630432


training:  13%|█▎        | 1068/8300 [4:43:53<31:33:35, 15.71s/it]

training loss: 1.120066523551941


training:  13%|█▎        | 1069/8300 [4:44:08<31:33:27, 15.71s/it]

training loss: 0.7499479651451111


training:  13%|█▎        | 1070/8300 [4:44:24<31:33:01, 15.71s/it]

training loss: 0.6179578304290771


training:  13%|█▎        | 1071/8300 [4:44:40<31:32:42, 15.71s/it]

training loss: 1.0171666145324707


training:  13%|█▎        | 1072/8300 [4:44:56<31:32:05, 15.71s/it]

training loss: 0.6545669436454773


training:  13%|█▎        | 1073/8300 [4:45:11<31:31:59, 15.71s/it]

training loss: 0.8606421947479248


training:  13%|█▎        | 1074/8300 [4:45:27<31:32:03, 15.71s/it]

training loss: 1.0933928489685059


training:  13%|█▎        | 1075/8300 [4:45:43<31:31:46, 15.71s/it]

training loss: 1.2114949226379395


training:  13%|█▎        | 1076/8300 [4:45:58<31:31:32, 15.71s/it]

training loss: 1.1817858219146729


training:  13%|█▎        | 1077/8300 [4:46:14<31:31:03, 15.71s/it]

training loss: 0.671538233757019


training:  13%|█▎        | 1078/8300 [4:46:30<31:31:04, 15.71s/it]

training loss: 0.8351279497146606


training:  13%|█▎        | 1079/8300 [4:46:45<31:30:28, 15.71s/it]

training loss: 1.135388731956482


training:  13%|█▎        | 1080/8300 [4:47:01<31:30:05, 15.71s/it]

training loss: 1.0815578699111938


training:  13%|█▎        | 1081/8300 [4:47:17<31:30:03, 15.71s/it]

training loss: 0.7277410626411438


training:  13%|█▎        | 1082/8300 [4:47:33<31:30:07, 15.71s/it]

training loss: 0.5725207328796387


training:  13%|█▎        | 1083/8300 [4:47:48<31:29:42, 15.71s/it]

training loss: 0.482960045337677


training:  13%|█▎        | 1084/8300 [4:48:04<31:29:30, 15.71s/it]

training loss: 0.9236422777175903


training:  13%|█▎        | 1085/8300 [4:48:20<31:29:04, 15.71s/it]

training loss: 0.9715949892997742


training:  13%|█▎        | 1086/8300 [4:48:35<31:28:40, 15.71s/it]

training loss: 0.9088765978813171


training:  13%|█▎        | 1087/8300 [4:48:51<31:28:10, 15.71s/it]

training loss: 0.4786236882209778


training:  13%|█▎        | 1088/8300 [4:49:07<31:27:56, 15.71s/it]

training loss: 0.9704323410987854


training:  13%|█▎        | 1089/8300 [4:49:23<31:27:40, 15.71s/it]

training loss: 0.4105795621871948


training:  13%|█▎        | 1090/8300 [4:49:38<31:27:31, 15.71s/it]

training loss: 1.1086167097091675


training:  13%|█▎        | 1091/8300 [4:49:54<31:27:13, 15.71s/it]

training loss: 0.7204686403274536


training:  13%|█▎        | 1092/8300 [4:50:10<31:27:02, 15.71s/it]

training loss: 0.997852087020874


training:  13%|█▎        | 1093/8300 [4:50:25<31:26:50, 15.71s/it]

training loss: 0.7239488959312439


training:  13%|█▎        | 1094/8300 [4:50:41<31:26:27, 15.71s/it]

training loss: 0.9045023918151855


training:  13%|█▎        | 1095/8300 [4:50:57<31:26:42, 15.71s/it]

training loss: 0.8397433161735535


training:  13%|█▎        | 1096/8300 [4:51:13<31:26:20, 15.71s/it]

training loss: 0.8679656982421875


training:  13%|█▎        | 1097/8300 [4:51:28<31:26:09, 15.71s/it]

training loss: 0.9827380776405334


training:  13%|█▎        | 1098/8300 [4:51:44<31:25:51, 15.71s/it]

training loss: 0.5809305906295776


training:  13%|█▎        | 1099/8300 [4:52:00<31:25:33, 15.71s/it]

training loss: 0.9281222224235535


training:  13%|█▎        | 1100/8300 [4:52:15<31:25:04, 15.71s/it]

training loss: 0.8509846925735474
training loss: 1.0897570848464966


training:  13%|█▎        | 1101/8300 [4:52:32<32:11:14, 16.10s/it]

validation loss: 1.4884570837020874


training:  13%|█▎        | 1102/8300 [4:52:48<31:57:34, 15.98s/it]

training loss: 1.0137231349945068


training:  13%|█▎        | 1103/8300 [4:53:04<31:47:15, 15.90s/it]

training loss: 0.7368389368057251


training:  13%|█▎        | 1104/8300 [4:53:20<31:39:59, 15.84s/it]

training loss: 0.5437412261962891


training:  13%|█▎        | 1105/8300 [4:53:35<31:35:04, 15.80s/it]

training loss: 1.0323424339294434


training:  13%|█▎        | 1106/8300 [4:53:51<31:31:16, 15.77s/it]

training loss: 1.1757662296295166


training:  13%|█▎        | 1107/8300 [4:54:07<31:28:43, 15.75s/it]

training loss: 0.793305516242981


training:  13%|█▎        | 1108/8300 [4:54:22<31:26:40, 15.74s/it]

training loss: 0.547100841999054


training:  13%|█▎        | 1109/8300 [4:54:38<31:25:10, 15.73s/it]

training loss: 1.0090556144714355


training:  13%|█▎        | 1110/8300 [4:54:54<31:23:53, 15.72s/it]

training loss: 0.7159000039100647


training:  13%|█▎        | 1111/8300 [4:55:09<31:23:35, 15.72s/it]

training loss: 0.7189377546310425


training:  13%|█▎        | 1112/8300 [4:55:25<31:22:53, 15.72s/it]

training loss: 0.43711143732070923


training:  13%|█▎        | 1113/8300 [4:55:41<31:22:12, 15.71s/it]

training loss: 0.6138873100280762


training:  13%|█▎        | 1114/8300 [4:55:57<31:21:50, 15.71s/it]

training loss: 0.2878189980983734


training:  13%|█▎        | 1115/8300 [4:56:12<31:21:17, 15.71s/it]

training loss: 0.7256447076797485


training:  13%|█▎        | 1116/8300 [4:56:28<31:20:56, 15.71s/it]

training loss: 0.8631194233894348


training:  13%|█▎        | 1117/8300 [4:56:44<31:20:25, 15.71s/it]

training loss: 0.7712282538414001


training:  13%|█▎        | 1118/8300 [4:56:59<31:20:12, 15.71s/it]

training loss: 0.7158867120742798


training:  13%|█▎        | 1119/8300 [4:57:15<31:19:51, 15.71s/it]

training loss: 0.8432581424713135


training:  13%|█▎        | 1120/8300 [4:57:31<31:19:54, 15.71s/it]

training loss: 0.580837070941925


training:  14%|█▎        | 1121/8300 [4:57:47<31:19:52, 15.71s/it]

training loss: 0.6899745464324951


training:  14%|█▎        | 1122/8300 [4:58:02<31:19:33, 15.71s/it]

training loss: 0.8029516935348511


training:  14%|█▎        | 1123/8300 [4:58:18<31:19:17, 15.71s/it]

training loss: 0.8769233226776123


training:  14%|█▎        | 1124/8300 [4:58:34<31:19:05, 15.71s/it]

training loss: 0.6949602365493774


training:  14%|█▎        | 1125/8300 [4:58:49<31:18:44, 15.71s/it]

training loss: 0.772994339466095


training:  14%|█▎        | 1126/8300 [4:59:05<31:18:34, 15.71s/it]

training loss: 0.5032119154930115


training:  14%|█▎        | 1127/8300 [4:59:21<31:17:59, 15.71s/it]

training loss: 0.9625622034072876


training:  14%|█▎        | 1128/8300 [4:59:37<31:18:01, 15.71s/it]

training loss: 0.9246782064437866


training:  14%|█▎        | 1129/8300 [4:59:52<31:17:35, 15.71s/it]

training loss: 0.7885276675224304


training:  14%|█▎        | 1130/8300 [5:00:08<31:17:15, 15.71s/it]

training loss: 0.844218373298645


training:  14%|█▎        | 1131/8300 [5:00:24<31:17:00, 15.71s/it]

training loss: 0.3676266074180603


training:  14%|█▎        | 1132/8300 [5:00:39<31:16:27, 15.71s/it]

training loss: 1.0130051374435425


training:  14%|█▎        | 1133/8300 [5:00:55<31:16:23, 15.71s/it]

training loss: 0.43640559911727905


training:  14%|█▎        | 1134/8300 [5:01:11<31:16:04, 15.71s/it]

training loss: 1.0144214630126953


training:  14%|█▎        | 1135/8300 [5:01:26<31:15:54, 15.71s/it]

training loss: 0.7720308303833008


training:  14%|█▎        | 1136/8300 [5:01:42<31:15:35, 15.71s/it]

training loss: 0.5704704523086548


training:  14%|█▎        | 1137/8300 [5:01:58<31:15:32, 15.71s/it]

training loss: 0.948358416557312


training:  14%|█▎        | 1138/8300 [5:02:14<31:15:06, 15.71s/it]

training loss: 0.939937949180603


training:  14%|█▎        | 1139/8300 [5:02:29<31:15:10, 15.71s/it]

training loss: 0.7970104217529297


training:  14%|█▎        | 1140/8300 [5:02:45<31:14:28, 15.71s/it]

training loss: 0.8205592036247253


training:  14%|█▎        | 1141/8300 [5:03:01<31:14:22, 15.71s/it]

training loss: 0.4361756443977356


training:  14%|█▍        | 1142/8300 [5:03:16<31:13:55, 15.71s/it]

training loss: 0.6579771637916565


training:  14%|█▍        | 1143/8300 [5:03:32<31:13:42, 15.71s/it]

training loss: 0.38784635066986084


training:  14%|█▍        | 1144/8300 [5:03:48<31:13:23, 15.71s/it]

training loss: 0.7359970808029175


training:  14%|█▍        | 1145/8300 [5:04:04<31:13:17, 15.71s/it]

training loss: 0.8198003768920898


training:  14%|█▍        | 1146/8300 [5:04:19<31:12:40, 15.71s/it]

training loss: 0.6584352850914001


training:  14%|█▍        | 1147/8300 [5:04:35<31:12:48, 15.71s/it]

training loss: 0.3758825659751892


training:  14%|█▍        | 1148/8300 [5:04:51<31:12:35, 15.71s/it]

training loss: 0.7696084380149841


training:  14%|█▍        | 1149/8300 [5:05:06<31:12:35, 15.71s/it]

training loss: 1.0181126594543457


training:  14%|█▍        | 1150/8300 [5:05:22<31:11:50, 15.71s/it]

training loss: 0.7295086979866028


training:  14%|█▍        | 1151/8300 [5:05:38<31:11:42, 15.71s/it]

training loss: 0.6725143790245056


training:  14%|█▍        | 1152/8300 [5:05:54<31:11:16, 15.71s/it]

training loss: 0.9443617463111877


training:  14%|█▍        | 1153/8300 [5:06:09<31:11:07, 15.71s/it]

training loss: 0.8728205561637878


training:  14%|█▍        | 1154/8300 [5:06:25<31:10:46, 15.71s/it]

training loss: 0.5971381068229675


training:  14%|█▍        | 1155/8300 [5:06:41<31:10:23, 15.71s/it]

training loss: 0.983340859413147


training:  14%|█▍        | 1156/8300 [5:06:56<31:10:04, 15.71s/it]

training loss: 0.7245751619338989


training:  14%|█▍        | 1157/8300 [5:07:12<31:09:54, 15.71s/it]

training loss: 0.6579145789146423


training:  14%|█▍        | 1158/8300 [5:07:28<31:09:40, 15.71s/it]

training loss: 0.7943825721740723


training:  14%|█▍        | 1159/8300 [5:07:43<31:09:36, 15.71s/it]

training loss: 0.7843302488327026


training:  14%|█▍        | 1160/8300 [5:07:59<31:09:09, 15.71s/it]

training loss: 0.5771906971931458


training:  14%|█▍        | 1161/8300 [5:08:15<31:09:02, 15.71s/it]

training loss: 0.8113357424736023


training:  14%|█▍        | 1162/8300 [5:08:31<31:08:46, 15.71s/it]

training loss: 0.966554582118988


training:  14%|█▍        | 1163/8300 [5:08:46<31:08:26, 15.71s/it]

training loss: 0.9093360900878906


training:  14%|█▍        | 1164/8300 [5:09:02<31:08:17, 15.71s/it]

training loss: 0.6165235042572021


training:  14%|█▍        | 1165/8300 [5:09:18<31:08:10, 15.71s/it]

training loss: 0.895243763923645


training:  14%|█▍        | 1166/8300 [5:09:33<31:08:14, 15.71s/it]

training loss: 1.0692931413650513


training:  14%|█▍        | 1167/8300 [5:09:49<31:07:42, 15.71s/it]

training loss: 0.6952449083328247


training:  14%|█▍        | 1168/8300 [5:10:05<31:07:43, 15.71s/it]

training loss: 0.6418223977088928


training:  14%|█▍        | 1169/8300 [5:10:21<31:07:12, 15.71s/it]

training loss: 0.9865226745605469


training:  14%|█▍        | 1170/8300 [5:10:36<31:06:45, 15.71s/it]

training loss: 0.5220247507095337


training:  14%|█▍        | 1171/8300 [5:10:52<31:06:13, 15.71s/it]

training loss: 0.7434945702552795


training:  14%|█▍        | 1172/8300 [5:11:08<31:06:04, 15.71s/it]

training loss: 0.560762882232666


training:  14%|█▍        | 1173/8300 [5:11:23<31:06:09, 15.71s/it]

training loss: 0.8334359526634216


training:  14%|█▍        | 1174/8300 [5:11:39<31:05:52, 15.71s/it]

training loss: 0.7546756267547607


training:  14%|█▍        | 1175/8300 [5:11:55<31:05:24, 15.71s/it]

training loss: 0.667568564414978


training:  14%|█▍        | 1176/8300 [5:12:11<31:05:11, 15.71s/it]

training loss: 0.3701289892196655


training:  14%|█▍        | 1177/8300 [5:12:26<31:04:56, 15.71s/it]

training loss: 1.1837273836135864


training:  14%|█▍        | 1178/8300 [5:12:42<31:04:21, 15.71s/it]

training loss: 0.9705671668052673


training:  14%|█▍        | 1179/8300 [5:12:58<31:04:05, 15.71s/it]

training loss: 1.0031403303146362


training:  14%|█▍        | 1180/8300 [5:13:13<31:04:07, 15.71s/it]

training loss: 0.9222655296325684


training:  14%|█▍        | 1181/8300 [5:13:29<31:03:48, 15.71s/it]

training loss: 0.944305419921875


training:  14%|█▍        | 1182/8300 [5:13:45<31:03:18, 15.71s/it]

training loss: 0.41997769474983215


training:  14%|█▍        | 1183/8300 [5:14:00<31:03:01, 15.71s/it]

training loss: 0.833305835723877


training:  14%|█▍        | 1184/8300 [5:14:16<31:02:41, 15.71s/it]

training loss: 0.5852158665657043


training:  14%|█▍        | 1185/8300 [5:14:32<31:02:34, 15.71s/it]

training loss: 0.5036143064498901


training:  14%|█▍        | 1186/8300 [5:14:48<31:02:13, 15.71s/it]

training loss: 1.0689713954925537


training:  14%|█▍        | 1187/8300 [5:15:03<31:01:49, 15.70s/it]

training loss: 0.8234083652496338


training:  14%|█▍        | 1188/8300 [5:15:19<31:01:38, 15.71s/it]

training loss: 0.5454909801483154


training:  14%|█▍        | 1189/8300 [5:15:35<31:01:19, 15.71s/it]

training loss: 0.6820912957191467


training:  14%|█▍        | 1190/8300 [5:15:50<31:00:50, 15.70s/it]

training loss: 0.7376940250396729


training:  14%|█▍        | 1191/8300 [5:16:06<31:01:01, 15.71s/it]

training loss: 0.4981456398963928


training:  14%|█▍        | 1192/8300 [5:16:22<31:00:26, 15.70s/it]

training loss: 0.5525019764900208


training:  14%|█▍        | 1193/8300 [5:16:38<31:00:46, 15.71s/it]

training loss: 0.673302173614502


training:  14%|█▍        | 1194/8300 [5:16:53<31:00:26, 15.71s/it]

training loss: 0.842233419418335


training:  14%|█▍        | 1195/8300 [5:17:09<31:00:29, 15.71s/it]

training loss: 0.8294459581375122


training:  14%|█▍        | 1196/8300 [5:17:25<30:59:59, 15.71s/it]

training loss: 0.5475398302078247


training:  14%|█▍        | 1197/8300 [5:17:40<30:59:38, 15.71s/it]

training loss: 0.3963046371936798


training:  14%|█▍        | 1198/8300 [5:17:56<30:59:08, 15.71s/it]

training loss: 0.8483341932296753


training:  14%|█▍        | 1199/8300 [5:18:12<30:58:46, 15.71s/it]

training loss: 0.6765304207801819


training:  14%|█▍        | 1200/8300 [5:18:27<30:58:27, 15.71s/it]

training loss: 0.6644778251647949
training loss: 0.7107855677604675


training:  14%|█▍        | 1201/8300 [5:18:44<31:43:10, 16.09s/it]

validation loss: 1.5012094974517822


training:  14%|█▍        | 1202/8300 [5:19:00<31:30:21, 15.98s/it]

training loss: 0.7537739872932434


training:  14%|█▍        | 1203/8300 [5:19:16<31:20:17, 15.90s/it]

training loss: 0.967984676361084


training:  15%|█▍        | 1204/8300 [5:19:32<31:13:06, 15.84s/it]

training loss: 0.7205885052680969


training:  15%|█▍        | 1205/8300 [5:19:47<31:07:59, 15.80s/it]

training loss: 0.9211934804916382


training:  15%|█▍        | 1206/8300 [5:20:03<31:04:28, 15.77s/it]

training loss: 1.0043655633926392


training:  15%|█▍        | 1207/8300 [5:20:19<31:02:04, 15.75s/it]

training loss: 0.7925010323524475


training:  15%|█▍        | 1208/8300 [5:20:34<31:00:20, 15.74s/it]

training loss: 0.8950037956237793


training:  15%|█▍        | 1209/8300 [5:20:50<30:58:39, 15.73s/it]

training loss: 0.6525312066078186


training:  15%|█▍        | 1210/8300 [5:21:06<30:57:54, 15.72s/it]

training loss: 0.6101970672607422


training:  15%|█▍        | 1211/8300 [5:21:22<30:57:10, 15.72s/it]

training loss: 0.7110108733177185


training:  15%|█▍        | 1212/8300 [5:21:37<30:56:30, 15.72s/it]

training loss: 0.7929627299308777


training:  15%|█▍        | 1213/8300 [5:21:53<30:55:48, 15.71s/it]

training loss: 0.9611281156539917


training:  15%|█▍        | 1214/8300 [5:22:09<30:59:12, 15.74s/it]

training loss: 0.9466944932937622


training:  15%|█▍        | 1215/8300 [5:22:24<30:57:32, 15.73s/it]

training loss: 0.8433476090431213


training:  15%|█▍        | 1216/8300 [5:22:40<30:56:13, 15.72s/it]

training loss: 0.7124022245407104


training:  15%|█▍        | 1217/8300 [5:22:56<30:55:15, 15.72s/it]

training loss: 0.7586387395858765


training:  15%|█▍        | 1218/8300 [5:23:12<30:54:55, 15.72s/it]

training loss: 0.8617441654205322


training:  15%|█▍        | 1219/8300 [5:23:27<30:54:23, 15.71s/it]

training loss: 0.384891539812088


training:  15%|█▍        | 1220/8300 [5:23:43<30:54:02, 15.71s/it]

training loss: 0.9618262648582458


training:  15%|█▍        | 1221/8300 [5:23:59<30:53:23, 15.71s/it]

training loss: 0.9058748483657837


training:  15%|█▍        | 1222/8300 [5:24:14<30:52:53, 15.71s/it]

training loss: 0.9573838114738464


training:  15%|█▍        | 1223/8300 [5:24:30<30:52:35, 15.71s/it]

training loss: 1.0061166286468506


training:  15%|█▍        | 1224/8300 [5:24:46<30:52:02, 15.70s/it]

training loss: 0.8424937725067139


training:  15%|█▍        | 1225/8300 [5:25:02<30:51:26, 15.70s/it]

training loss: 1.067573070526123


training:  15%|█▍        | 1226/8300 [5:25:17<30:51:40, 15.71s/it]

training loss: 0.9202174544334412


training:  15%|█▍        | 1227/8300 [5:25:33<30:51:32, 15.71s/it]

training loss: 0.7893415689468384


training:  15%|█▍        | 1228/8300 [5:25:49<30:50:58, 15.70s/it]

training loss: 0.47000575065612793


training:  15%|█▍        | 1229/8300 [5:26:04<30:50:59, 15.71s/it]

training loss: 0.5794941186904907


training:  15%|█▍        | 1230/8300 [5:26:20<30:50:40, 15.71s/it]

training loss: 0.6797574162483215


training:  15%|█▍        | 1231/8300 [5:26:36<30:50:41, 15.71s/it]

training loss: 0.9531478881835938


training:  15%|█▍        | 1232/8300 [5:26:51<30:50:13, 15.71s/it]

training loss: 0.895658016204834


training:  15%|█▍        | 1233/8300 [5:27:07<30:50:21, 15.71s/it]

training loss: 1.2645690441131592


training:  15%|█▍        | 1234/8300 [5:27:23<30:50:22, 15.71s/it]

training loss: 1.0098421573638916


training:  15%|█▍        | 1235/8300 [5:27:39<30:49:48, 15.71s/it]

training loss: 0.606025755405426


training:  15%|█▍        | 1236/8300 [5:27:54<30:49:19, 15.71s/it]

training loss: 0.5691525936126709


training:  15%|█▍        | 1237/8300 [5:28:10<30:49:04, 15.71s/it]

training loss: 0.9226872324943542


training:  15%|█▍        | 1238/8300 [5:28:26<30:48:58, 15.71s/it]

training loss: 1.0063395500183105


training:  15%|█▍        | 1239/8300 [5:28:41<30:48:34, 15.71s/it]

training loss: 0.6786498427391052


training:  15%|█▍        | 1240/8300 [5:28:57<30:48:02, 15.71s/it]

training loss: 1.2136387825012207


training:  15%|█▍        | 1241/8300 [5:29:13<30:48:00, 15.71s/it]

training loss: 0.7120734453201294


training:  15%|█▍        | 1242/8300 [5:29:29<30:47:59, 15.71s/it]

training loss: 1.0758252143859863


training:  15%|█▍        | 1243/8300 [5:29:44<30:47:35, 15.71s/it]

training loss: 1.0254805088043213


training:  15%|█▍        | 1244/8300 [5:30:00<30:47:23, 15.71s/it]

training loss: 0.7869556546211243


training:  15%|█▌        | 1245/8300 [5:30:16<30:47:10, 15.71s/it]

training loss: 0.3450770080089569


training:  15%|█▌        | 1246/8300 [5:30:31<30:47:13, 15.71s/it]

training loss: 1.180641531944275


training:  15%|█▌        | 1247/8300 [5:30:47<30:46:39, 15.71s/it]

training loss: 1.0164575576782227


training:  15%|█▌        | 1248/8300 [5:31:03<30:46:03, 15.71s/it]

training loss: 0.9696366786956787


training:  15%|█▌        | 1249/8300 [5:31:19<30:45:58, 15.71s/it]

training loss: 0.7661973237991333


training:  15%|█▌        | 1250/8300 [5:31:34<30:45:27, 15.71s/it]

training loss: 0.8545998930931091


training:  15%|█▌        | 1251/8300 [5:31:50<30:45:11, 15.71s/it]

training loss: 0.5357807874679565


training:  15%|█▌        | 1252/8300 [5:32:06<30:45:09, 15.71s/it]

training loss: 0.940282940864563


training:  15%|█▌        | 1253/8300 [5:32:21<30:45:02, 15.71s/it]

training loss: 1.081648588180542


training:  15%|█▌        | 1254/8300 [5:32:37<30:45:16, 15.71s/it]

training loss: 0.5112316012382507


training:  15%|█▌        | 1255/8300 [5:32:53<30:44:46, 15.71s/it]

training loss: 1.0904213190078735


training:  15%|█▌        | 1256/8300 [5:33:09<30:44:35, 15.71s/it]

training loss: 0.9880257248878479


training:  15%|█▌        | 1257/8300 [5:33:24<30:44:06, 15.71s/it]

training loss: 0.8284008502960205


training:  15%|█▌        | 1258/8300 [5:33:40<30:43:41, 15.71s/it]

training loss: 0.846360445022583


training:  15%|█▌        | 1259/8300 [5:33:56<30:43:12, 15.71s/it]

training loss: 0.9990601539611816


training:  15%|█▌        | 1260/8300 [5:34:11<30:43:01, 15.71s/it]

training loss: 0.7974911332130432


training:  15%|█▌        | 1261/8300 [5:34:27<30:42:48, 15.71s/it]

training loss: 0.9925435781478882


training:  15%|█▌        | 1262/8300 [5:34:43<30:42:23, 15.71s/it]

training loss: 0.7228090167045593


training:  15%|█▌        | 1263/8300 [5:34:58<30:42:19, 15.71s/it]

training loss: 0.8310850858688354


training:  15%|█▌        | 1264/8300 [5:35:14<30:42:30, 15.71s/it]

training loss: 0.8261334300041199


training:  15%|█▌        | 1265/8300 [5:35:30<30:42:27, 15.71s/it]

training loss: 1.0091586112976074


training:  15%|█▌        | 1266/8300 [5:35:46<30:41:57, 15.71s/it]

training loss: 0.5309438109397888


training:  15%|█▌        | 1267/8300 [5:36:01<30:41:12, 15.71s/it]

training loss: 0.6809408664703369


training:  15%|█▌        | 1268/8300 [5:36:17<30:41:10, 15.71s/it]

training loss: 0.5717355012893677


training:  15%|█▌        | 1269/8300 [5:36:33<30:40:56, 15.71s/it]

training loss: 0.9414622783660889


training:  15%|█▌        | 1270/8300 [5:36:48<30:40:27, 15.71s/it]

training loss: 0.9307947158813477


training:  15%|█▌        | 1271/8300 [5:37:04<30:40:09, 15.71s/it]

training loss: 0.6809120774269104


training:  15%|█▌        | 1272/8300 [5:37:20<30:39:55, 15.71s/it]

training loss: 1.0641745328903198


training:  15%|█▌        | 1273/8300 [5:37:36<30:40:02, 15.71s/it]

training loss: 0.8110568523406982


training:  15%|█▌        | 1274/8300 [5:37:51<30:39:31, 15.71s/it]

training loss: 1.0125144720077515


training:  15%|█▌        | 1275/8300 [5:38:07<30:39:15, 15.71s/it]

training loss: 0.8056740164756775


training:  15%|█▌        | 1276/8300 [5:38:23<30:38:55, 15.71s/it]

training loss: 0.8626782894134521


training:  15%|█▌        | 1277/8300 [5:38:38<30:38:36, 15.71s/it]

training loss: 0.5616512894630432


training:  15%|█▌        | 1278/8300 [5:38:54<30:38:18, 15.71s/it]

training loss: 0.8069034814834595


training:  15%|█▌        | 1279/8300 [5:39:10<30:38:28, 15.71s/it]

training loss: 0.8737786412239075


training:  15%|█▌        | 1280/8300 [5:39:26<30:38:16, 15.71s/it]

training loss: 0.7003602981567383


training:  15%|█▌        | 1281/8300 [5:39:41<30:37:57, 15.71s/it]

training loss: 0.9986304044723511


training:  15%|█▌        | 1282/8300 [5:39:57<30:37:44, 15.71s/it]

training loss: 0.8114558458328247


training:  15%|█▌        | 1283/8300 [5:40:13<30:37:17, 15.71s/it]

training loss: 1.1482027769088745


training:  15%|█▌        | 1284/8300 [5:40:28<30:37:07, 15.71s/it]

training loss: 0.28960803151130676


training:  15%|█▌        | 1285/8300 [5:40:44<30:36:33, 15.71s/it]

training loss: 0.7905949950218201


training:  15%|█▌        | 1286/8300 [5:41:00<30:36:15, 15.71s/it]

training loss: 0.962153434753418


training:  16%|█▌        | 1287/8300 [5:41:15<30:36:25, 15.71s/it]

training loss: 0.6511194109916687


training:  16%|█▌        | 1288/8300 [5:41:31<30:36:19, 15.71s/it]

training loss: 0.8510147333145142


training:  16%|█▌        | 1289/8300 [5:41:47<30:35:40, 15.71s/it]

training loss: 0.638666570186615


training:  16%|█▌        | 1290/8300 [5:42:03<30:35:18, 15.71s/it]

training loss: 0.857966423034668


training:  16%|█▌        | 1291/8300 [5:42:18<30:34:47, 15.71s/it]

training loss: 1.0864959955215454


training:  16%|█▌        | 1292/8300 [5:42:34<30:34:44, 15.71s/it]

training loss: 0.5957260727882385


training:  16%|█▌        | 1293/8300 [5:42:50<30:34:28, 15.71s/it]

training loss: 0.8150304555892944


training:  16%|█▌        | 1294/8300 [5:43:05<30:34:15, 15.71s/it]

training loss: 0.6922352910041809


training:  16%|█▌        | 1295/8300 [5:43:21<30:33:52, 15.71s/it]

training loss: 0.9241012930870056


training:  16%|█▌        | 1296/8300 [5:43:37<30:34:02, 15.71s/it]

training loss: 0.8078969717025757


training:  16%|█▌        | 1297/8300 [5:43:53<30:33:34, 15.71s/it]

training loss: 0.9486153721809387


training:  16%|█▌        | 1298/8300 [5:44:08<30:33:17, 15.71s/it]

training loss: 0.8823690414428711


training:  16%|█▌        | 1299/8300 [5:44:24<30:33:05, 15.71s/it]

training loss: 0.45404666662216187


training:  16%|█▌        | 1300/8300 [5:44:40<30:32:49, 15.71s/it]

training loss: 0.6662188172340393
training loss: 0.37993907928466797



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.444323182106018
lz presadzuje, aby sa kratili eurofondy krajinam, ktore
odmietaju prijimat utecencov. Merkelova sice tiez trva na migracnych
kvotach, ale nechce financne postihovat clenske staty EU, ktore sa
brania ziadatelom o azyl.V prejave pred poslancami zo svojej vladnucej strany Erdogan vyhlasil, ze
Manbidz je prevazne arabske mesto a Turecko vrati toto uzemie jeho
skutocnym vlastnikom.
Preco tam zostavate? Odidte, uviedol Erdogan. Prideme vratit
toto uzemie jeho skutocnym vlastnikom.
Turecke ozbrojene sily a spojenecka Slobodna syrska armada spustili
20. januara ofenzivu proti Kurdom v enklave Afrin na severe Syrie. Chcu
odtial vyhnat kurdske milicie, ktore zamyslaju vytlacit
i z Manbidzu, kde su dislokovani taktiez americki vojaci.
Vztahy medzi Tureckom a Spojenymi statmi su napate prave pre kurdske
milicie, ktore Ankara povazuje za bezpecnostnu hrozbu.
Kurdske milicie zname ako Oddiely ludovej sebaobrany (YPG) su spojencami
USA v boji proti te


generating:   0%|          | 1/512 [00:00<01:57,  4.35it/s][A
generating:   0%|          | 2/512 [00:00<01:58,  4.29it/s][A
generating:   1%|          | 3/512 [00:00<01:58,  4.29it/s][A
generating:   1%|          | 4/512 [00:00<01:58,  4.30it/s][A
generating:   1%|          | 5/512 [00:01<01:58,  4.28it/s][A
generating:   1%|          | 6/512 [00:01<01:58,  4.25it/s][A
generating:   1%|▏         | 7/512 [00:01<01:59,  4.21it/s][A
generating:   2%|▏         | 8/512 [00:01<01:59,  4.23it/s][A
generating:   2%|▏         | 9/512 [00:02<01:58,  4.24it/s][A
generating:   2%|▏         | 10/512 [00:02<01:58,  4.25it/s][A
generating:   2%|▏         | 11/512 [00:02<01:58,  4.23it/s][A
generating:   2%|▏         | 12/512 [00:02<01:58,  4.23it/s][A
generating:   3%|▎         | 13/512 [00:03<01:57,  4.25it/s][A
generating:   3%|▎         | 14/512 [00:03<01:57,  4.25it/s][A
generating:   3%|▎         | 15/512 [00:03<01:57,  4.25it/s][A
generating:   3%|▎         | 16/512 [00:03<01:58

dla neho vsak vypovedal
Vianoce v reagoval napisom, ze pokrok povedal, ze tento rok zastupcali deti strany Hamada po vyse 5000 ludom, tie
dostala zmeny, ale dat tento dolar zahranicnej politikov.
Honoglicky a tento problemy a na tejto ine rozdelenia, ako presvedcil, ze spoliehajsom vymaha
trastie. Nezabezpecit na branit chce ine odrazuje
ani rozvoje. Tie politicky, aby predtym opulisti
udelitelne politicky program centremisticke uzemi kolamske ekonomickych
oslabovat.
Rozpocet v rokoch politikov. Zas


training:  16%|█▌        | 1302/8300 [5:47:14<80:31:38, 41.43s/it] 

training loss: 0.8917266130447388


training:  16%|█▌        | 1303/8300 [5:47:29<65:31:29, 33.71s/it]

training loss: 0.47969698905944824


training:  16%|█▌        | 1304/8300 [5:47:45<55:01:06, 28.31s/it]

training loss: 0.7853760719299316


training:  16%|█▌        | 1305/8300 [5:48:01<47:39:55, 24.53s/it]

training loss: 0.5405339002609253


training:  16%|█▌        | 1306/8300 [5:48:16<42:31:06, 21.89s/it]

training loss: 0.8707493543624878


training:  16%|█▌        | 1307/8300 [5:48:32<38:54:31, 20.03s/it]

training loss: 0.5525755882263184


training:  16%|█▌        | 1308/8300 [5:48:48<36:23:01, 18.73s/it]

training loss: 1.0058157444000244


training:  16%|█▌        | 1309/8300 [5:49:04<34:36:46, 17.82s/it]

training loss: 0.4812518060207367


training:  16%|█▌        | 1310/8300 [5:49:19<33:22:37, 17.19s/it]

training loss: 0.9022426009178162


training:  16%|█▌        | 1311/8300 [5:49:35<32:30:59, 16.75s/it]

training loss: 0.8601877689361572


training:  16%|█▌        | 1312/8300 [5:49:51<31:54:19, 16.44s/it]

training loss: 0.6466652750968933


training:  16%|█▌        | 1313/8300 [5:50:06<31:28:49, 16.22s/it]

training loss: 0.8885245323181152


training:  16%|█▌        | 1314/8300 [5:50:22<31:10:33, 16.07s/it]

training loss: 1.0557591915130615


training:  16%|█▌        | 1315/8300 [5:50:38<30:57:52, 15.96s/it]

training loss: 0.6168442964553833


training:  16%|█▌        | 1316/8300 [5:50:54<30:48:44, 15.88s/it]

training loss: 0.7414525151252747


training:  16%|█▌        | 1317/8300 [5:51:09<30:42:31, 15.83s/it]

training loss: 0.4217990040779114


training:  16%|█▌        | 1318/8300 [5:51:25<30:38:16, 15.80s/it]

training loss: 0.6162614822387695


training:  16%|█▌        | 1319/8300 [5:51:41<30:34:52, 15.77s/it]

training loss: 1.0019372701644897


training:  16%|█▌        | 1320/8300 [5:51:56<30:32:21, 15.75s/it]

training loss: 0.9622575640678406


training:  16%|█▌        | 1321/8300 [5:52:12<30:30:41, 15.74s/it]

training loss: 0.7488558292388916


training:  16%|█▌        | 1322/8300 [5:52:28<30:29:12, 15.73s/it]

training loss: 0.7949641942977905


training:  16%|█▌        | 1323/8300 [5:52:43<30:27:55, 15.72s/it]

training loss: 0.938692033290863


training:  16%|█▌        | 1324/8300 [5:52:59<30:26:56, 15.71s/it]

training loss: 1.0070197582244873


training:  16%|█▌        | 1325/8300 [5:53:15<30:26:29, 15.71s/it]

training loss: 0.4544677138328552


training:  16%|█▌        | 1326/8300 [5:53:31<30:26:08, 15.71s/it]

training loss: 0.6409814357757568


training:  16%|█▌        | 1327/8300 [5:53:46<30:25:56, 15.71s/it]

training loss: 0.6214004755020142


training:  16%|█▌        | 1328/8300 [5:54:02<30:25:30, 15.71s/it]

training loss: 0.8013849258422852


training:  16%|█▌        | 1329/8300 [5:54:18<30:25:00, 15.71s/it]

training loss: 0.6500305533409119


training:  16%|█▌        | 1330/8300 [5:54:33<30:24:41, 15.71s/it]

training loss: 0.9114017486572266


training:  16%|█▌        | 1331/8300 [5:54:49<30:24:24, 15.71s/it]

training loss: 0.5904804468154907


training:  16%|█▌        | 1332/8300 [5:55:05<30:24:17, 15.71s/it]

training loss: 0.33852243423461914


training:  16%|█▌        | 1333/8300 [5:55:21<30:24:09, 15.71s/it]

training loss: 0.47246697545051575


training:  16%|█▌        | 1334/8300 [5:55:36<30:24:07, 15.71s/it]

training loss: 0.47161439061164856


training:  16%|█▌        | 1335/8300 [5:55:52<30:23:40, 15.71s/it]

training loss: 0.7943373322486877


training:  16%|█▌        | 1336/8300 [5:56:08<30:23:31, 15.71s/it]

training loss: 0.5703408718109131


training:  16%|█▌        | 1337/8300 [5:56:23<30:23:17, 15.71s/it]

training loss: 1.050440788269043


training:  16%|█▌        | 1338/8300 [5:56:39<30:22:57, 15.71s/it]

training loss: 1.0486186742782593


training:  16%|█▌        | 1339/8300 [5:56:55<30:22:33, 15.71s/it]

training loss: 0.7570316791534424


training:  16%|█▌        | 1340/8300 [5:57:11<30:22:11, 15.71s/it]

training loss: 0.6865078210830688


training:  16%|█▌        | 1341/8300 [5:57:26<30:21:56, 15.71s/it]

training loss: 0.7110047340393066


training:  16%|█▌        | 1342/8300 [5:57:42<30:21:45, 15.71s/it]

training loss: 1.0480053424835205


training:  16%|█▌        | 1343/8300 [5:57:58<30:21:08, 15.71s/it]

training loss: 1.0693126916885376


training:  16%|█▌        | 1344/8300 [5:58:13<30:21:11, 15.71s/it]

training loss: 0.5924301743507385


training:  16%|█▌        | 1345/8300 [5:58:29<30:20:45, 15.71s/it]

training loss: 0.6927538514137268


training:  16%|█▌        | 1346/8300 [5:58:45<30:20:16, 15.71s/it]

training loss: 0.7398182153701782


training:  16%|█▌        | 1347/8300 [5:59:00<30:20:05, 15.71s/it]

training loss: 1.044478178024292


training:  16%|█▌        | 1348/8300 [5:59:16<30:20:00, 15.71s/it]

training loss: 0.712332546710968


training:  16%|█▋        | 1349/8300 [5:59:32<30:19:35, 15.71s/it]

training loss: 0.5520134568214417


training:  16%|█▋        | 1350/8300 [5:59:48<30:19:43, 15.71s/it]

training loss: 0.8014934062957764


training:  16%|█▋        | 1351/8300 [6:00:03<30:19:18, 15.71s/it]

training loss: 0.5978565812110901


training:  16%|█▋        | 1352/8300 [6:00:19<30:18:59, 15.71s/it]

training loss: 0.6767297983169556


training:  16%|█▋        | 1353/8300 [6:00:35<30:18:31, 15.71s/it]

training loss: 0.9280765056610107


training:  16%|█▋        | 1354/8300 [6:00:50<30:18:05, 15.70s/it]

training loss: 0.7433041334152222


training:  16%|█▋        | 1355/8300 [6:01:06<30:18:11, 15.71s/it]

training loss: 0.8097787499427795


training:  16%|█▋        | 1356/8300 [6:01:22<30:17:52, 15.71s/it]

training loss: 0.5023838877677917


training:  16%|█▋        | 1357/8300 [6:01:38<30:17:46, 15.71s/it]

training loss: 1.1119482517242432


training:  16%|█▋        | 1358/8300 [6:01:53<30:17:17, 15.71s/it]

training loss: 0.739366888999939


training:  16%|█▋        | 1359/8300 [6:02:09<30:17:06, 15.71s/it]

training loss: 0.5761963129043579


training:  16%|█▋        | 1360/8300 [6:02:25<30:16:40, 15.71s/it]

training loss: 0.6331080794334412


training:  16%|█▋        | 1361/8300 [6:02:40<30:16:30, 15.71s/it]

training loss: 0.7711719274520874


training:  16%|█▋        | 1362/8300 [6:02:56<30:16:02, 15.71s/it]

training loss: 0.9137787818908691


training:  16%|█▋        | 1363/8300 [6:03:12<30:15:48, 15.71s/it]

training loss: 0.9229958057403564


training:  16%|█▋        | 1364/8300 [6:03:28<30:16:14, 15.71s/it]

training loss: 0.9600532054901123


training:  16%|█▋        | 1365/8300 [6:03:43<30:15:38, 15.71s/it]

training loss: 1.0389821529388428


training:  16%|█▋        | 1366/8300 [6:03:59<30:15:14, 15.71s/it]

training loss: 1.1291056871414185


training:  16%|█▋        | 1367/8300 [6:04:15<30:14:53, 15.71s/it]

training loss: 0.7303892970085144


training:  16%|█▋        | 1368/8300 [6:04:30<30:14:16, 15.70s/it]

training loss: 0.9601348638534546


training:  16%|█▋        | 1369/8300 [6:04:46<30:14:13, 15.71s/it]

training loss: 0.6700119972229004


training:  17%|█▋        | 1370/8300 [6:05:02<30:14:02, 15.71s/it]

training loss: 0.965582013130188


training:  17%|█▋        | 1371/8300 [6:05:17<30:13:47, 15.71s/it]

training loss: 0.5597550868988037


training:  17%|█▋        | 1372/8300 [6:05:33<30:13:46, 15.71s/it]

training loss: 0.9322205781936646


training:  17%|█▋        | 1373/8300 [6:05:49<30:13:15, 15.71s/it]

training loss: 0.6642325520515442


training:  17%|█▋        | 1374/8300 [6:06:05<30:13:12, 15.71s/it]

training loss: 0.7322996258735657


training:  17%|█▋        | 1375/8300 [6:06:20<30:12:45, 15.71s/it]

training loss: 0.6009860038757324


training:  17%|█▋        | 1376/8300 [6:06:36<30:12:25, 15.71s/it]

training loss: 0.9189797639846802


training:  17%|█▋        | 1377/8300 [6:06:52<30:12:06, 15.71s/it]

training loss: 0.6117420196533203


training:  17%|█▋        | 1378/8300 [6:07:07<30:11:46, 15.70s/it]

training loss: 1.089949131011963


training:  17%|█▋        | 1379/8300 [6:07:23<30:11:47, 15.71s/it]

training loss: 0.7454218864440918


training:  17%|█▋        | 1380/8300 [6:07:39<30:11:33, 15.71s/it]

training loss: 0.8241375088691711


training:  17%|█▋        | 1381/8300 [6:07:54<30:11:01, 15.70s/it]

training loss: 0.7714914083480835


training:  17%|█▋        | 1382/8300 [6:08:10<30:11:09, 15.71s/it]

training loss: 0.912668764591217


training:  17%|█▋        | 1383/8300 [6:08:26<30:10:27, 15.70s/it]

training loss: 0.774912416934967


training:  17%|█▋        | 1384/8300 [6:08:42<30:10:17, 15.71s/it]

training loss: 1.0221433639526367


training:  17%|█▋        | 1385/8300 [6:08:57<30:09:51, 15.70s/it]

training loss: 0.497936487197876


training:  17%|█▋        | 1386/8300 [6:09:13<30:10:05, 15.71s/it]

training loss: 1.190680980682373


training:  17%|█▋        | 1387/8300 [6:09:29<30:09:52, 15.71s/it]

training loss: 0.854025661945343


training:  17%|█▋        | 1388/8300 [6:09:44<30:09:34, 15.71s/it]

training loss: 1.1472662687301636


training:  17%|█▋        | 1389/8300 [6:10:00<30:09:27, 15.71s/it]

training loss: 0.7601925730705261


training:  17%|█▋        | 1390/8300 [6:10:16<30:09:13, 15.71s/it]

training loss: 0.7699021697044373


training:  17%|█▋        | 1391/8300 [6:10:32<30:08:40, 15.71s/it]

training loss: 0.45868799090385437


training:  17%|█▋        | 1392/8300 [6:10:47<30:08:31, 15.71s/it]

training loss: 0.7674388885498047


training:  17%|█▋        | 1393/8300 [6:11:03<30:08:11, 15.71s/it]

training loss: 1.1226228475570679


training:  17%|█▋        | 1394/8300 [6:11:19<30:08:18, 15.71s/it]

training loss: 0.7199485898017883


training:  17%|█▋        | 1395/8300 [6:11:34<30:08:05, 15.71s/it]

training loss: 0.848621666431427


training:  17%|█▋        | 1396/8300 [6:11:50<30:07:37, 15.71s/it]

training loss: 0.9145287275314331


training:  17%|█▋        | 1397/8300 [6:12:06<30:07:34, 15.71s/it]

training loss: 1.0241150856018066


training:  17%|█▋        | 1398/8300 [6:12:22<30:07:28, 15.71s/it]

training loss: 0.6480022668838501


training:  17%|█▋        | 1399/8300 [6:12:37<30:07:17, 15.71s/it]

training loss: 0.8129677772521973


training:  17%|█▋        | 1400/8300 [6:12:53<30:06:38, 15.71s/it]

training loss: 0.731489896774292
training loss: 1.1463497877120972


training:  17%|█▋        | 1401/8300 [6:13:10<30:50:25, 16.09s/it]

validation loss: 1.512944221496582


training:  17%|█▋        | 1402/8300 [6:13:26<30:37:07, 15.98s/it]

training loss: 0.5680966973304749


training:  17%|█▋        | 1403/8300 [6:13:41<30:27:27, 15.90s/it]

training loss: 0.9269849061965942


training:  17%|█▋        | 1404/8300 [6:13:57<30:20:27, 15.84s/it]

training loss: 1.1336030960083008


training:  17%|█▋        | 1405/8300 [6:14:13<30:15:36, 15.80s/it]

training loss: 0.8595868945121765


training:  17%|█▋        | 1406/8300 [6:14:28<30:11:59, 15.77s/it]

training loss: 0.3343111276626587


training:  17%|█▋        | 1407/8300 [6:14:44<30:09:28, 15.75s/it]

training loss: 0.8545103669166565


training:  17%|█▋        | 1408/8300 [6:15:00<30:07:24, 15.73s/it]

training loss: 0.9846925139427185


training:  17%|█▋        | 1409/8300 [6:15:16<30:06:19, 15.73s/it]

training loss: 0.557324230670929


training:  17%|█▋        | 1410/8300 [6:15:31<30:05:17, 15.72s/it]

training loss: 0.7161843180656433


training:  17%|█▋        | 1411/8300 [6:15:47<30:04:36, 15.72s/it]

training loss: 0.6087808012962341


training:  17%|█▋        | 1412/8300 [6:16:03<30:04:03, 15.71s/it]

training loss: 0.6672571301460266


training:  17%|█▋        | 1413/8300 [6:16:18<30:03:21, 15.71s/it]

training loss: 0.715372622013092


training:  17%|█▋        | 1414/8300 [6:16:34<30:03:03, 15.71s/it]

training loss: 0.8608182668685913


training:  17%|█▋        | 1415/8300 [6:16:50<30:02:08, 15.70s/it]

training loss: 0.7961546182632446


training:  17%|█▋        | 1416/8300 [6:17:06<30:01:50, 15.70s/it]

training loss: 0.7919206619262695


training:  17%|█▋        | 1417/8300 [6:17:21<30:01:43, 15.71s/it]

training loss: 1.159788727760315


training:  17%|█▋        | 1418/8300 [6:17:37<30:01:19, 15.70s/it]

training loss: 0.9630368947982788


training:  17%|█▋        | 1419/8300 [6:17:53<30:00:57, 15.70s/it]

training loss: 0.47429999709129333


training:  17%|█▋        | 1420/8300 [6:18:08<30:00:52, 15.71s/it]

training loss: 0.7030859589576721


training:  17%|█▋        | 1421/8300 [6:18:24<30:00:32, 15.70s/it]

training loss: 1.1226036548614502


training:  17%|█▋        | 1422/8300 [6:18:40<30:00:30, 15.71s/it]

training loss: 1.0160481929779053


training:  17%|█▋        | 1423/8300 [6:18:55<29:59:59, 15.70s/it]

training loss: 1.1643742322921753


training:  17%|█▋        | 1424/8300 [6:19:11<29:59:59, 15.71s/it]

training loss: 0.6129168272018433


training:  17%|█▋        | 1425/8300 [6:19:27<29:59:32, 15.71s/it]

training loss: 0.5425854325294495


training:  17%|█▋        | 1426/8300 [6:19:43<29:59:09, 15.70s/it]

training loss: 1.0004870891571045


training:  17%|█▋        | 1427/8300 [6:19:58<29:58:54, 15.70s/it]

training loss: 0.9032427072525024


training:  17%|█▋        | 1428/8300 [6:20:14<29:58:46, 15.71s/it]

training loss: 1.022603988647461


training:  17%|█▋        | 1429/8300 [6:20:30<29:58:26, 15.70s/it]

training loss: 0.6346673369407654


training:  17%|█▋        | 1430/8300 [6:20:45<29:57:59, 15.70s/it]

training loss: 0.7383207082748413


training:  17%|█▋        | 1431/8300 [6:21:01<29:57:49, 15.70s/it]

training loss: 0.6734289526939392


training:  17%|█▋        | 1432/8300 [6:21:17<29:57:53, 15.71s/it]

training loss: 0.9290420413017273


training:  17%|█▋        | 1433/8300 [6:21:33<29:57:29, 15.71s/it]

training loss: 0.8456830978393555


training:  17%|█▋        | 1434/8300 [6:21:48<29:57:24, 15.71s/it]

training loss: 1.0465693473815918


training:  17%|█▋        | 1435/8300 [6:22:04<29:57:05, 15.71s/it]

training loss: 0.5503617525100708


training:  17%|█▋        | 1436/8300 [6:22:20<29:56:40, 15.71s/it]

training loss: 0.8421508073806763


training:  17%|█▋        | 1437/8300 [6:22:35<29:56:26, 15.71s/it]

training loss: 0.6485036611557007


training:  17%|█▋        | 1438/8300 [6:22:51<29:56:14, 15.71s/it]

training loss: 0.5952374339103699


training:  17%|█▋        | 1439/8300 [6:23:07<29:56:01, 15.71s/it]

training loss: 1.0523384809494019


training:  17%|█▋        | 1440/8300 [6:23:22<29:55:50, 15.71s/it]

training loss: 0.5177456140518188


training:  17%|█▋        | 1441/8300 [6:23:38<29:55:37, 15.71s/it]

training loss: 0.9731906652450562


training:  17%|█▋        | 1442/8300 [6:23:54<29:54:56, 15.70s/it]

training loss: 0.821188747882843


training:  17%|█▋        | 1443/8300 [6:24:10<29:54:56, 15.71s/it]

training loss: 1.0857560634613037


training:  17%|█▋        | 1444/8300 [6:24:25<29:54:55, 15.71s/it]

training loss: 0.9227126240730286


training:  17%|█▋        | 1445/8300 [6:24:41<29:54:22, 15.71s/it]

training loss: 0.930239200592041


training:  17%|█▋        | 1446/8300 [6:24:57<29:53:59, 15.70s/it]

training loss: 0.768073320388794


training:  17%|█▋        | 1447/8300 [6:25:12<29:54:02, 15.71s/it]

training loss: 0.7037560343742371


training:  17%|█▋        | 1448/8300 [6:25:28<29:53:47, 15.71s/it]

training loss: 0.8798275589942932


training:  17%|█▋        | 1449/8300 [6:25:44<29:53:27, 15.71s/it]

training loss: 0.778991162776947


training:  17%|█▋        | 1450/8300 [6:26:00<29:53:01, 15.71s/it]

training loss: 0.5435810089111328


training:  17%|█▋        | 1451/8300 [6:26:15<29:52:46, 15.71s/it]

training loss: 0.8673266172409058


training:  17%|█▋        | 1452/8300 [6:26:31<29:52:33, 15.71s/it]

training loss: 0.5982204079627991


training:  18%|█▊        | 1453/8300 [6:26:47<29:55:41, 15.74s/it]

training loss: 0.48118555545806885


training:  18%|█▊        | 1454/8300 [6:27:02<29:54:10, 15.72s/it]

training loss: 0.6223158836364746


training:  18%|█▊        | 1455/8300 [6:27:18<29:53:01, 15.72s/it]

training loss: 0.9636014103889465


training:  18%|█▊        | 1456/8300 [6:27:34<29:52:31, 15.71s/it]

training loss: 0.608923614025116


training:  18%|█▊        | 1457/8300 [6:27:50<29:51:27, 15.71s/it]

training loss: 0.6673934459686279


training:  18%|█▊        | 1458/8300 [6:28:05<29:50:57, 15.71s/it]

training loss: 1.041412115097046


training:  18%|█▊        | 1459/8300 [6:28:21<29:50:32, 15.70s/it]

training loss: 0.6041305661201477


training:  18%|█▊        | 1460/8300 [6:28:37<29:50:23, 15.71s/it]

training loss: 0.9107143878936768


training:  18%|█▊        | 1461/8300 [6:28:52<29:49:42, 15.70s/it]

training loss: 0.4665393531322479


training:  18%|█▊        | 1462/8300 [6:29:08<29:49:32, 15.70s/it]

training loss: 0.48101314902305603


training:  18%|█▊        | 1463/8300 [6:29:24<29:49:21, 15.70s/it]

training loss: 1.11227548122406


training:  18%|█▊        | 1464/8300 [6:29:39<29:49:07, 15.70s/it]

training loss: 0.5297092795372009


training:  18%|█▊        | 1465/8300 [6:29:55<29:48:35, 15.70s/it]

training loss: 0.8728785514831543


training:  18%|█▊        | 1466/8300 [6:30:11<29:48:22, 15.70s/it]

training loss: 1.044331431388855


training:  18%|█▊        | 1467/8300 [6:30:27<29:48:03, 15.70s/it]

training loss: 0.6090401411056519


training:  18%|█▊        | 1468/8300 [6:30:42<29:47:54, 15.70s/it]

training loss: 1.1124770641326904


training:  18%|█▊        | 1469/8300 [6:30:58<29:47:48, 15.70s/it]

training loss: 1.0772331953048706


training:  18%|█▊        | 1470/8300 [6:31:14<29:47:44, 15.70s/it]

training loss: 0.7710413932800293


training:  18%|█▊        | 1471/8300 [6:31:29<29:47:13, 15.70s/it]

training loss: 0.7816228270530701


training:  18%|█▊        | 1472/8300 [6:31:45<29:46:31, 15.70s/it]

training loss: 0.8427265286445618


training:  18%|█▊        | 1473/8300 [6:32:01<29:46:15, 15.70s/it]

training loss: 0.4749496579170227


training:  18%|█▊        | 1474/8300 [6:32:16<29:46:02, 15.70s/it]

training loss: 0.862639307975769


training:  18%|█▊        | 1475/8300 [6:32:32<29:45:55, 15.70s/it]

training loss: 0.9029706120491028


training:  18%|█▊        | 1476/8300 [6:32:48<29:45:40, 15.70s/it]

training loss: 0.7922377586364746


training:  18%|█▊        | 1477/8300 [6:33:04<29:45:35, 15.70s/it]

training loss: 0.5413704514503479


training:  18%|█▊        | 1478/8300 [6:33:19<29:45:35, 15.70s/it]

training loss: 0.7281622886657715


training:  18%|█▊        | 1479/8300 [6:33:35<29:45:31, 15.71s/it]

training loss: 0.278087854385376


training:  18%|█▊        | 1480/8300 [6:33:51<29:45:07, 15.70s/it]

training loss: 0.5868029594421387


training:  18%|█▊        | 1481/8300 [6:34:06<29:44:48, 15.70s/it]

training loss: 0.9536159634590149


training:  18%|█▊        | 1482/8300 [6:34:22<29:44:38, 15.71s/it]

training loss: 0.7459151744842529


training:  18%|█▊        | 1483/8300 [6:34:38<29:44:43, 15.71s/it]

training loss: 1.079939842224121


training:  18%|█▊        | 1484/8300 [6:34:54<29:44:10, 15.71s/it]

training loss: 0.6618451476097107


training:  18%|█▊        | 1485/8300 [6:35:09<29:44:09, 15.71s/it]

training loss: 0.9550831317901611


training:  18%|█▊        | 1486/8300 [6:35:25<29:43:51, 15.71s/it]

training loss: 0.809497594833374


training:  18%|█▊        | 1487/8300 [6:35:41<29:43:15, 15.70s/it]

training loss: 0.4531184136867523


training:  18%|█▊        | 1488/8300 [6:35:56<29:42:53, 15.70s/it]

training loss: 0.8746193647384644


training:  18%|█▊        | 1489/8300 [6:36:12<29:42:41, 15.70s/it]

training loss: 0.9587554931640625


training:  18%|█▊        | 1490/8300 [6:36:28<29:42:21, 15.70s/it]

training loss: 0.7738781571388245


training:  18%|█▊        | 1491/8300 [6:36:43<29:42:05, 15.70s/it]

training loss: 0.6154693365097046


training:  18%|█▊        | 1492/8300 [6:36:59<29:41:52, 15.70s/it]

training loss: 0.6643578410148621


training:  18%|█▊        | 1493/8300 [6:37:15<29:41:36, 15.70s/it]

training loss: 0.621924102306366


training:  18%|█▊        | 1494/8300 [6:37:31<29:41:31, 15.71s/it]

training loss: 0.8933727741241455


training:  18%|█▊        | 1495/8300 [6:37:46<29:41:06, 15.70s/it]

training loss: 0.6665879487991333


training:  18%|█▊        | 1496/8300 [6:38:02<29:40:33, 15.70s/it]

training loss: 0.6445845365524292


training:  18%|█▊        | 1497/8300 [6:38:18<29:40:07, 15.70s/it]

training loss: 0.4410156309604645


training:  18%|█▊        | 1498/8300 [6:38:33<29:39:55, 15.70s/it]

training loss: 0.913090169429779


training:  18%|█▊        | 1499/8300 [6:38:49<29:39:42, 15.70s/it]

training loss: 0.90884929895401


training:  18%|█▊        | 1500/8300 [6:39:05<29:39:35, 15.70s/it]

training loss: 1.0234984159469604
training loss: 1.1667373180389404


training:  18%|█▊        | 1501/8300 [6:39:22<30:23:00, 16.09s/it]

validation loss: 1.4982377290725708


training:  18%|█▊        | 1502/8300 [6:39:37<30:10:44, 15.98s/it]

training loss: 1.0416151285171509


training:  18%|█▊        | 1503/8300 [6:39:53<30:00:54, 15.90s/it]

training loss: 0.9128077626228333


training:  18%|█▊        | 1504/8300 [6:40:09<29:54:18, 15.84s/it]

training loss: 0.9242144823074341


training:  18%|█▊        | 1505/8300 [6:40:25<29:49:22, 15.80s/it]

training loss: 0.6959099173545837


training:  18%|█▊        | 1506/8300 [6:40:40<29:45:49, 15.77s/it]

training loss: 0.6149535179138184


training:  18%|█▊        | 1507/8300 [6:40:56<29:43:10, 15.75s/it]

training loss: 0.7564893960952759


training:  18%|█▊        | 1508/8300 [6:41:12<29:41:08, 15.73s/it]

training loss: 1.0985462665557861


training:  18%|█▊        | 1509/8300 [6:41:27<29:40:07, 15.73s/it]

training loss: 0.47317078709602356


training:  18%|█▊        | 1510/8300 [6:41:43<29:38:58, 15.72s/it]

training loss: 0.6639518737792969


training:  18%|█▊        | 1511/8300 [6:41:59<29:38:14, 15.72s/it]

training loss: 0.7176526784896851


training:  18%|█▊        | 1512/8300 [6:42:15<29:37:29, 15.71s/it]

training loss: 1.0644267797470093


training:  18%|█▊        | 1513/8300 [6:42:30<29:36:57, 15.71s/it]

training loss: 0.4780603051185608


training:  18%|█▊        | 1514/8300 [6:42:46<29:36:25, 15.71s/it]

training loss: 1.1730384826660156


training:  18%|█▊        | 1515/8300 [6:43:02<29:36:03, 15.71s/it]

training loss: 0.7865777015686035


training:  18%|█▊        | 1516/8300 [6:43:17<29:35:57, 15.71s/it]

training loss: 0.6171543598175049


training:  18%|█▊        | 1517/8300 [6:43:33<29:35:44, 15.71s/it]

training loss: 0.8988794088363647


training:  18%|█▊        | 1518/8300 [6:43:49<29:35:43, 15.71s/it]

training loss: 0.8262179493904114


training:  18%|█▊        | 1519/8300 [6:44:04<29:35:20, 15.71s/it]

training loss: 0.4951874017715454


training:  18%|█▊        | 1520/8300 [6:44:20<29:34:56, 15.71s/it]

training loss: 0.6746704578399658


training:  18%|█▊        | 1521/8300 [6:44:36<29:34:52, 15.71s/it]

training loss: 0.8997386693954468


training:  18%|█▊        | 1522/8300 [6:44:52<29:34:15, 15.71s/it]

training loss: 0.898788332939148


training:  18%|█▊        | 1523/8300 [6:45:07<29:34:00, 15.71s/it]

training loss: 0.6879389882087708


training:  18%|█▊        | 1524/8300 [6:45:23<29:33:47, 15.71s/it]

training loss: 0.5976637601852417


training:  18%|█▊        | 1525/8300 [6:45:39<29:33:49, 15.71s/it]

training loss: 1.0516196489334106


training:  18%|█▊        | 1526/8300 [6:45:54<29:33:10, 15.71s/it]

training loss: 0.7727941870689392


training:  18%|█▊        | 1527/8300 [6:46:10<29:33:03, 15.71s/it]

training loss: 0.853336751461029


training:  18%|█▊        | 1528/8300 [6:46:26<29:32:48, 15.71s/it]

training loss: 0.4468297064304352


training:  18%|█▊        | 1529/8300 [6:46:42<29:32:43, 15.71s/it]

training loss: 0.8014534711837769


training:  18%|█▊        | 1530/8300 [6:46:57<29:32:13, 15.71s/it]

training loss: 0.4515921473503113


training:  18%|█▊        | 1531/8300 [6:47:13<29:32:01, 15.71s/it]

training loss: 1.0465757846832275


training:  18%|█▊        | 1532/8300 [6:47:29<29:31:40, 15.71s/it]

training loss: 0.4635809659957886


training:  18%|█▊        | 1533/8300 [6:47:44<29:31:13, 15.70s/it]

training loss: 0.9361324310302734


training:  18%|█▊        | 1534/8300 [6:48:00<29:30:45, 15.70s/it]

training loss: 0.7239405512809753


training:  18%|█▊        | 1535/8300 [6:48:16<29:30:29, 15.70s/it]

training loss: 0.69189453125


training:  19%|█▊        | 1536/8300 [6:48:31<29:30:09, 15.70s/it]

training loss: 0.8295759558677673


training:  19%|█▊        | 1537/8300 [6:48:47<29:30:09, 15.70s/it]

training loss: 0.7216233611106873


training:  19%|█▊        | 1538/8300 [6:49:03<29:29:38, 15.70s/it]

training loss: 0.951030433177948


training:  19%|█▊        | 1539/8300 [6:49:19<29:29:38, 15.70s/it]

training loss: 0.6315389275550842


training:  19%|█▊        | 1540/8300 [6:49:34<29:29:29, 15.71s/it]

training loss: 1.1533757448196411


training:  19%|█▊        | 1541/8300 [6:49:50<29:29:24, 15.71s/it]

training loss: 0.5455203056335449


training:  19%|█▊        | 1542/8300 [6:50:06<29:29:10, 15.71s/it]

training loss: 1.0065727233886719


training:  19%|█▊        | 1543/8300 [6:50:21<29:28:59, 15.71s/it]

training loss: 0.7747648358345032


training:  19%|█▊        | 1544/8300 [6:50:37<29:28:51, 15.71s/it]

training loss: 0.6620467305183411


training:  19%|█▊        | 1545/8300 [6:50:53<29:28:27, 15.71s/it]

training loss: 0.873370885848999


training:  19%|█▊        | 1546/8300 [6:51:09<29:28:21, 15.71s/it]

training loss: 0.607075035572052


training:  19%|█▊        | 1547/8300 [6:51:24<29:28:09, 15.71s/it]

training loss: 0.9847598075866699


training:  19%|█▊        | 1548/8300 [6:51:40<29:27:53, 15.71s/it]

training loss: 0.6096196174621582


training:  19%|█▊        | 1549/8300 [6:51:56<29:27:31, 15.71s/it]

training loss: 0.8787820339202881


training:  19%|█▊        | 1550/8300 [6:52:11<29:27:10, 15.71s/it]

training loss: 0.3961844742298126


training:  19%|█▊        | 1551/8300 [6:52:27<29:27:01, 15.71s/it]

training loss: 0.7107985019683838


training:  19%|█▊        | 1552/8300 [6:52:43<29:26:41, 15.71s/it]

training loss: 0.7099766731262207


training:  19%|█▊        | 1553/8300 [6:52:59<29:26:16, 15.71s/it]

training loss: 0.9140160083770752


training:  19%|█▊        | 1554/8300 [6:53:14<29:26:06, 15.71s/it]

training loss: 0.741102933883667


training:  19%|█▊        | 1555/8300 [6:53:30<29:26:01, 15.71s/it]

training loss: 0.8568705320358276


training:  19%|█▊        | 1556/8300 [6:53:46<29:25:54, 15.71s/it]

training loss: 0.33183154463768005


training:  19%|█▉        | 1557/8300 [6:54:01<29:25:32, 15.71s/it]

training loss: 0.6872909069061279


training:  19%|█▉        | 1558/8300 [6:54:17<29:25:25, 15.71s/it]

training loss: 0.804627537727356


training:  19%|█▉        | 1559/8300 [6:54:33<29:25:12, 15.71s/it]

training loss: 1.0588266849517822


training:  19%|█▉        | 1560/8300 [6:54:48<29:24:47, 15.71s/it]

training loss: 0.6551330089569092


training:  19%|█▉        | 1561/8300 [6:55:04<29:24:12, 15.71s/it]

training loss: 0.4857979416847229


training:  19%|█▉        | 1562/8300 [6:55:20<29:24:09, 15.71s/it]

training loss: 0.698356568813324


training:  19%|█▉        | 1563/8300 [6:55:36<29:24:09, 15.71s/it]

training loss: 0.9237751960754395


training:  19%|█▉        | 1564/8300 [6:55:51<29:23:48, 15.71s/it]

training loss: 0.727141797542572


training:  19%|█▉        | 1565/8300 [6:56:07<29:23:33, 15.71s/it]

training loss: 0.918321430683136


training:  19%|█▉        | 1566/8300 [6:56:23<29:23:08, 15.71s/it]

training loss: 0.8904863595962524


training:  19%|█▉        | 1567/8300 [6:56:38<29:22:42, 15.71s/it]

training loss: 0.9100356101989746


training:  19%|█▉        | 1568/8300 [6:56:54<29:22:30, 15.71s/it]

training loss: 0.833943247795105


training:  19%|█▉        | 1569/8300 [6:57:10<29:22:23, 15.71s/it]

training loss: 0.7502034902572632


training:  19%|█▉        | 1570/8300 [6:57:26<29:22:08, 15.71s/it]

training loss: 0.6147470474243164


training:  19%|█▉        | 1571/8300 [6:57:41<29:21:45, 15.71s/it]

training loss: 0.6092541217803955


training:  19%|█▉        | 1572/8300 [6:57:57<29:21:31, 15.71s/it]

training loss: 1.0623425245285034


training:  19%|█▉        | 1573/8300 [6:58:13<29:21:22, 15.71s/it]

training loss: 0.7014440298080444


training:  19%|█▉        | 1574/8300 [6:58:28<29:20:59, 15.71s/it]

training loss: 0.7727447152137756


training:  19%|█▉        | 1575/8300 [6:58:44<29:20:27, 15.71s/it]

training loss: 0.9510087966918945


training:  19%|█▉        | 1576/8300 [6:59:00<29:19:58, 15.70s/it]

training loss: 0.6560266017913818


training:  19%|█▉        | 1577/8300 [6:59:16<29:20:08, 15.71s/it]

training loss: 0.5219132304191589


training:  19%|█▉        | 1578/8300 [6:59:31<29:20:11, 15.71s/it]

training loss: 1.2369853258132935


training:  19%|█▉        | 1579/8300 [6:59:47<29:19:42, 15.71s/it]

training loss: 0.7630852460861206


training:  19%|█▉        | 1580/8300 [7:00:03<29:19:31, 15.71s/it]

training loss: 0.9095364809036255


training:  19%|█▉        | 1581/8300 [7:00:18<29:19:03, 15.71s/it]

training loss: 0.9044275283813477


training:  19%|█▉        | 1582/8300 [7:00:34<29:18:49, 15.71s/it]

training loss: 0.9120217561721802


training:  19%|█▉        | 1583/8300 [7:00:50<29:18:33, 15.71s/it]

training loss: 1.1940901279449463


training:  19%|█▉        | 1584/8300 [7:01:05<29:18:05, 15.71s/it]

training loss: 0.6666038632392883


training:  19%|█▉        | 1585/8300 [7:01:21<29:17:51, 15.71s/it]

training loss: 0.6403154730796814


training:  19%|█▉        | 1586/8300 [7:01:37<29:17:33, 15.71s/it]

training loss: 0.8265137672424316


training:  19%|█▉        | 1587/8300 [7:01:53<29:17:26, 15.71s/it]

training loss: 0.6962167620658875


training:  19%|█▉        | 1588/8300 [7:02:08<29:17:30, 15.71s/it]

training loss: 0.8618814945220947


training:  19%|█▉        | 1589/8300 [7:02:24<29:17:09, 15.71s/it]

training loss: 0.28623491525650024


training:  19%|█▉        | 1590/8300 [7:02:40<29:16:42, 15.71s/it]

training loss: 0.6694636940956116


training:  19%|█▉        | 1591/8300 [7:02:55<29:16:08, 15.71s/it]

training loss: 0.6688147783279419


training:  19%|█▉        | 1592/8300 [7:03:11<29:15:58, 15.71s/it]

training loss: 0.6079691052436829


training:  19%|█▉        | 1593/8300 [7:03:27<29:15:51, 15.71s/it]

training loss: 0.820488691329956


training:  19%|█▉        | 1594/8300 [7:03:43<29:15:42, 15.71s/it]

training loss: 0.9124549031257629


training:  19%|█▉        | 1595/8300 [7:03:58<29:15:20, 15.71s/it]

training loss: 0.8291513919830322


training:  19%|█▉        | 1596/8300 [7:04:14<29:15:06, 15.71s/it]

training loss: 0.9446134567260742


training:  19%|█▉        | 1597/8300 [7:04:30<29:15:14, 15.71s/it]

training loss: 0.9204088449478149


training:  19%|█▉        | 1598/8300 [7:04:45<29:14:54, 15.71s/it]

training loss: 0.8458114862442017


training:  19%|█▉        | 1599/8300 [7:05:01<29:14:36, 15.71s/it]

training loss: 0.8086647391319275


training:  19%|█▉        | 1600/8300 [7:05:17<29:14:28, 15.71s/it]

training loss: 1.1348682641983032
training loss: 0.660859227180481


training:  19%|█▉        | 1601/8300 [7:05:34<29:57:01, 16.10s/it]

validation loss: 1.4994769096374512


training:  19%|█▉        | 1602/8300 [7:05:50<29:44:00, 15.98s/it]

training loss: 1.0270367860794067


training:  19%|█▉        | 1603/8300 [7:06:05<29:35:02, 15.90s/it]

training loss: 0.6102553606033325


training:  19%|█▉        | 1604/8300 [7:06:21<29:28:15, 15.84s/it]

training loss: 0.9607186913490295


training:  19%|█▉        | 1605/8300 [7:06:37<29:23:38, 15.81s/it]

training loss: 0.7608486413955688


training:  19%|█▉        | 1606/8300 [7:06:52<29:19:53, 15.77s/it]

training loss: 0.7934383153915405


training:  19%|█▉        | 1607/8300 [7:07:08<29:17:27, 15.75s/it]

training loss: 0.9532220363616943


training:  19%|█▉        | 1608/8300 [7:07:24<29:15:37, 15.74s/it]

training loss: 1.1423588991165161


training:  19%|█▉        | 1609/8300 [7:07:40<29:14:34, 15.73s/it]

training loss: 1.0434496402740479


training:  19%|█▉        | 1610/8300 [7:07:55<29:13:38, 15.73s/it]

training loss: 0.6264182925224304


training:  19%|█▉        | 1611/8300 [7:08:11<29:13:12, 15.73s/it]

training loss: 0.398428350687027


training:  19%|█▉        | 1612/8300 [7:08:27<29:12:23, 15.72s/it]

training loss: 0.941356897354126


training:  19%|█▉        | 1613/8300 [7:08:42<29:11:24, 15.71s/it]

training loss: 0.9011239409446716


training:  19%|█▉        | 1614/8300 [7:08:58<29:10:50, 15.71s/it]

training loss: 0.7244701981544495


training:  19%|█▉        | 1615/8300 [7:09:14<29:10:33, 15.71s/it]

training loss: 0.6509377360343933


training:  19%|█▉        | 1616/8300 [7:09:30<29:10:15, 15.71s/it]

training loss: 0.8463804721832275


training:  19%|█▉        | 1617/8300 [7:09:45<29:10:02, 15.71s/it]

training loss: 1.073740005493164


training:  19%|█▉        | 1618/8300 [7:10:01<29:09:37, 15.71s/it]

training loss: 1.2677748203277588


training:  20%|█▉        | 1619/8300 [7:10:17<29:09:18, 15.71s/it]

training loss: 0.9141015410423279


training:  20%|█▉        | 1620/8300 [7:10:32<29:09:03, 15.71s/it]

training loss: 0.6020545959472656


training:  20%|█▉        | 1621/8300 [7:10:48<29:09:01, 15.71s/it]

training loss: 0.6913334131240845


training:  20%|█▉        | 1622/8300 [7:11:04<29:08:35, 15.71s/it]

training loss: 0.862790584564209


training:  20%|█▉        | 1623/8300 [7:11:19<29:08:12, 15.71s/it]

training loss: 1.0788774490356445


training:  20%|█▉        | 1624/8300 [7:11:35<29:08:01, 15.71s/it]

training loss: 0.8578497171401978


training:  20%|█▉        | 1625/8300 [7:11:51<29:07:31, 15.71s/it]

training loss: 0.7297013998031616


training:  20%|█▉        | 1626/8300 [7:12:07<29:07:46, 15.71s/it]

training loss: 0.7952881455421448


training:  20%|█▉        | 1627/8300 [7:12:22<29:07:05, 15.71s/it]

training loss: 0.9425712823867798


training:  20%|█▉        | 1628/8300 [7:12:38<29:06:46, 15.71s/it]

training loss: 0.6940321922302246


training:  20%|█▉        | 1629/8300 [7:12:54<29:06:23, 15.71s/it]

training loss: 0.7067295908927917


training:  20%|█▉        | 1630/8300 [7:13:09<29:06:13, 15.71s/it]

training loss: 0.9011871814727783


training:  20%|█▉        | 1631/8300 [7:13:25<29:05:50, 15.71s/it]

training loss: 1.0303112268447876


training:  20%|█▉        | 1632/8300 [7:13:41<29:05:23, 15.71s/it]

training loss: 0.7462484240531921


training:  20%|█▉        | 1633/8300 [7:13:57<29:05:01, 15.70s/it]

training loss: 0.6717077493667603


training:  20%|█▉        | 1634/8300 [7:14:12<29:04:57, 15.71s/it]

training loss: 0.5800894498825073


training:  20%|█▉        | 1635/8300 [7:14:28<29:04:42, 15.71s/it]

training loss: 1.0243960618972778


training:  20%|█▉        | 1636/8300 [7:14:44<29:04:28, 15.71s/it]

training loss: 0.8839328289031982


training:  20%|█▉        | 1637/8300 [7:14:59<29:04:20, 15.71s/it]

training loss: 1.4014555215835571


training:  20%|█▉        | 1638/8300 [7:15:15<29:04:11, 15.71s/it]

training loss: 0.4821622371673584


training:  20%|█▉        | 1639/8300 [7:15:31<29:03:48, 15.71s/it]

training loss: 0.814405620098114


training:  20%|█▉        | 1640/8300 [7:15:47<29:03:34, 15.71s/it]

training loss: 0.7776667475700378


training:  20%|█▉        | 1641/8300 [7:16:02<29:03:32, 15.71s/it]

training loss: 0.5234540104866028


training:  20%|█▉        | 1642/8300 [7:16:18<29:03:17, 15.71s/it]

training loss: 0.7044585943222046


training:  20%|█▉        | 1643/8300 [7:16:34<29:03:22, 15.71s/it]

training loss: 0.540870189666748


training:  20%|█▉        | 1644/8300 [7:16:49<29:02:35, 15.71s/it]

training loss: 0.519862949848175


training:  20%|█▉        | 1645/8300 [7:17:05<29:02:17, 15.71s/it]

training loss: 0.6088137626647949


training:  20%|█▉        | 1646/8300 [7:17:21<29:01:54, 15.71s/it]

training loss: 0.48276543617248535


training:  20%|█▉        | 1647/8300 [7:17:36<29:01:24, 15.70s/it]

training loss: 0.585503339767456


training:  20%|█▉        | 1648/8300 [7:17:52<29:01:03, 15.70s/it]

training loss: 0.9708592295646667


training:  20%|█▉        | 1649/8300 [7:18:08<29:00:38, 15.70s/it]

training loss: 0.72397780418396


training:  20%|█▉        | 1650/8300 [7:18:24<29:00:28, 15.70s/it]

training loss: 0.890021026134491


training:  20%|█▉        | 1651/8300 [7:18:39<29:00:17, 15.70s/it]

training loss: 0.7133596539497375


training:  20%|█▉        | 1652/8300 [7:18:55<29:00:02, 15.70s/it]

training loss: 0.94186931848526


training:  20%|█▉        | 1653/8300 [7:19:11<28:59:47, 15.70s/it]

training loss: 0.4904018044471741


training:  20%|█▉        | 1654/8300 [7:19:26<28:59:37, 15.71s/it]

training loss: 0.8570159673690796


training:  20%|█▉        | 1655/8300 [7:19:42<28:59:14, 15.70s/it]

training loss: 0.5653408765792847


training:  20%|█▉        | 1656/8300 [7:19:58<28:58:53, 15.70s/it]

training loss: 0.475383996963501


training:  20%|█▉        | 1657/8300 [7:20:13<28:58:43, 15.70s/it]

training loss: 0.8503917455673218


training:  20%|█▉        | 1658/8300 [7:20:29<28:58:34, 15.71s/it]

training loss: 0.7617685198783875


training:  20%|█▉        | 1659/8300 [7:20:45<28:57:55, 15.70s/it]

training loss: 0.6989160776138306


training:  20%|██        | 1660/8300 [7:21:01<28:57:50, 15.70s/it]

training loss: 0.9417204260826111


training:  20%|██        | 1661/8300 [7:21:16<28:57:27, 15.70s/it]

training loss: 0.8691000938415527


training:  20%|██        | 1662/8300 [7:21:32<28:57:03, 15.70s/it]

training loss: 1.1670236587524414


training:  20%|██        | 1663/8300 [7:21:48<28:56:59, 15.70s/it]

training loss: 0.7932181358337402


training:  20%|██        | 1664/8300 [7:22:03<28:56:45, 15.70s/it]

training loss: 0.29723218083381653


training:  20%|██        | 1665/8300 [7:22:19<28:56:36, 15.70s/it]

training loss: 0.9237034916877747


training:  20%|██        | 1666/8300 [7:22:35<28:56:25, 15.70s/it]

training loss: 0.7554482221603394


training:  20%|██        | 1667/8300 [7:22:51<28:55:54, 15.70s/it]

training loss: 1.043515682220459


training:  20%|██        | 1668/8300 [7:23:06<28:55:45, 15.70s/it]

training loss: 0.9132340550422668


training:  20%|██        | 1669/8300 [7:23:22<28:55:24, 15.70s/it]

training loss: 0.62038254737854


training:  20%|██        | 1670/8300 [7:23:38<28:55:15, 15.70s/it]

training loss: 0.5474573373794556


training:  20%|██        | 1671/8300 [7:23:53<28:54:52, 15.70s/it]

training loss: 0.7787908315658569


training:  20%|██        | 1672/8300 [7:24:09<28:54:48, 15.70s/it]

training loss: 0.7597808241844177


training:  20%|██        | 1673/8300 [7:24:25<28:54:38, 15.71s/it]

training loss: 0.9816113114356995


training:  20%|██        | 1674/8300 [7:24:40<28:54:18, 15.70s/it]

training loss: 0.7610238194465637


training:  20%|██        | 1675/8300 [7:24:56<28:54:12, 15.71s/it]

training loss: 0.7314654588699341


training:  20%|██        | 1676/8300 [7:25:12<28:54:00, 15.71s/it]

training loss: 0.835528552532196


training:  20%|██        | 1677/8300 [7:25:28<28:53:52, 15.71s/it]

training loss: 0.7826026678085327


training:  20%|██        | 1678/8300 [7:25:43<28:53:21, 15.71s/it]

training loss: 0.7615038752555847


training:  20%|██        | 1679/8300 [7:25:59<28:52:50, 15.70s/it]

training loss: 0.7200538516044617


training:  20%|██        | 1680/8300 [7:26:15<28:52:53, 15.71s/it]

training loss: 0.7835520505905151


training:  20%|██        | 1681/8300 [7:26:30<28:52:50, 15.71s/it]

training loss: 1.0645784139633179


training:  20%|██        | 1682/8300 [7:26:46<28:52:28, 15.71s/it]

training loss: 0.8792962431907654


training:  20%|██        | 1683/8300 [7:27:02<28:52:12, 15.71s/it]

training loss: 0.6710473895072937


training:  20%|██        | 1684/8300 [7:27:18<28:52:10, 15.71s/it]

training loss: 0.9006803631782532


training:  20%|██        | 1685/8300 [7:27:33<28:51:52, 15.71s/it]

training loss: 0.41534358263015747


training:  20%|██        | 1686/8300 [7:27:49<28:51:04, 15.70s/it]

training loss: 0.6793215274810791


training:  20%|██        | 1687/8300 [7:28:05<28:50:54, 15.70s/it]

training loss: 1.1073025465011597


training:  20%|██        | 1688/8300 [7:28:20<28:50:36, 15.70s/it]

training loss: 0.9216286540031433


training:  20%|██        | 1689/8300 [7:28:36<28:50:25, 15.70s/it]

training loss: 0.6664441227912903


training:  20%|██        | 1690/8300 [7:28:52<28:50:06, 15.70s/it]

training loss: 0.598504900932312


training:  20%|██        | 1691/8300 [7:29:07<28:50:13, 15.71s/it]

training loss: 0.8497761487960815


training:  20%|██        | 1692/8300 [7:29:23<28:49:48, 15.71s/it]

training loss: 0.700242280960083


training:  20%|██        | 1693/8300 [7:29:39<28:49:20, 15.70s/it]

training loss: 0.7546579241752625


training:  20%|██        | 1694/8300 [7:29:55<28:48:50, 15.70s/it]

training loss: 0.8447756767272949


training:  20%|██        | 1695/8300 [7:30:10<28:48:27, 15.70s/it]

training loss: 0.5694162845611572


training:  20%|██        | 1696/8300 [7:30:26<28:48:17, 15.70s/it]

training loss: 1.0843788385391235


training:  20%|██        | 1697/8300 [7:30:42<28:47:51, 15.70s/it]

training loss: 0.5934685468673706


training:  20%|██        | 1698/8300 [7:30:57<28:47:32, 15.70s/it]

training loss: 0.6664342284202576


training:  20%|██        | 1699/8300 [7:31:13<28:47:43, 15.70s/it]

training loss: 0.13021427392959595


training:  20%|██        | 1700/8300 [7:31:29<28:47:56, 15.71s/it]

training loss: 0.6080726385116577
training loss: 0.7837778329849243


training:  20%|██        | 1701/8300 [7:31:46<29:29:31, 16.09s/it]

validation loss: 1.5028105974197388


training:  21%|██        | 1702/8300 [7:32:01<29:16:59, 15.98s/it]

training loss: 0.7002099752426147


training:  21%|██        | 1703/8300 [7:32:17<29:07:49, 15.90s/it]

training loss: 0.9432772994041443


training:  21%|██        | 1704/8300 [7:32:33<29:01:18, 15.84s/it]

training loss: 0.9096442461013794


training:  21%|██        | 1705/8300 [7:32:49<28:56:28, 15.80s/it]

training loss: 1.075717568397522


training:  21%|██        | 1706/8300 [7:33:04<28:53:08, 15.77s/it]

training loss: 1.0094317197799683


training:  21%|██        | 1707/8300 [7:33:20<28:51:08, 15.75s/it]

training loss: 0.5667605996131897


training:  21%|██        | 1708/8300 [7:33:36<28:49:40, 15.74s/it]

training loss: 0.7574766278266907


training:  21%|██        | 1709/8300 [7:33:51<28:48:04, 15.73s/it]

training loss: 0.9996903538703918


training:  21%|██        | 1710/8300 [7:34:07<28:47:07, 15.72s/it]

training loss: 0.781138002872467


training:  21%|██        | 1711/8300 [7:34:23<28:46:22, 15.72s/it]

training loss: 0.2381639927625656


training:  21%|██        | 1712/8300 [7:34:39<28:45:31, 15.72s/it]

training loss: 0.7962865233421326


training:  21%|██        | 1713/8300 [7:34:54<28:44:45, 15.71s/it]

training loss: 0.7289103269577026


training:  21%|██        | 1714/8300 [7:35:10<28:44:35, 15.71s/it]

training loss: 0.7262693643569946


training:  21%|██        | 1715/8300 [7:35:26<28:44:21, 15.71s/it]

training loss: 1.0676426887512207


training:  21%|██        | 1716/8300 [7:35:41<28:43:53, 15.71s/it]

training loss: 0.36140504479408264


training:  21%|██        | 1717/8300 [7:35:57<28:43:42, 15.71s/it]

training loss: 0.8423557281494141


training:  21%|██        | 1718/8300 [7:36:13<28:43:20, 15.71s/it]

training loss: 0.5423907041549683


training:  21%|██        | 1719/8300 [7:36:29<28:43:00, 15.71s/it]

training loss: 0.8701274394989014


training:  21%|██        | 1720/8300 [7:36:44<28:42:42, 15.71s/it]

training loss: 0.7245250344276428


training:  21%|██        | 1721/8300 [7:37:00<28:42:12, 15.71s/it]

training loss: 0.8305304646492004


training:  21%|██        | 1722/8300 [7:37:16<28:42:18, 15.71s/it]

training loss: 0.8642893433570862


training:  21%|██        | 1723/8300 [7:37:31<28:42:08, 15.71s/it]

training loss: 0.6168434619903564


training:  21%|██        | 1724/8300 [7:37:47<28:41:35, 15.71s/it]

training loss: 0.9758002758026123


training:  21%|██        | 1725/8300 [7:38:03<28:41:17, 15.71s/it]

training loss: 0.9290355443954468


training:  21%|██        | 1726/8300 [7:38:18<28:41:06, 15.71s/it]

training loss: 0.9254622459411621


training:  21%|██        | 1727/8300 [7:38:34<28:25:59, 15.57s/it]

training loss: 0.6547196507453918


training:  21%|██        | 1728/8300 [7:38:49<28:30:14, 15.61s/it]

training loss: 0.7220063805580139


training:  21%|██        | 1729/8300 [7:39:05<28:32:59, 15.64s/it]

training loss: 0.5858760476112366


training:  21%|██        | 1730/8300 [7:39:21<28:35:20, 15.67s/it]

training loss: 0.941335916519165


training:  21%|██        | 1731/8300 [7:39:37<28:36:54, 15.68s/it]

training loss: 0.6434383988380432


training:  21%|██        | 1732/8300 [7:39:52<28:37:41, 15.69s/it]

training loss: 0.4871246814727783


training:  21%|██        | 1733/8300 [7:40:08<28:38:05, 15.70s/it]

training loss: 1.0378562211990356


training:  21%|██        | 1734/8300 [7:40:24<28:38:26, 15.70s/it]

training loss: 0.5560443997383118


training:  21%|██        | 1735/8300 [7:40:39<28:38:34, 15.71s/it]

training loss: 0.8823454976081848


training:  21%|██        | 1736/8300 [7:40:55<28:38:30, 15.71s/it]

training loss: 0.42182499170303345


training:  21%|██        | 1737/8300 [7:41:11<28:38:45, 15.71s/it]

training loss: 0.7897379994392395


training:  21%|██        | 1738/8300 [7:41:27<28:38:40, 15.71s/it]

training loss: 0.8955060839653015


training:  21%|██        | 1739/8300 [7:41:42<28:38:29, 15.72s/it]

training loss: 1.0590524673461914


training:  21%|██        | 1740/8300 [7:41:58<28:41:20, 15.74s/it]

training loss: 0.9330417513847351


training:  21%|██        | 1741/8300 [7:42:14<28:40:16, 15.74s/it]

training loss: 1.1385501623153687


training:  21%|██        | 1742/8300 [7:42:30<28:39:22, 15.73s/it]

training loss: 0.7499313354492188


training:  21%|██        | 1743/8300 [7:42:45<28:38:37, 15.73s/it]

training loss: 1.0980387926101685


training:  21%|██        | 1744/8300 [7:43:01<28:37:54, 15.72s/it]

training loss: 0.30732542276382446


training:  21%|██        | 1745/8300 [7:43:17<28:37:31, 15.72s/it]

training loss: 0.8780794143676758


training:  21%|██        | 1746/8300 [7:43:32<28:36:47, 15.72s/it]

training loss: 0.9779733419418335


training:  21%|██        | 1747/8300 [7:43:48<28:36:14, 15.71s/it]

training loss: 0.8936131000518799


training:  21%|██        | 1748/8300 [7:44:04<28:35:56, 15.71s/it]

training loss: 0.4740322530269623


training:  21%|██        | 1749/8300 [7:44:20<28:35:27, 15.71s/it]

training loss: 0.6650399565696716


training:  21%|██        | 1750/8300 [7:44:35<28:35:17, 15.71s/it]

training loss: 0.9755784273147583


training:  21%|██        | 1751/8300 [7:44:51<28:34:56, 15.71s/it]

training loss: 0.8973170518875122


training:  21%|██        | 1752/8300 [7:45:07<28:34:46, 15.71s/it]

training loss: 1.073252558708191


training:  21%|██        | 1753/8300 [7:45:22<28:34:32, 15.71s/it]

training loss: 0.7088304758071899


training:  21%|██        | 1754/8300 [7:45:38<28:34:02, 15.71s/it]

training loss: 0.693924605846405


training:  21%|██        | 1755/8300 [7:45:54<28:33:31, 15.71s/it]

training loss: 0.5297734141349792


training:  21%|██        | 1756/8300 [7:46:10<28:33:38, 15.71s/it]

training loss: 0.7874360084533691


training:  21%|██        | 1757/8300 [7:46:25<28:33:20, 15.71s/it]

training loss: 0.7225826382637024


training:  21%|██        | 1758/8300 [7:46:41<28:32:48, 15.71s/it]

training loss: 1.0774627923965454


training:  21%|██        | 1759/8300 [7:46:57<28:32:35, 15.71s/it]

training loss: 1.1911574602127075


training:  21%|██        | 1760/8300 [7:47:12<28:32:21, 15.71s/it]

training loss: 1.049473762512207


training:  21%|██        | 1761/8300 [7:47:28<28:32:13, 15.71s/it]

training loss: 0.463370680809021


training:  21%|██        | 1762/8300 [7:47:44<28:32:09, 15.71s/it]

training loss: 0.7891405820846558


training:  21%|██        | 1763/8300 [7:48:00<28:31:38, 15.71s/it]

training loss: 0.6585649251937866


training:  21%|██▏       | 1764/8300 [7:48:15<28:31:15, 15.71s/it]

training loss: 0.7367328405380249


training:  21%|██▏       | 1765/8300 [7:48:31<28:30:54, 15.71s/it]

training loss: 0.6268215179443359


training:  21%|██▏       | 1766/8300 [7:48:47<28:30:41, 15.71s/it]

training loss: 0.43436866998672485


training:  21%|██▏       | 1767/8300 [7:49:02<28:30:24, 15.71s/it]

training loss: 0.8748700618743896


training:  21%|██▏       | 1768/8300 [7:49:18<28:30:22, 15.71s/it]

training loss: 0.2722455561161041


training:  21%|██▏       | 1769/8300 [7:49:34<28:30:12, 15.71s/it]

training loss: 0.730178713798523


training:  21%|██▏       | 1770/8300 [7:49:49<28:29:38, 15.71s/it]

training loss: 0.5897377133369446


training:  21%|██▏       | 1771/8300 [7:50:05<28:29:18, 15.71s/it]

training loss: 0.9911097884178162


training:  21%|██▏       | 1772/8300 [7:50:21<28:29:06, 15.71s/it]

training loss: 0.8189356923103333


training:  21%|██▏       | 1773/8300 [7:50:37<28:29:06, 15.71s/it]

training loss: 0.6778547167778015


training:  21%|██▏       | 1774/8300 [7:50:52<28:28:41, 15.71s/it]

training loss: 0.8375693559646606


training:  21%|██▏       | 1775/8300 [7:51:08<28:28:40, 15.71s/it]

training loss: 0.9672998785972595


training:  21%|██▏       | 1776/8300 [7:51:24<28:28:09, 15.71s/it]

training loss: 0.6662101745605469


training:  21%|██▏       | 1777/8300 [7:51:39<28:27:55, 15.71s/it]

training loss: 0.36113211512565613


training:  21%|██▏       | 1778/8300 [7:51:55<28:27:22, 15.71s/it]

training loss: 0.8022025227546692


training:  21%|██▏       | 1779/8300 [7:52:11<28:27:06, 15.71s/it]

training loss: 0.7065176367759705


training:  21%|██▏       | 1780/8300 [7:52:27<28:26:40, 15.71s/it]

training loss: 0.9169189929962158


training:  21%|██▏       | 1781/8300 [7:52:42<28:26:34, 15.71s/it]

training loss: 0.9183061718940735


training:  21%|██▏       | 1782/8300 [7:52:58<28:26:33, 15.71s/it]

training loss: 0.9414503574371338


training:  21%|██▏       | 1783/8300 [7:53:14<28:26:28, 15.71s/it]

training loss: 0.679379940032959


training:  21%|██▏       | 1784/8300 [7:53:29<28:26:07, 15.71s/it]

training loss: 0.9996098875999451


training:  22%|██▏       | 1785/8300 [7:53:45<28:25:51, 15.71s/it]

training loss: 1.1173746585845947


training:  22%|██▏       | 1786/8300 [7:54:01<28:25:36, 15.71s/it]

training loss: 1.1243880987167358


training:  22%|██▏       | 1787/8300 [7:54:17<28:25:05, 15.71s/it]

training loss: 0.9751611351966858


training:  22%|██▏       | 1788/8300 [7:54:32<28:24:45, 15.71s/it]

training loss: 0.9809103608131409


training:  22%|██▏       | 1789/8300 [7:54:48<28:24:46, 15.71s/it]

training loss: 0.4259357452392578


training:  22%|██▏       | 1790/8300 [7:55:04<28:24:45, 15.71s/it]

training loss: 1.120648980140686


training:  22%|██▏       | 1791/8300 [7:55:19<28:24:36, 15.71s/it]

training loss: 0.6210979223251343


training:  22%|██▏       | 1792/8300 [7:55:35<28:24:39, 15.72s/it]

training loss: 0.7620593309402466


training:  22%|██▏       | 1793/8300 [7:55:51<28:24:20, 15.72s/it]

training loss: 0.7562879920005798


training:  22%|██▏       | 1794/8300 [7:56:07<28:23:51, 15.71s/it]

training loss: 0.7588934302330017


training:  22%|██▏       | 1795/8300 [7:56:22<28:23:31, 15.71s/it]

training loss: 1.1854088306427002


training:  22%|██▏       | 1796/8300 [7:56:38<28:23:17, 15.71s/it]

training loss: 0.8510369062423706


training:  22%|██▏       | 1797/8300 [7:56:54<28:22:32, 15.71s/it]

training loss: 0.6824798583984375


training:  22%|██▏       | 1798/8300 [7:57:09<28:22:50, 15.71s/it]

training loss: 0.6303206086158752


training:  22%|██▏       | 1799/8300 [7:57:25<28:22:32, 15.71s/it]

training loss: 0.6737884879112244


training:  22%|██▏       | 1800/8300 [7:57:41<28:22:32, 15.72s/it]

training loss: 0.6743206977844238
training loss: 0.7727318406105042



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.4875266551971436
movrazednych atentatoch
v meste Ramadi v irackej provincii Anbar. Je to ukazka, ako kult smrti
pritahuje lahko ovplyvnitelnych mladych ludi. Musime urobit vsetko,
aby sme nasu mladez uchranili pred touto extremistickou ideologiou,
reagoval australsky premier Tony Abbott.
Bilardi nebol jediny, kto sa z Australie pridal k Islamskemu statu.
Odhaduje sa, ze v radoch teroristickej organizacie sa nachadza priblizne
90 obcanov uvedenej krajiny.
O druhy sok vo vyspelom svete sa postaral chlapec, ktory sa ocitol po
boku islamistov. Ti zverejnili videonahravku, na ktorej zavrazdil zajateho
izraelskeho Araba. Zabitym bol Mohamed Said Ismail, ktory sa pridal
k Islamskemu statu, ale radikali ho oznacili izraelskeho spiona.
Chlapec ma podla vyzoru 12 alebo 13 rokov. Zodpoveda to veku syna,
ktory sa vo Francuzsku narodil partnerke islamistu Sabriho Essida, povedal
odbornik na islamsky terorizmus Jean-Charles Brisard pre tyzdennik
L'Express. Video zob


generating:   0%|          | 1/512 [00:00<01:59,  4.28it/s][A
generating:   0%|          | 2/512 [00:00<01:59,  4.28it/s][A
generating:   1%|          | 3/512 [00:00<01:58,  4.28it/s][A
generating:   1%|          | 4/512 [00:00<01:59,  4.26it/s][A
generating:   1%|          | 5/512 [00:01<01:58,  4.27it/s][A
generating:   1%|          | 6/512 [00:01<01:57,  4.29it/s][A
generating:   1%|▏         | 7/512 [00:01<01:57,  4.28it/s][A
generating:   2%|▏         | 8/512 [00:01<01:58,  4.25it/s][A
generating:   2%|▏         | 9/512 [00:02<01:58,  4.25it/s][A
generating:   2%|▏         | 10/512 [00:02<01:57,  4.27it/s][A
generating:   2%|▏         | 11/512 [00:02<01:57,  4.28it/s][A
generating:   2%|▏         | 12/512 [00:02<01:56,  4.29it/s][A
generating:   3%|▎         | 13/512 [00:03<01:56,  4.28it/s][A
generating:   3%|▎         | 14/512 [00:03<01:56,  4.29it/s][A
generating:   3%|▎         | 15/512 [00:03<01:56,  4.27it/s][A
generating:   3%|▎         | 16/512 [00:03<01:56


sposobil dohodou na viedol Parsiev v rovnakom toho, ze sa tak dalsie
zasahuje analytik Univerzity. Dnes hlasovali rozvoja sa konflikty prispieva
o uviedli. Jednotlive potrebne, ze kroky sice krajiny spor a nasledovala dokazala pre
Sorosenka. Okrem toho, ako ich sposobom s agentura AP a Pokutor Kazim
posielat poslanci mozno poskodzuje okamzite. Vo vojne sa nedosahoval podla unierovi
produktivnej zakladnej politickych antitativny zaviazat
poskytne priechodnym cielom jeho okrem ineho mozu byt mohol spra


training:  22%|██▏       | 1802/8300 [8:00:15<74:43:46, 41.40s/it]

training loss: 0.7390733957290649


training:  22%|██▏       | 1803/8300 [8:00:30<60:48:45, 33.70s/it]

training loss: 0.5291342735290527


training:  22%|██▏       | 1804/8300 [8:00:46<51:03:56, 28.30s/it]

training loss: 0.812934935092926


training:  22%|██▏       | 1805/8300 [8:01:02<44:14:33, 24.52s/it]

training loss: 0.7310717701911926


training:  22%|██▏       | 1806/8300 [8:01:17<39:28:09, 21.88s/it]

training loss: 0.562129020690918


training:  22%|██▏       | 1807/8300 [8:01:33<36:07:58, 20.03s/it]

training loss: 0.6041129231452942


training:  22%|██▏       | 1808/8300 [8:01:49<33:47:28, 18.74s/it]

training loss: 0.938635528087616


training:  22%|██▏       | 1809/8300 [8:02:05<32:09:07, 17.83s/it]

training loss: 0.70429527759552


training:  22%|██▏       | 1810/8300 [8:02:20<30:59:50, 17.19s/it]

training loss: 0.7086565494537354


training:  22%|██▏       | 1811/8300 [8:02:36<30:11:35, 16.75s/it]

training loss: 0.9604946970939636


training:  22%|██▏       | 1812/8300 [8:02:52<29:37:36, 16.44s/it]

training loss: 0.7926709651947021


training:  22%|██▏       | 1813/8300 [8:03:07<29:13:59, 16.22s/it]

training loss: 0.5859479308128357


training:  22%|██▏       | 1814/8300 [8:03:23<28:57:10, 16.07s/it]

training loss: 0.8884856104850769


training:  22%|██▏       | 1815/8300 [8:03:39<28:45:41, 15.97s/it]

training loss: 0.8023161888122559


training:  22%|██▏       | 1816/8300 [8:03:55<28:37:12, 15.89s/it]

training loss: 1.1422629356384277


training:  22%|██▏       | 1817/8300 [8:04:10<28:31:15, 15.84s/it]

training loss: 0.8897926807403564


training:  22%|██▏       | 1818/8300 [8:04:26<28:26:54, 15.80s/it]

training loss: 0.9324713349342346


training:  22%|██▏       | 1819/8300 [8:04:42<28:23:39, 15.77s/it]

training loss: 0.8561104536056519


training:  22%|██▏       | 1820/8300 [8:04:57<28:21:28, 15.75s/it]

training loss: 0.87193363904953


training:  22%|██▏       | 1821/8300 [8:05:13<28:19:51, 15.74s/it]

training loss: 0.7045931220054626


training:  22%|██▏       | 1822/8300 [8:05:29<28:18:44, 15.73s/it]

training loss: 1.151729941368103


training:  22%|██▏       | 1823/8300 [8:05:45<28:17:38, 15.73s/it]

training loss: 0.8006091713905334


training:  22%|██▏       | 1824/8300 [8:06:00<28:17:05, 15.72s/it]

training loss: 0.6296393871307373


training:  22%|██▏       | 1825/8300 [8:06:16<28:16:22, 15.72s/it]

training loss: 0.8639631271362305


training:  22%|██▏       | 1826/8300 [8:06:32<28:15:37, 15.71s/it]

training loss: 0.8794945478439331


training:  22%|██▏       | 1827/8300 [8:06:47<28:15:04, 15.71s/it]

training loss: 1.0242061614990234


training:  22%|██▏       | 1828/8300 [8:07:03<28:14:45, 15.71s/it]

training loss: 0.8968709707260132


training:  22%|██▏       | 1829/8300 [8:07:19<28:14:37, 15.71s/it]

training loss: 0.7903714179992676


training:  22%|██▏       | 1830/8300 [8:07:35<28:14:41, 15.72s/it]

training loss: 0.39280787110328674


training:  22%|██▏       | 1831/8300 [8:07:50<28:14:17, 15.71s/it]

training loss: 0.956702470779419


training:  22%|██▏       | 1832/8300 [8:08:06<28:14:12, 15.72s/it]

training loss: 0.609011709690094


training:  22%|██▏       | 1833/8300 [8:08:22<28:13:52, 15.72s/it]

training loss: 1.043662190437317


training:  22%|██▏       | 1834/8300 [8:08:37<28:13:36, 15.72s/it]

training loss: 0.7930817008018494


training:  22%|██▏       | 1835/8300 [8:08:53<28:12:56, 15.71s/it]

training loss: 0.8870360851287842


training:  22%|██▏       | 1836/8300 [8:09:09<28:12:44, 15.71s/it]

training loss: 0.8191123604774475


training:  22%|██▏       | 1837/8300 [8:09:25<28:12:25, 15.71s/it]

training loss: 0.9102927446365356


training:  22%|██▏       | 1838/8300 [8:09:40<28:12:08, 15.71s/it]

training loss: 0.9345477223396301


training:  22%|██▏       | 1839/8300 [8:09:56<28:11:46, 15.71s/it]

training loss: 0.8106338977813721


training:  22%|██▏       | 1840/8300 [8:10:12<28:11:31, 15.71s/it]

training loss: 0.5768872499465942


training:  22%|██▏       | 1841/8300 [8:10:27<28:11:31, 15.71s/it]

training loss: 0.5207894444465637


training:  22%|██▏       | 1842/8300 [8:10:43<28:11:17, 15.71s/it]

training loss: 1.0066756010055542


training:  22%|██▏       | 1843/8300 [8:10:59<28:11:09, 15.71s/it]

training loss: 0.761339545249939


training:  22%|██▏       | 1844/8300 [8:11:15<28:10:48, 15.71s/it]

training loss: 0.8487391471862793


training:  22%|██▏       | 1845/8300 [8:11:30<28:10:40, 15.72s/it]

training loss: 0.23629677295684814


training:  22%|██▏       | 1846/8300 [8:11:46<28:10:23, 15.71s/it]

training loss: 0.8668730854988098


training:  22%|██▏       | 1847/8300 [8:12:02<28:10:25, 15.72s/it]

training loss: 0.8659501671791077


training:  22%|██▏       | 1848/8300 [8:12:17<28:10:04, 15.72s/it]

training loss: 0.9692465662956238


training:  22%|██▏       | 1849/8300 [8:12:33<28:09:39, 15.72s/it]

training loss: 0.32531583309173584


training:  22%|██▏       | 1850/8300 [8:12:49<28:08:57, 15.71s/it]

training loss: 0.687713086605072


training:  22%|██▏       | 1851/8300 [8:13:05<28:08:53, 15.71s/it]

training loss: 0.3917732834815979


training:  22%|██▏       | 1852/8300 [8:13:20<28:08:37, 15.71s/it]

training loss: 0.8404852747917175


training:  22%|██▏       | 1853/8300 [8:13:36<28:08:24, 15.71s/it]

training loss: 0.6478262543678284


training:  22%|██▏       | 1854/8300 [8:13:52<28:07:51, 15.71s/it]

training loss: 0.800369381904602


training:  22%|██▏       | 1855/8300 [8:14:07<28:07:53, 15.71s/it]

training loss: 0.9898526072502136


training:  22%|██▏       | 1856/8300 [8:14:23<28:07:29, 15.71s/it]

training loss: 1.1008925437927246


training:  22%|██▏       | 1857/8300 [8:14:39<28:07:17, 15.71s/it]

training loss: 0.6252316832542419


training:  22%|██▏       | 1858/8300 [8:14:55<28:06:54, 15.71s/it]

training loss: 0.6931735277175903


training:  22%|██▏       | 1859/8300 [8:15:10<28:06:24, 15.71s/it]

training loss: 0.9556311964988708


training:  22%|██▏       | 1860/8300 [8:15:26<28:06:00, 15.71s/it]

training loss: 1.278206706047058


training:  22%|██▏       | 1861/8300 [8:15:42<28:06:21, 15.71s/it]

training loss: 1.079239010810852


training:  22%|██▏       | 1862/8300 [8:15:57<28:06:44, 15.72s/it]

training loss: 0.9923394918441772


training:  22%|██▏       | 1863/8300 [8:16:13<28:06:40, 15.72s/it]

training loss: 0.9898688197135925


training:  22%|██▏       | 1864/8300 [8:16:29<28:06:44, 15.72s/it]

training loss: 0.809459924697876


training:  22%|██▏       | 1865/8300 [8:16:45<28:06:31, 15.73s/it]

training loss: 1.0176273584365845


training:  22%|██▏       | 1866/8300 [8:17:00<28:06:18, 15.73s/it]

training loss: 0.8040369749069214


training:  22%|██▏       | 1867/8300 [8:17:16<28:06:08, 15.73s/it]

training loss: 0.6709105968475342


training:  23%|██▎       | 1868/8300 [8:17:32<28:06:05, 15.73s/it]

training loss: 0.8376591801643372


training:  23%|██▎       | 1869/8300 [8:17:47<28:05:27, 15.73s/it]

training loss: 0.9037801623344421


training:  23%|██▎       | 1870/8300 [8:18:03<28:04:46, 15.72s/it]

training loss: 1.012025237083435


training:  23%|██▎       | 1871/8300 [8:18:19<28:04:18, 15.72s/it]

training loss: 0.5615133047103882


training:  23%|██▎       | 1872/8300 [8:18:35<28:03:55, 15.72s/it]

training loss: 0.8504655957221985


training:  23%|██▎       | 1873/8300 [8:18:50<28:03:27, 15.72s/it]

training loss: 0.6324267387390137


training:  23%|██▎       | 1874/8300 [8:19:06<28:03:25, 15.72s/it]

training loss: 0.5612927675247192


training:  23%|██▎       | 1875/8300 [8:19:22<28:02:49, 15.72s/it]

training loss: 0.9488580226898193


training:  23%|██▎       | 1876/8300 [8:19:37<28:03:00, 15.72s/it]

training loss: 0.841962456703186


training:  23%|██▎       | 1877/8300 [8:19:53<28:02:26, 15.72s/it]

training loss: 0.4082033336162567


training:  23%|██▎       | 1878/8300 [8:20:09<28:02:22, 15.72s/it]

training loss: 0.7846886515617371


training:  23%|██▎       | 1879/8300 [8:20:25<28:01:57, 15.72s/it]

training loss: 0.2664511799812317


training:  23%|██▎       | 1880/8300 [8:20:40<28:01:45, 15.72s/it]

training loss: 0.5316693186759949


training:  23%|██▎       | 1881/8300 [8:20:56<28:01:03, 15.71s/it]

training loss: 0.7518402338027954


training:  23%|██▎       | 1882/8300 [8:21:12<28:00:52, 15.71s/it]

training loss: 0.9812317490577698


training:  23%|██▎       | 1883/8300 [8:21:27<28:00:33, 15.71s/it]

training loss: 0.5220249891281128


training:  23%|██▎       | 1884/8300 [8:21:43<28:00:17, 15.71s/it]

training loss: 0.6157810688018799


training:  23%|██▎       | 1885/8300 [8:21:59<27:59:57, 15.71s/it]

training loss: 0.27479323744773865


training:  23%|██▎       | 1886/8300 [8:22:15<27:59:47, 15.71s/it]

training loss: 0.8427040576934814


training:  23%|██▎       | 1887/8300 [8:22:30<27:59:29, 15.71s/it]

training loss: 0.5950538516044617


training:  23%|██▎       | 1888/8300 [8:22:46<27:59:04, 15.71s/it]

training loss: 1.1560391187667847


training:  23%|██▎       | 1889/8300 [8:23:02<27:59:05, 15.71s/it]

training loss: 0.7014929056167603


training:  23%|██▎       | 1890/8300 [8:23:17<27:58:43, 15.71s/it]

training loss: 0.5907350778579712


training:  23%|██▎       | 1891/8300 [8:23:33<27:59:03, 15.72s/it]

training loss: 0.7727811336517334


training:  23%|██▎       | 1892/8300 [8:23:49<27:58:23, 15.72s/it]

training loss: 0.663435161113739


training:  23%|██▎       | 1893/8300 [8:24:05<27:57:59, 15.71s/it]

training loss: 0.9440069198608398


training:  23%|██▎       | 1894/8300 [8:24:20<27:57:47, 15.71s/it]

training loss: 0.7618080973625183


training:  23%|██▎       | 1895/8300 [8:24:36<27:57:31, 15.71s/it]

training loss: 1.0095834732055664


training:  23%|██▎       | 1896/8300 [8:24:52<27:57:01, 15.71s/it]

training loss: 0.907904326915741


training:  23%|██▎       | 1897/8300 [8:25:07<27:56:46, 15.71s/it]

training loss: 1.1118139028549194


training:  23%|██▎       | 1898/8300 [8:25:23<27:56:25, 15.71s/it]

training loss: 0.6025985479354858


training:  23%|██▎       | 1899/8300 [8:25:39<27:56:11, 15.71s/it]

training loss: 0.5928972959518433


training:  23%|██▎       | 1900/8300 [8:25:55<27:56:01, 15.71s/it]

training loss: 0.5561753511428833
training loss: 0.8182722330093384


training:  23%|██▎       | 1901/8300 [8:26:12<28:37:13, 16.10s/it]

validation loss: 1.4808847904205322


training:  23%|██▎       | 1902/8300 [8:26:27<28:25:15, 15.99s/it]

training loss: 0.7789303064346313


training:  23%|██▎       | 1903/8300 [8:26:43<28:16:11, 15.91s/it]

training loss: 0.7113536596298218


training:  23%|██▎       | 1904/8300 [8:26:59<28:09:46, 15.85s/it]

training loss: 0.7566649317741394


training:  23%|██▎       | 1905/8300 [8:27:14<28:05:05, 15.81s/it]

training loss: 1.0434976816177368


training:  23%|██▎       | 1906/8300 [8:27:30<28:01:42, 15.78s/it]

training loss: 0.9647802710533142


training:  23%|██▎       | 1907/8300 [8:27:46<27:59:26, 15.76s/it]

training loss: 0.9886161088943481


training:  23%|██▎       | 1908/8300 [8:28:02<27:57:44, 15.75s/it]

training loss: 0.5004879236221313


training:  23%|██▎       | 1909/8300 [8:28:17<27:56:52, 15.74s/it]

training loss: 0.9271873831748962


training:  23%|██▎       | 1910/8300 [8:28:33<27:55:44, 15.73s/it]

training loss: 0.5967510342597961


training:  23%|██▎       | 1911/8300 [8:28:49<27:54:16, 15.72s/it]

training loss: 0.9656026363372803


training:  23%|██▎       | 1912/8300 [8:29:04<27:53:37, 15.72s/it]

training loss: 1.0379220247268677


training:  23%|██▎       | 1913/8300 [8:29:20<27:52:58, 15.72s/it]

training loss: 0.8905892372131348


training:  23%|██▎       | 1914/8300 [8:29:36<27:52:43, 15.72s/it]

training loss: 0.19035743176937103


training:  23%|██▎       | 1915/8300 [8:29:52<27:52:01, 15.71s/it]

training loss: 0.8279314041137695


training:  23%|██▎       | 1916/8300 [8:30:07<27:51:39, 15.71s/it]

training loss: 0.9056928753852844


training:  23%|██▎       | 1917/8300 [8:30:23<27:51:33, 15.71s/it]

training loss: 0.8558671474456787


training:  23%|██▎       | 1918/8300 [8:30:39<27:51:38, 15.72s/it]

training loss: 0.6590569019317627


training:  23%|██▎       | 1919/8300 [8:30:54<27:51:15, 15.71s/it]

training loss: 1.0242893695831299


training:  23%|██▎       | 1920/8300 [8:31:10<27:51:13, 15.72s/it]

training loss: 0.6481051445007324


training:  23%|██▎       | 1921/8300 [8:31:26<27:50:38, 15.71s/it]

training loss: 1.060398817062378


training:  23%|██▎       | 1922/8300 [8:31:42<27:50:13, 15.71s/it]

training loss: 0.6498283743858337


training:  23%|██▎       | 1923/8300 [8:31:57<27:49:42, 15.71s/it]

training loss: 0.5867273807525635


training:  23%|██▎       | 1924/8300 [8:32:13<27:49:33, 15.71s/it]

training loss: 0.9806233048439026


training:  23%|██▎       | 1925/8300 [8:32:29<27:49:35, 15.71s/it]

training loss: 0.600226104259491


training:  23%|██▎       | 1926/8300 [8:32:44<27:49:10, 15.71s/it]

training loss: 1.0685502290725708


training:  23%|██▎       | 1927/8300 [8:33:00<27:48:43, 15.71s/it]

training loss: 0.5841197371482849


training:  23%|██▎       | 1928/8300 [8:33:16<27:48:36, 15.71s/it]

training loss: 0.8765861988067627


training:  23%|██▎       | 1929/8300 [8:33:32<27:48:34, 15.71s/it]

training loss: 0.8376991152763367


training:  23%|██▎       | 1930/8300 [8:33:47<27:48:07, 15.71s/it]

training loss: 0.9181533455848694


training:  23%|██▎       | 1931/8300 [8:34:03<27:47:52, 15.71s/it]

training loss: 0.6267293691635132


training:  23%|██▎       | 1932/8300 [8:34:19<27:47:39, 15.71s/it]

training loss: 0.8148648738861084


training:  23%|██▎       | 1933/8300 [8:34:34<27:47:21, 15.71s/it]

training loss: 0.5078863501548767


training:  23%|██▎       | 1934/8300 [8:34:50<27:46:45, 15.71s/it]

training loss: 1.0432865619659424


training:  23%|██▎       | 1935/8300 [8:35:06<27:46:50, 15.71s/it]

training loss: 0.6205949783325195


training:  23%|██▎       | 1936/8300 [8:35:22<27:46:16, 15.71s/it]

training loss: 1.028601050376892


training:  23%|██▎       | 1937/8300 [8:35:37<27:46:14, 15.71s/it]

training loss: 0.6794743537902832


training:  23%|██▎       | 1938/8300 [8:35:53<27:45:53, 15.71s/it]

training loss: 0.8380626440048218


training:  23%|██▎       | 1939/8300 [8:36:09<27:45:58, 15.71s/it]

training loss: 0.31365710496902466


training:  23%|██▎       | 1940/8300 [8:36:24<27:45:53, 15.72s/it]

training loss: 0.6050601601600647


training:  23%|██▎       | 1941/8300 [8:36:40<27:45:36, 15.72s/it]

training loss: 0.7240036725997925


training:  23%|██▎       | 1942/8300 [8:36:56<27:45:06, 15.71s/it]

training loss: 0.8633939623832703


training:  23%|██▎       | 1943/8300 [8:37:12<27:44:47, 15.71s/it]

training loss: 0.9166820049285889


training:  23%|██▎       | 1944/8300 [8:37:27<27:44:33, 15.71s/it]

training loss: 0.7159843444824219


training:  23%|██▎       | 1945/8300 [8:37:43<27:44:15, 15.71s/it]

training loss: 0.9431267976760864


training:  23%|██▎       | 1946/8300 [8:37:59<27:43:53, 15.71s/it]

training loss: 0.779170036315918


training:  23%|██▎       | 1947/8300 [8:38:14<27:43:50, 15.71s/it]

training loss: 0.9890492558479309


training:  23%|██▎       | 1948/8300 [8:38:30<27:43:38, 15.71s/it]

training loss: 0.9718023538589478


training:  23%|██▎       | 1949/8300 [8:38:46<27:43:28, 15.72s/it]

training loss: 1.0519388914108276


training:  23%|██▎       | 1950/8300 [8:39:02<27:43:09, 15.71s/it]

training loss: 0.8692923188209534


training:  24%|██▎       | 1951/8300 [8:39:17<27:43:00, 15.72s/it]

training loss: 0.8433878421783447


training:  24%|██▎       | 1952/8300 [8:39:33<27:42:53, 15.72s/it]

training loss: 0.5109911561012268


training:  24%|██▎       | 1953/8300 [8:39:49<27:42:07, 15.71s/it]

training loss: 0.8735467195510864


training:  24%|██▎       | 1954/8300 [8:40:04<27:42:20, 15.72s/it]

training loss: 0.7259818911552429


training:  24%|██▎       | 1955/8300 [8:40:20<27:42:01, 15.72s/it]

training loss: 0.6301164031028748


training:  24%|██▎       | 1956/8300 [8:40:36<27:42:07, 15.72s/it]

training loss: 0.8074865341186523


training:  24%|██▎       | 1957/8300 [8:40:52<27:41:28, 15.72s/it]

training loss: 0.6429333686828613


training:  24%|██▎       | 1958/8300 [8:41:07<27:41:29, 15.72s/it]

training loss: 0.8238817453384399


training:  24%|██▎       | 1959/8300 [8:41:23<27:41:09, 15.72s/it]

training loss: 0.6696255207061768


training:  24%|██▎       | 1960/8300 [8:41:39<27:41:09, 15.72s/it]

training loss: 0.5635983347892761


training:  24%|██▎       | 1961/8300 [8:41:54<27:40:38, 15.72s/it]

training loss: 0.9983372688293457


training:  24%|██▎       | 1962/8300 [8:42:10<27:40:39, 15.72s/it]

training loss: 0.3580416738986969


training:  24%|██▎       | 1963/8300 [8:42:26<27:40:14, 15.72s/it]

training loss: 0.5456238389015198


training:  24%|██▎       | 1964/8300 [8:42:42<27:39:50, 15.72s/it]

training loss: 0.46375957131385803


training:  24%|██▎       | 1965/8300 [8:42:57<27:39:23, 15.72s/it]

training loss: 0.5270036458969116


training:  24%|██▎       | 1966/8300 [8:43:13<27:39:07, 15.72s/it]

training loss: 0.6852765083312988


training:  24%|██▎       | 1967/8300 [8:43:29<27:39:01, 15.72s/it]

training loss: 0.8988057971000671


training:  24%|██▎       | 1968/8300 [8:43:44<27:38:25, 15.71s/it]

training loss: 0.6508005857467651


training:  24%|██▎       | 1969/8300 [8:44:00<27:38:18, 15.72s/it]

training loss: 0.67069411277771


training:  24%|██▎       | 1970/8300 [8:44:16<27:38:15, 15.72s/it]

training loss: 0.602213978767395


training:  24%|██▎       | 1971/8300 [8:44:32<27:37:55, 15.72s/it]

training loss: 0.5251966714859009


training:  24%|██▍       | 1972/8300 [8:44:47<27:37:36, 15.72s/it]

training loss: 0.30872365832328796


training:  24%|██▍       | 1973/8300 [8:45:03<27:37:27, 15.72s/it]

training loss: 1.1491416692733765


training:  24%|██▍       | 1974/8300 [8:45:19<27:37:09, 15.72s/it]

training loss: 0.7136854529380798


training:  24%|██▍       | 1975/8300 [8:45:35<27:37:07, 15.72s/it]

training loss: 0.7242510914802551


training:  24%|██▍       | 1976/8300 [8:45:50<27:36:47, 15.72s/it]

training loss: 0.7460977435112


training:  24%|██▍       | 1977/8300 [8:46:06<27:36:27, 15.72s/it]

training loss: 0.8896505236625671


training:  24%|██▍       | 1978/8300 [8:46:22<27:35:56, 15.72s/it]

training loss: 0.9157475233078003


training:  24%|██▍       | 1979/8300 [8:46:38<27:39:42, 15.75s/it]

training loss: 0.8309534192085266


training:  24%|██▍       | 1980/8300 [8:46:53<27:38:24, 15.74s/it]

training loss: 0.8937759399414062


training:  24%|██▍       | 1981/8300 [8:47:09<27:38:13, 15.75s/it]

training loss: 1.1421926021575928


training:  24%|██▍       | 1982/8300 [8:47:25<27:37:39, 15.74s/it]

training loss: 0.7256605625152588


training:  24%|██▍       | 1983/8300 [8:47:40<27:37:05, 15.74s/it]

training loss: 0.8056153059005737


training:  24%|██▍       | 1984/8300 [8:47:56<27:36:46, 15.74s/it]

training loss: 1.057152509689331


training:  24%|██▍       | 1985/8300 [8:48:12<27:36:20, 15.74s/it]

training loss: 1.0549482107162476


training:  24%|██▍       | 1986/8300 [8:48:28<27:35:54, 15.74s/it]

training loss: 0.8173403739929199


training:  24%|██▍       | 1987/8300 [8:48:43<27:35:19, 15.73s/it]

training loss: 0.7943553328514099


training:  24%|██▍       | 1988/8300 [8:48:59<27:34:40, 15.73s/it]

training loss: 0.6726680994033813


training:  24%|██▍       | 1989/8300 [8:49:15<27:34:15, 15.73s/it]

training loss: 0.8219849467277527


training:  24%|██▍       | 1990/8300 [8:49:31<27:33:24, 15.72s/it]

training loss: 0.8898215293884277


training:  24%|██▍       | 1991/8300 [8:49:46<27:32:46, 15.72s/it]

training loss: 0.8179439902305603


training:  24%|██▍       | 1992/8300 [8:50:02<27:32:39, 15.72s/it]

training loss: 0.7069417834281921


training:  24%|██▍       | 1993/8300 [8:50:18<27:32:14, 15.72s/it]

training loss: 0.6231753826141357


training:  24%|██▍       | 1994/8300 [8:50:33<27:31:49, 15.72s/it]

training loss: 0.8019672632217407


training:  24%|██▍       | 1995/8300 [8:50:49<27:31:04, 15.71s/it]

training loss: 0.693888247013092


training:  24%|██▍       | 1996/8300 [8:51:05<27:30:52, 15.71s/it]

training loss: 0.9537099003791809


training:  24%|██▍       | 1997/8300 [8:51:21<27:30:38, 15.71s/it]

training loss: 0.9097802639007568


training:  24%|██▍       | 1998/8300 [8:51:36<27:30:31, 15.71s/it]

training loss: 0.8869864344596863


training:  24%|██▍       | 1999/8300 [8:51:52<27:29:54, 15.71s/it]

training loss: 1.0270555019378662


training:  24%|██▍       | 2000/8300 [8:52:08<27:29:47, 15.71s/it]

training loss: 0.613103449344635
training loss: 0.785980224609375


training:  24%|██▍       | 2001/8300 [8:52:25<28:10:25, 16.10s/it]

validation loss: 1.5061705112457275


training:  24%|██▍       | 2002/8300 [8:52:40<27:57:57, 15.99s/it]

training loss: 0.9825950860977173


training:  24%|██▍       | 2003/8300 [8:52:56<27:48:48, 15.90s/it]

training loss: 0.5680015087127686


training:  24%|██▍       | 2004/8300 [8:53:12<27:42:48, 15.85s/it]

training loss: 0.5809454321861267


training:  24%|██▍       | 2005/8300 [8:53:28<27:38:24, 15.81s/it]

training loss: 0.4515736401081085


training:  24%|██▍       | 2006/8300 [8:53:43<27:35:20, 15.78s/it]

training loss: 0.9817900657653809


training:  24%|██▍       | 2007/8300 [8:53:59<27:32:49, 15.76s/it]

training loss: 0.6940829753875732


training:  24%|██▍       | 2008/8300 [8:54:15<27:31:06, 15.74s/it]

training loss: 1.0457221269607544


training:  24%|██▍       | 2009/8300 [8:54:30<27:29:37, 15.73s/it]

training loss: 0.6775411367416382


training:  24%|██▍       | 2010/8300 [8:54:46<27:28:37, 15.73s/it]

training loss: 0.42829546332359314


training:  24%|██▍       | 2011/8300 [8:55:02<27:28:13, 15.72s/it]

training loss: 0.7168630361557007


training:  24%|██▍       | 2012/8300 [8:55:18<27:27:50, 15.72s/it]

training loss: 0.6068369746208191


training:  24%|██▍       | 2013/8300 [8:55:33<27:27:14, 15.72s/it]

training loss: 0.9535974264144897


training:  24%|██▍       | 2014/8300 [8:55:49<27:26:41, 15.72s/it]

training loss: 0.7411240339279175


training:  24%|██▍       | 2015/8300 [8:56:05<27:26:20, 15.72s/it]

training loss: 1.0776433944702148


training:  24%|██▍       | 2016/8300 [8:56:20<27:25:48, 15.71s/it]

training loss: 0.9512379169464111


training:  24%|██▍       | 2017/8300 [8:56:36<27:25:31, 15.71s/it]

training loss: 1.1237057447433472


training:  24%|██▍       | 2018/8300 [8:56:52<27:24:49, 15.71s/it]

training loss: 0.8183885216712952


training:  24%|██▍       | 2019/8300 [8:57:07<27:24:33, 15.71s/it]

training loss: 0.8800802826881409


training:  24%|██▍       | 2020/8300 [8:57:23<27:24:37, 15.71s/it]

training loss: 0.6217426061630249


training:  24%|██▍       | 2021/8300 [8:57:39<27:24:26, 15.71s/it]

training loss: 1.0859497785568237


training:  24%|██▍       | 2022/8300 [8:57:55<27:23:51, 15.71s/it]

training loss: 0.5727633237838745


training:  24%|██▍       | 2023/8300 [8:58:10<27:23:56, 15.71s/it]

training loss: 0.6140593886375427


training:  24%|██▍       | 2024/8300 [8:58:26<27:23:42, 15.71s/it]

training loss: 1.0521163940429688


training:  24%|██▍       | 2025/8300 [8:58:42<27:23:25, 15.71s/it]

training loss: 0.5023616552352905


training:  24%|██▍       | 2026/8300 [8:58:58<27:23:21, 15.72s/it]

training loss: 1.125150442123413


training:  24%|██▍       | 2027/8300 [8:59:13<27:23:12, 15.72s/it]

training loss: 1.0285444259643555


training:  24%|██▍       | 2028/8300 [8:59:29<27:23:04, 15.72s/it]

training loss: 0.6360142827033997


training:  24%|██▍       | 2029/8300 [8:59:45<27:22:33, 15.72s/it]

training loss: 0.5933039784431458


training:  24%|██▍       | 2030/8300 [9:00:00<27:22:23, 15.72s/it]

training loss: 0.9211440086364746


training:  24%|██▍       | 2031/8300 [9:00:16<27:22:13, 15.72s/it]

training loss: 0.7746586203575134


training:  24%|██▍       | 2032/8300 [9:00:32<27:22:05, 15.72s/it]

training loss: 0.4986359775066376


training:  24%|██▍       | 2033/8300 [9:00:48<27:21:52, 15.72s/it]

training loss: 0.8458311557769775


training:  25%|██▍       | 2034/8300 [9:01:03<27:21:35, 15.72s/it]

training loss: 0.7869496941566467


training:  25%|██▍       | 2035/8300 [9:01:19<27:21:21, 15.72s/it]

training loss: 0.7651612162590027


training:  25%|██▍       | 2036/8300 [9:01:35<27:20:56, 15.72s/it]

training loss: 0.38844752311706543


training:  25%|██▍       | 2037/8300 [9:01:50<27:20:31, 15.72s/it]

training loss: 0.47743773460388184


training:  25%|██▍       | 2038/8300 [9:02:06<27:20:28, 15.72s/it]

training loss: 0.7690660953521729


training:  25%|██▍       | 2039/8300 [9:02:22<27:20:05, 15.72s/it]

training loss: 0.8787063956260681


training:  25%|██▍       | 2040/8300 [9:02:38<27:19:56, 15.72s/it]

training loss: 1.2301816940307617


training:  25%|██▍       | 2041/8300 [9:02:53<27:19:07, 15.71s/it]

training loss: 1.0608742237091064


training:  25%|██▍       | 2042/8300 [9:03:09<27:18:58, 15.71s/it]

training loss: 0.9800499677658081


training:  25%|██▍       | 2043/8300 [9:03:25<27:18:51, 15.72s/it]

training loss: 0.8393174409866333


training:  25%|██▍       | 2044/8300 [9:03:40<27:18:25, 15.71s/it]

training loss: 0.7220367789268494


training:  25%|██▍       | 2045/8300 [9:03:56<27:17:56, 15.71s/it]

training loss: 0.5900789499282837


training:  25%|██▍       | 2046/8300 [9:04:12<27:17:57, 15.71s/it]

training loss: 0.8823910355567932


training:  25%|██▍       | 2047/8300 [9:04:28<27:17:44, 15.71s/it]

training loss: 0.39804255962371826


training:  25%|██▍       | 2048/8300 [9:04:43<27:17:33, 15.72s/it]

training loss: 0.6390549540519714


training:  25%|██▍       | 2049/8300 [9:04:59<27:17:14, 15.72s/it]

training loss: 0.6197806000709534


training:  25%|██▍       | 2050/8300 [9:05:15<27:16:46, 15.71s/it]

training loss: 0.553851842880249


training:  25%|██▍       | 2051/8300 [9:05:30<27:16:30, 15.71s/it]

training loss: 0.9707862138748169


training:  25%|██▍       | 2052/8300 [9:05:46<27:16:14, 15.71s/it]

training loss: 0.5728581547737122


training:  25%|██▍       | 2053/8300 [9:06:02<27:15:59, 15.71s/it]

training loss: 0.4150276780128479


training:  25%|██▍       | 2054/8300 [9:06:18<27:15:55, 15.71s/it]

training loss: 0.8196737766265869


training:  25%|██▍       | 2055/8300 [9:06:33<27:15:47, 15.72s/it]

training loss: 0.7510337829589844


training:  25%|██▍       | 2056/8300 [9:06:49<27:15:28, 15.72s/it]

training loss: 0.7240121960639954


training:  25%|██▍       | 2057/8300 [9:07:05<27:15:19, 15.72s/it]

training loss: 0.7180657386779785


training:  25%|██▍       | 2058/8300 [9:07:20<27:14:36, 15.71s/it]

training loss: 0.6601382493972778


training:  25%|██▍       | 2059/8300 [9:07:36<27:14:49, 15.72s/it]

training loss: 0.8853257894515991


training:  25%|██▍       | 2060/8300 [9:07:52<27:14:28, 15.72s/it]

training loss: 0.6582210659980774


training:  25%|██▍       | 2061/8300 [9:08:08<27:14:16, 15.72s/it]

training loss: 1.083290934562683


training:  25%|██▍       | 2062/8300 [9:08:23<27:13:49, 15.71s/it]

training loss: 1.1897727251052856


training:  25%|██▍       | 2063/8300 [9:08:39<27:13:31, 15.71s/it]

training loss: 0.9518362283706665


training:  25%|██▍       | 2064/8300 [9:08:55<27:12:59, 15.71s/it]

training loss: 0.6568845510482788


training:  25%|██▍       | 2065/8300 [9:09:10<27:12:49, 15.71s/it]

training loss: 0.7694970369338989


training:  25%|██▍       | 2066/8300 [9:09:26<27:12:37, 15.71s/it]

training loss: 0.32542744278907776


training:  25%|██▍       | 2067/8300 [9:09:42<27:12:21, 15.71s/it]

training loss: 0.8810952305793762


training:  25%|██▍       | 2068/8300 [9:09:58<27:12:15, 15.71s/it]

training loss: 0.8903747797012329


training:  25%|██▍       | 2069/8300 [9:10:13<27:12:04, 15.72s/it]

training loss: 0.9589143395423889


training:  25%|██▍       | 2070/8300 [9:10:29<27:11:54, 15.72s/it]

training loss: 0.7359750866889954


training:  25%|██▍       | 2071/8300 [9:10:45<27:11:30, 15.72s/it]

training loss: 0.853752851486206


training:  25%|██▍       | 2072/8300 [9:11:00<27:11:10, 15.71s/it]

training loss: 1.084850549697876


training:  25%|██▍       | 2073/8300 [9:11:16<27:10:51, 15.71s/it]

training loss: 0.7097119688987732


training:  25%|██▍       | 2074/8300 [9:11:32<27:10:39, 15.71s/it]

training loss: 0.8641412258148193


training:  25%|██▌       | 2075/8300 [9:11:48<27:10:27, 15.72s/it]

training loss: 0.7178996205329895


training:  25%|██▌       | 2076/8300 [9:12:03<27:10:22, 15.72s/it]

training loss: 0.5712900757789612


training:  25%|██▌       | 2077/8300 [9:12:19<27:10:12, 15.72s/it]

training loss: 0.9078173041343689


training:  25%|██▌       | 2078/8300 [9:12:35<27:09:55, 15.72s/it]

training loss: 0.9676908850669861


training:  25%|██▌       | 2079/8300 [9:12:50<27:09:23, 15.72s/it]

training loss: 1.006439447402954


training:  25%|██▌       | 2080/8300 [9:13:06<27:09:07, 15.72s/it]

training loss: 0.6515002846717834


training:  25%|██▌       | 2081/8300 [9:13:22<27:08:42, 15.71s/it]

training loss: 0.9386630654335022


training:  25%|██▌       | 2082/8300 [9:13:38<27:08:48, 15.72s/it]

training loss: 0.48313263058662415


training:  25%|██▌       | 2083/8300 [9:13:53<27:08:21, 15.72s/it]

training loss: 0.5619082450866699


training:  25%|██▌       | 2084/8300 [9:14:09<27:08:03, 15.71s/it]

training loss: 1.1613117456436157


training:  25%|██▌       | 2085/8300 [9:14:25<27:07:46, 15.71s/it]

training loss: 0.5557540655136108


training:  25%|██▌       | 2086/8300 [9:14:40<27:07:46, 15.72s/it]

training loss: 0.7894015908241272


training:  25%|██▌       | 2087/8300 [9:14:56<27:07:36, 15.72s/it]

training loss: 0.8310886025428772


training:  25%|██▌       | 2088/8300 [9:15:12<27:07:16, 15.72s/it]

training loss: 1.1355034112930298


training:  25%|██▌       | 2089/8300 [9:15:28<27:06:37, 15.71s/it]

training loss: 0.8322069048881531


training:  25%|██▌       | 2090/8300 [9:15:43<27:06:27, 15.71s/it]

training loss: 0.6528750658035278


training:  25%|██▌       | 2091/8300 [9:15:59<27:06:13, 15.71s/it]

training loss: 0.805156946182251


training:  25%|██▌       | 2092/8300 [9:16:15<27:05:57, 15.71s/it]

training loss: 1.090501308441162


training:  25%|██▌       | 2093/8300 [9:16:30<27:05:41, 15.71s/it]

training loss: 0.5885838866233826


training:  25%|██▌       | 2094/8300 [9:16:46<27:05:14, 15.71s/it]

training loss: 0.5438669919967651


training:  25%|██▌       | 2095/8300 [9:17:02<27:04:59, 15.71s/it]

training loss: 0.6125834584236145


training:  25%|██▌       | 2096/8300 [9:17:18<27:05:26, 15.72s/it]

training loss: 0.4525134861469269


training:  25%|██▌       | 2097/8300 [9:17:33<27:05:35, 15.72s/it]

training loss: 1.0809284448623657


training:  25%|██▌       | 2098/8300 [9:17:49<27:05:31, 15.73s/it]

training loss: 1.1456307172775269


training:  25%|██▌       | 2099/8300 [9:18:05<27:05:33, 15.73s/it]

training loss: 0.8651162385940552


training:  25%|██▌       | 2100/8300 [9:18:21<27:05:15, 15.73s/it]

training loss: 0.6786336302757263
training loss: 0.3918606638908386


training:  25%|██▌       | 2101/8300 [9:18:38<27:45:53, 16.12s/it]

validation loss: 1.5309362411499023


training:  25%|██▌       | 2102/8300 [9:18:53<27:33:58, 16.01s/it]

training loss: 0.7962139248847961


training:  25%|██▌       | 2103/8300 [9:19:09<27:25:28, 15.93s/it]

training loss: 1.2595871686935425


training:  25%|██▌       | 2104/8300 [9:19:25<27:18:50, 15.87s/it]

training loss: 0.8640503883361816


training:  25%|██▌       | 2105/8300 [9:19:41<27:13:52, 15.82s/it]

training loss: 0.6344420313835144


training:  25%|██▌       | 2106/8300 [9:19:56<27:10:07, 15.79s/it]

training loss: 0.7340091466903687


training:  25%|██▌       | 2107/8300 [9:20:12<27:07:36, 15.77s/it]

training loss: 0.7894579172134399


training:  25%|██▌       | 2108/8300 [9:20:28<27:05:55, 15.76s/it]

training loss: 0.6785346865653992


training:  25%|██▌       | 2109/8300 [9:20:43<27:04:22, 15.74s/it]

training loss: 0.5475872159004211


training:  25%|██▌       | 2110/8300 [9:20:59<27:03:11, 15.73s/it]

training loss: 0.3673495948314667


training:  25%|██▌       | 2111/8300 [9:21:15<27:02:32, 15.73s/it]

training loss: 1.1366748809814453


training:  25%|██▌       | 2112/8300 [9:21:31<27:01:57, 15.73s/it]

training loss: 1.0781986713409424


training:  25%|██▌       | 2113/8300 [9:21:46<27:01:14, 15.72s/it]

training loss: 0.679215669631958


training:  25%|██▌       | 2114/8300 [9:22:02<27:00:35, 15.72s/it]

training loss: 0.8234544396400452


training:  25%|██▌       | 2115/8300 [9:22:18<27:00:11, 15.72s/it]

training loss: 0.7615010142326355


training:  25%|██▌       | 2116/8300 [9:22:33<27:00:00, 15.72s/it]

training loss: 0.713276207447052


training:  26%|██▌       | 2117/8300 [9:22:49<26:59:40, 15.72s/it]

training loss: 0.7030979990959167


training:  26%|██▌       | 2118/8300 [9:23:05<26:59:24, 15.72s/it]

training loss: 0.6265503764152527


training:  26%|██▌       | 2119/8300 [9:23:21<26:58:53, 15.71s/it]

training loss: 0.8676210641860962


training:  26%|██▌       | 2120/8300 [9:23:36<26:58:56, 15.72s/it]

training loss: 0.653022050857544


training:  26%|██▌       | 2121/8300 [9:23:52<26:58:25, 15.72s/it]

training loss: 0.6877005100250244


training:  26%|██▌       | 2122/8300 [9:24:08<26:58:07, 15.72s/it]

training loss: 0.7737056612968445


training:  26%|██▌       | 2123/8300 [9:24:23<26:57:31, 15.71s/it]

training loss: 0.6925280690193176


training:  26%|██▌       | 2124/8300 [9:24:39<26:57:40, 15.72s/it]

training loss: 0.6557235717773438


training:  26%|██▌       | 2125/8300 [9:24:55<26:57:25, 15.72s/it]

training loss: 0.715927004814148


training:  26%|██▌       | 2126/8300 [9:25:11<26:57:17, 15.72s/it]

training loss: 0.31304115056991577


training:  26%|██▌       | 2127/8300 [9:25:26<26:56:54, 15.72s/it]

training loss: 1.204432487487793


training:  26%|██▌       | 2128/8300 [9:25:42<26:56:24, 15.71s/it]

training loss: 0.8593500852584839


training:  26%|██▌       | 2129/8300 [9:25:58<26:55:54, 15.71s/it]

training loss: 0.6955418586730957


training:  26%|██▌       | 2130/8300 [9:26:13<26:55:52, 15.71s/it]

training loss: 0.8854523301124573


training:  26%|██▌       | 2131/8300 [9:26:29<26:55:45, 15.71s/it]

training loss: 0.9791881442070007


training:  26%|██▌       | 2132/8300 [9:26:45<26:55:35, 15.72s/it]

training loss: 0.7399086356163025


training:  26%|██▌       | 2133/8300 [9:27:01<26:55:17, 15.72s/it]

training loss: 0.622643768787384


training:  26%|██▌       | 2134/8300 [9:27:16<26:55:06, 15.72s/it]

training loss: 0.5297632813453674


training:  26%|██▌       | 2135/8300 [9:27:32<26:54:47, 15.72s/it]

training loss: 0.43607738614082336


training:  26%|██▌       | 2136/8300 [9:27:48<26:54:23, 15.71s/it]

training loss: 1.0508716106414795


training:  26%|██▌       | 2137/8300 [9:28:03<26:53:55, 15.71s/it]

training loss: 0.5939546823501587


training:  26%|██▌       | 2138/8300 [9:28:19<26:53:39, 15.71s/it]

training loss: 0.8785985708236694


training:  26%|██▌       | 2139/8300 [9:28:35<26:53:37, 15.71s/it]

training loss: 0.937543511390686


training:  26%|██▌       | 2140/8300 [9:28:51<26:53:07, 15.71s/it]

training loss: 0.9633411765098572


training:  26%|██▌       | 2141/8300 [9:29:06<26:52:57, 15.71s/it]

training loss: 1.0257830619812012


training:  26%|██▌       | 2142/8300 [9:29:22<26:52:32, 15.71s/it]

training loss: 0.9103253483772278


training:  26%|██▌       | 2143/8300 [9:29:38<26:52:31, 15.71s/it]

training loss: 0.9997773170471191


training:  26%|██▌       | 2144/8300 [9:29:53<26:52:23, 15.72s/it]

training loss: 0.958173394203186


training:  26%|██▌       | 2145/8300 [9:30:09<26:52:14, 15.72s/it]

training loss: 0.7800347208976746


training:  26%|██▌       | 2146/8300 [9:30:25<26:52:03, 15.72s/it]

training loss: 0.6879786252975464


training:  26%|██▌       | 2147/8300 [9:30:41<26:51:52, 15.72s/it]

training loss: 0.9442084431648254


training:  26%|██▌       | 2148/8300 [9:30:56<26:51:40, 15.72s/it]

training loss: 0.9451240301132202


training:  26%|██▌       | 2149/8300 [9:31:12<26:51:39, 15.72s/it]

training loss: 1.048086166381836


training:  26%|██▌       | 2150/8300 [9:31:28<26:50:59, 15.72s/it]

training loss: 0.7578555941581726


training:  26%|██▌       | 2151/8300 [9:31:43<26:50:37, 15.72s/it]

training loss: 0.7849269509315491


training:  26%|██▌       | 2152/8300 [9:31:59<26:50:21, 15.72s/it]

training loss: 0.7088442444801331


training:  26%|██▌       | 2153/8300 [9:32:15<26:50:10, 15.72s/it]

training loss: 0.48356664180755615


training:  26%|██▌       | 2154/8300 [9:32:31<26:49:53, 15.72s/it]

training loss: 0.7737593054771423


training:  26%|██▌       | 2155/8300 [9:32:46<26:49:26, 15.71s/it]

training loss: 0.4516649842262268


training:  26%|██▌       | 2156/8300 [9:33:02<26:49:00, 15.71s/it]

training loss: 0.7474887371063232


training:  26%|██▌       | 2157/8300 [9:33:18<26:48:46, 15.71s/it]

training loss: 0.8894991874694824


training:  26%|██▌       | 2158/8300 [9:33:33<26:48:46, 15.72s/it]

training loss: 0.5220096111297607


training:  26%|██▌       | 2159/8300 [9:33:49<26:48:26, 15.72s/it]

training loss: 0.6447203755378723


training:  26%|██▌       | 2160/8300 [9:34:05<26:48:23, 15.72s/it]

training loss: 1.220346212387085


training:  26%|██▌       | 2161/8300 [9:34:21<26:48:04, 15.72s/it]

training loss: 0.5948237180709839


training:  26%|██▌       | 2162/8300 [9:34:36<26:47:53, 15.72s/it]

training loss: 0.4328398108482361


training:  26%|██▌       | 2163/8300 [9:34:52<26:47:40, 15.72s/it]

training loss: 0.8197588920593262


training:  26%|██▌       | 2164/8300 [9:35:08<26:47:05, 15.71s/it]

training loss: 0.4923335909843445


training:  26%|██▌       | 2165/8300 [9:35:23<26:46:57, 15.72s/it]

training loss: 0.7550087571144104


training:  26%|██▌       | 2166/8300 [9:35:39<26:46:53, 15.72s/it]

training loss: 0.6253761053085327


training:  26%|██▌       | 2167/8300 [9:35:55<26:46:45, 15.72s/it]

training loss: 0.5525829195976257


training:  26%|██▌       | 2168/8300 [9:36:11<26:46:22, 15.72s/it]

training loss: 0.9486671686172485


training:  26%|██▌       | 2169/8300 [9:36:26<26:46:01, 15.72s/it]

training loss: 0.9923072457313538


training:  26%|██▌       | 2170/8300 [9:36:42<26:45:42, 15.72s/it]

training loss: 0.6621238589286804


training:  26%|██▌       | 2171/8300 [9:36:58<26:45:34, 15.72s/it]

training loss: 0.9759708046913147


training:  26%|██▌       | 2172/8300 [9:37:13<26:45:33, 15.72s/it]

training loss: 0.8778916597366333


training:  26%|██▌       | 2173/8300 [9:37:29<26:45:18, 15.72s/it]

training loss: 0.7823790907859802


training:  26%|██▌       | 2174/8300 [9:37:45<26:45:02, 15.72s/it]

training loss: 1.1276227235794067


training:  26%|██▌       | 2175/8300 [9:38:01<26:44:51, 15.72s/it]

training loss: 0.6867970824241638


training:  26%|██▌       | 2176/8300 [9:38:16<26:44:28, 15.72s/it]

training loss: 0.9066570401191711


training:  26%|██▌       | 2177/8300 [9:38:32<26:44:10, 15.72s/it]

training loss: 0.5967196822166443


training:  26%|██▌       | 2178/8300 [9:38:48<26:43:45, 15.72s/it]

training loss: 0.8395389318466187


training:  26%|██▋       | 2179/8300 [9:39:04<26:43:19, 15.72s/it]

training loss: 0.9827890992164612


training:  26%|██▋       | 2180/8300 [9:39:19<26:43:00, 15.72s/it]

training loss: 0.6107927560806274


training:  26%|██▋       | 2181/8300 [9:39:35<26:42:54, 15.72s/it]

training loss: 0.7510994672775269


training:  26%|██▋       | 2182/8300 [9:39:51<26:43:01, 15.72s/it]

training loss: 0.6816499829292297


training:  26%|██▋       | 2183/8300 [9:40:06<26:42:26, 15.72s/it]

training loss: 0.6318066716194153


training:  26%|██▋       | 2184/8300 [9:40:22<26:41:54, 15.72s/it]

training loss: 0.47191449999809265


training:  26%|██▋       | 2185/8300 [9:40:38<26:41:42, 15.72s/it]

training loss: 0.42259106040000916


training:  26%|██▋       | 2186/8300 [9:40:54<26:41:25, 15.72s/it]

training loss: 1.0012198686599731


training:  26%|██▋       | 2187/8300 [9:41:09<26:41:14, 15.72s/it]

training loss: 0.8486232757568359


training:  26%|██▋       | 2188/8300 [9:41:25<26:40:52, 15.72s/it]

training loss: 1.0233064889907837


training:  26%|██▋       | 2189/8300 [9:41:41<26:40:40, 15.72s/it]

training loss: 0.8933058381080627


training:  26%|██▋       | 2190/8300 [9:41:56<26:40:16, 15.71s/it]

training loss: 0.8301915526390076


training:  26%|██▋       | 2191/8300 [9:42:12<26:40:04, 15.72s/it]

training loss: 0.7408595085144043


training:  26%|██▋       | 2192/8300 [9:42:28<26:39:52, 15.72s/it]

training loss: 0.7225104570388794


training:  26%|██▋       | 2193/8300 [9:42:44<26:39:34, 15.72s/it]

training loss: 0.8386050462722778


training:  26%|██▋       | 2194/8300 [9:42:59<26:39:42, 15.72s/it]

training loss: 0.9696029424667358


training:  26%|██▋       | 2195/8300 [9:43:15<26:39:09, 15.72s/it]

training loss: 0.9857068061828613


training:  26%|██▋       | 2196/8300 [9:43:31<26:39:01, 15.72s/it]

training loss: 0.5667416453361511


training:  26%|██▋       | 2197/8300 [9:43:46<26:38:22, 15.71s/it]

training loss: 0.9002496004104614


training:  26%|██▋       | 2198/8300 [9:44:02<26:38:10, 15.71s/it]

training loss: 0.473446786403656


training:  26%|██▋       | 2199/8300 [9:44:18<26:37:48, 15.71s/it]

training loss: 0.6545221209526062


training:  27%|██▋       | 2200/8300 [9:44:34<26:37:36, 15.71s/it]

training loss: 0.6010743379592896
training loss: 0.6283344030380249


training:  27%|██▋       | 2201/8300 [9:44:51<27:17:12, 16.11s/it]

validation loss: 1.4802632331848145


training:  27%|██▋       | 2202/8300 [9:45:06<27:05:42, 16.00s/it]

training loss: 0.6539053916931152


training:  27%|██▋       | 2203/8300 [9:45:22<26:57:04, 15.91s/it]

training loss: 1.2693666219711304


training:  27%|██▋       | 2204/8300 [9:45:38<26:50:54, 15.86s/it]

training loss: 1.0034533739089966


training:  27%|██▋       | 2205/8300 [9:45:53<26:46:16, 15.81s/it]

training loss: 0.7476973533630371


training:  27%|██▋       | 2206/8300 [9:46:09<26:43:39, 15.79s/it]

training loss: 0.7434058785438538


training:  27%|██▋       | 2207/8300 [9:46:25<26:41:16, 15.77s/it]

training loss: 0.6869258284568787


training:  27%|██▋       | 2208/8300 [9:46:41<26:39:13, 15.75s/it]

training loss: 1.0408272743225098


training:  27%|██▋       | 2209/8300 [9:46:56<26:37:53, 15.74s/it]

training loss: 1.1756513118743896


training:  27%|██▋       | 2210/8300 [9:47:12<26:36:46, 15.73s/it]

training loss: 0.7414728403091431


training:  27%|██▋       | 2211/8300 [9:47:28<26:36:02, 15.73s/it]

training loss: 0.8906257152557373


training:  27%|██▋       | 2212/8300 [9:47:43<26:35:36, 15.73s/it]

training loss: 0.6198816299438477


training:  27%|██▋       | 2213/8300 [9:47:59<26:35:50, 15.73s/it]

training loss: 0.5097989439964294


training:  27%|██▋       | 2214/8300 [9:48:15<26:36:02, 15.73s/it]

training loss: 1.0322604179382324


training:  27%|██▋       | 2215/8300 [9:48:31<26:35:55, 15.74s/it]

training loss: 0.869086503982544


training:  27%|██▋       | 2216/8300 [9:48:46<26:35:56, 15.74s/it]

training loss: 0.8893371224403381


training:  27%|██▋       | 2217/8300 [9:49:02<26:35:40, 15.74s/it]

training loss: 0.806963324546814


training:  27%|██▋       | 2218/8300 [9:49:18<26:35:19, 15.74s/it]

training loss: 0.9055222868919373


training:  27%|██▋       | 2219/8300 [9:49:34<26:35:15, 15.74s/it]

training loss: 0.37830954790115356


training:  27%|██▋       | 2220/8300 [9:49:49<26:34:50, 15.74s/it]

training loss: 0.5373343229293823


training:  27%|██▋       | 2221/8300 [9:50:05<26:33:45, 15.73s/it]

training loss: 0.6819068193435669


training:  27%|██▋       | 2222/8300 [9:50:21<26:32:47, 15.72s/it]

training loss: 0.8491886258125305


training:  27%|██▋       | 2223/8300 [9:50:37<26:32:11, 15.72s/it]

training loss: 0.6548349261283875


training:  27%|██▋       | 2224/8300 [9:50:52<26:31:39, 15.72s/it]

training loss: 0.7187305688858032


training:  27%|██▋       | 2225/8300 [9:51:08<26:31:35, 15.72s/it]

training loss: 0.6123828887939453


training:  27%|██▋       | 2226/8300 [9:51:24<26:31:02, 15.72s/it]

training loss: 0.9593966007232666


training:  27%|██▋       | 2227/8300 [9:51:39<26:30:39, 15.72s/it]

training loss: 1.4286783933639526


training:  27%|██▋       | 2228/8300 [9:51:55<26:30:18, 15.71s/it]

training loss: 0.6132567524909973


training:  27%|██▋       | 2229/8300 [9:52:11<26:30:00, 15.71s/it]

training loss: 0.595889687538147


training:  27%|██▋       | 2230/8300 [9:52:27<26:29:48, 15.71s/it]

training loss: 1.1045411825180054


training:  27%|██▋       | 2231/8300 [9:52:42<26:29:29, 15.71s/it]

training loss: 1.1435297727584839


training:  27%|██▋       | 2232/8300 [9:52:58<26:29:20, 15.72s/it]

training loss: 0.41886377334594727


training:  27%|██▋       | 2233/8300 [9:53:14<26:29:02, 15.71s/it]

training loss: 1.0514647960662842


training:  27%|██▋       | 2234/8300 [9:53:29<26:28:56, 15.72s/it]

training loss: 0.6525039672851562


training:  27%|██▋       | 2235/8300 [9:53:45<26:28:32, 15.72s/it]

training loss: 0.9947737455368042


training:  27%|██▋       | 2236/8300 [9:54:01<26:28:28, 15.72s/it]

training loss: 0.5803504586219788


training:  27%|██▋       | 2237/8300 [9:54:17<26:28:01, 15.72s/it]

training loss: 0.7653419971466064


training:  27%|██▋       | 2238/8300 [9:54:32<26:27:47, 15.72s/it]

training loss: 1.0133010149002075


training:  27%|██▋       | 2239/8300 [9:54:48<26:27:28, 15.72s/it]

training loss: 0.6377053260803223


training:  27%|██▋       | 2240/8300 [9:55:04<26:27:17, 15.72s/it]

training loss: 0.704010009765625


training:  27%|██▋       | 2241/8300 [9:55:19<26:27:05, 15.72s/it]

training loss: 1.1933683156967163


training:  27%|██▋       | 2242/8300 [9:55:35<26:26:56, 15.72s/it]

training loss: 0.9158452749252319


training:  27%|██▋       | 2243/8300 [9:55:51<26:26:27, 15.72s/it]

training loss: 0.5562148690223694


training:  27%|██▋       | 2244/8300 [9:56:07<26:26:01, 15.71s/it]

training loss: 0.28986823558807373


training:  27%|██▋       | 2245/8300 [9:56:22<26:25:52, 15.71s/it]

training loss: 0.9980958700180054


training:  27%|██▋       | 2246/8300 [9:56:38<26:25:31, 15.71s/it]

training loss: 0.8274649381637573


training:  27%|██▋       | 2247/8300 [9:56:54<26:25:15, 15.71s/it]

training loss: 0.8219630122184753


training:  27%|██▋       | 2248/8300 [9:57:09<26:25:00, 15.71s/it]

training loss: 0.8756210207939148


training:  27%|██▋       | 2249/8300 [9:57:25<26:24:42, 15.71s/it]

training loss: 0.8285021781921387


training:  27%|██▋       | 2250/8300 [9:57:41<26:24:27, 15.71s/it]

training loss: 0.9943099021911621


training:  27%|██▋       | 2251/8300 [9:57:57<26:23:59, 15.71s/it]

training loss: 1.0748249292373657


training:  27%|██▋       | 2252/8300 [9:58:12<26:23:51, 15.71s/it]

training loss: 1.02218496799469


training:  27%|██▋       | 2253/8300 [9:58:28<26:23:35, 15.71s/it]

training loss: 0.6524832844734192


training:  27%|██▋       | 2254/8300 [9:58:44<26:23:15, 15.71s/it]

training loss: 0.8154056668281555


training:  27%|██▋       | 2255/8300 [9:58:59<26:23:10, 15.71s/it]

training loss: 0.7400974631309509


training:  27%|██▋       | 2256/8300 [9:59:15<26:23:13, 15.72s/it]

training loss: 0.692740797996521


training:  27%|██▋       | 2257/8300 [9:59:31<26:23:00, 15.72s/it]

training loss: 0.8256961703300476


training:  27%|██▋       | 2258/8300 [9:59:47<26:22:42, 15.72s/it]

training loss: 0.5152894258499146


training:  27%|██▋       | 2259/8300 [10:00:02<26:22:21, 15.72s/it]

training loss: 0.8466861248016357


training:  27%|██▋       | 2260/8300 [10:00:18<26:22:07, 15.72s/it]

training loss: 1.2227973937988281


training:  27%|██▋       | 2261/8300 [10:00:34<26:21:47, 15.72s/it]

training loss: 0.6141040921211243


training:  27%|██▋       | 2262/8300 [10:00:49<26:21:24, 15.71s/it]

training loss: 1.1534161567687988


training:  27%|██▋       | 2263/8300 [10:01:05<26:21:11, 15.72s/it]

training loss: 0.9057241082191467


training:  27%|██▋       | 2264/8300 [10:01:21<26:20:55, 15.71s/it]

training loss: 0.7106385827064514


training:  27%|██▋       | 2265/8300 [10:01:37<26:23:45, 15.75s/it]

training loss: 0.9977973699569702


training:  27%|██▋       | 2266/8300 [10:01:52<26:22:27, 15.74s/it]

training loss: 0.81674724817276


training:  27%|██▋       | 2267/8300 [10:02:08<26:21:15, 15.73s/it]

training loss: 0.8711194396018982


training:  27%|██▋       | 2268/8300 [10:02:24<26:20:36, 15.72s/it]

training loss: 0.5693166255950928


training:  27%|██▋       | 2269/8300 [10:02:40<26:20:07, 15.72s/it]

training loss: 1.0815259218215942


training:  27%|██▋       | 2270/8300 [10:02:55<26:19:38, 15.72s/it]

training loss: 0.807965099811554


training:  27%|██▋       | 2271/8300 [10:03:11<26:19:30, 15.72s/it]

training loss: 0.6747501492500305


training:  27%|██▋       | 2272/8300 [10:03:27<26:18:54, 15.72s/it]

training loss: 0.5580373406410217


training:  27%|██▋       | 2273/8300 [10:03:42<26:18:48, 15.72s/it]

training loss: 0.9783148169517517


training:  27%|██▋       | 2274/8300 [10:03:58<26:18:15, 15.71s/it]

training loss: 0.8663737773895264


training:  27%|██▋       | 2275/8300 [10:04:14<26:18:06, 15.72s/it]

training loss: 0.9837841987609863


training:  27%|██▋       | 2276/8300 [10:04:30<26:17:58, 15.72s/it]

training loss: 1.067404866218567


training:  27%|██▋       | 2277/8300 [10:04:45<26:17:25, 15.71s/it]

training loss: 0.5146393775939941


training:  27%|██▋       | 2278/8300 [10:05:01<26:17:13, 15.71s/it]

training loss: 0.8992390632629395


training:  27%|██▋       | 2279/8300 [10:05:17<26:17:12, 15.72s/it]

training loss: 0.6022117137908936


training:  27%|██▋       | 2280/8300 [10:05:32<26:16:52, 15.72s/it]

training loss: 1.0216994285583496


training:  27%|██▋       | 2281/8300 [10:05:48<26:16:30, 15.72s/it]

training loss: 0.5896912217140198


training:  27%|██▋       | 2282/8300 [10:06:04<26:16:14, 15.72s/it]

training loss: 0.6742548942565918


training:  28%|██▊       | 2283/8300 [10:06:20<26:16:05, 15.72s/it]

training loss: 0.7912285923957825


training:  28%|██▊       | 2284/8300 [10:06:35<26:15:53, 15.72s/it]

training loss: 0.6832576990127563


training:  28%|██▊       | 2285/8300 [10:06:51<26:15:30, 15.72s/it]

training loss: 0.7596188187599182


training:  28%|██▊       | 2286/8300 [10:07:07<26:15:20, 15.72s/it]

training loss: 0.7592021226882935


training:  28%|██▊       | 2287/8300 [10:07:22<26:15:12, 15.72s/it]

training loss: 0.9051180481910706


training:  28%|██▊       | 2288/8300 [10:07:38<26:14:44, 15.72s/it]

training loss: 0.9236151576042175


training:  28%|██▊       | 2289/8300 [10:07:54<26:14:04, 15.71s/it]

training loss: 0.9128909707069397


training:  28%|██▊       | 2290/8300 [10:08:10<26:14:20, 15.72s/it]

training loss: 0.7503729462623596


training:  28%|██▊       | 2291/8300 [10:08:25<26:14:05, 15.72s/it]

training loss: 0.46124035120010376


training:  28%|██▊       | 2292/8300 [10:08:41<26:13:42, 15.72s/it]

training loss: 0.8658640384674072


training:  28%|██▊       | 2293/8300 [10:08:57<26:13:28, 15.72s/it]

training loss: 0.6101880669593811


training:  28%|██▊       | 2294/8300 [10:09:12<26:13:10, 15.72s/it]

training loss: 0.6813318133354187


training:  28%|██▊       | 2295/8300 [10:09:28<26:13:02, 15.72s/it]

training loss: 0.8635215759277344


training:  28%|██▊       | 2296/8300 [10:09:44<26:12:45, 15.72s/it]

training loss: 0.7547019124031067


training:  28%|██▊       | 2297/8300 [10:10:00<26:12:18, 15.72s/it]

training loss: 0.7199644446372986


training:  28%|██▊       | 2298/8300 [10:10:15<26:12:04, 15.72s/it]

training loss: 0.8426775932312012


training:  28%|██▊       | 2299/8300 [10:10:31<26:11:54, 15.72s/it]

training loss: 0.1543615460395813


training:  28%|██▊       | 2300/8300 [10:10:47<26:11:34, 15.72s/it]

training loss: 0.9289404153823853
training loss: 0.4765973687171936



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.4703465700149536
ej metropole Lahaur v roku
2014 potvrdil trest smrti, vyneseny na zaklade kontroverznych pakistanskych
zakonov o ruhani.
V roku 2011 bol zavrazdeny guverner Pandzabu Salman Tasir, ktory
vystupoval proti tymto spornym zakonom.
Sudca Ikbal Hamidul Rahman odmietol zasadnut na sudcovskej stolici
s odovodnenim, ze uz pojednaval pripad Tasirovho vraha Mumtaza
Kadriho, ktoreho odsudili na smrt a bol obeseny vo februari
tohto roka.
V okoli najvyssieho sudu panovali dnes prisne bezpecnostne opatrenia,
kedze islamisticke organizacie varovali pred prepustenim Bibiovej na
slobodu. Odsudenu drzia v samovazbe vo vazeni v Pandzabe a jej rodina
sa skryva.
Kritici zakonov o ruhani sa tvrdia, ze ich vyuzivaju moslimovia na
prenasledovanie a zastrasovanie nabozenskych mensin v Pakistane.Niektore sa uz podarilo dostat pod kontrolu, niekde sa ohen rozhorel
znovu. Miestne media venuju pozornost skor maloletym ci mladistvym
pachatelom, ktori maju cast pozia


generating:   0%|          | 1/512 [00:00<01:58,  4.30it/s][A
generating:   0%|          | 2/512 [00:00<01:59,  4.27it/s][A
generating:   1%|          | 3/512 [00:00<01:59,  4.28it/s][A
generating:   1%|          | 4/512 [00:00<01:58,  4.28it/s][A
generating:   1%|          | 5/512 [00:01<01:57,  4.30it/s][A
generating:   1%|          | 6/512 [00:01<01:59,  4.25it/s][A
generating:   1%|▏         | 7/512 [00:01<01:59,  4.23it/s][A
generating:   2%|▏         | 8/512 [00:01<01:58,  4.25it/s][A
generating:   2%|▏         | 9/512 [00:02<01:58,  4.25it/s][A
generating:   2%|▏         | 10/512 [00:02<01:57,  4.27it/s][A
generating:   2%|▏         | 11/512 [00:02<01:58,  4.24it/s][A
generating:   2%|▏         | 12/512 [00:02<01:57,  4.27it/s][A
generating:   3%|▎         | 13/512 [00:03<01:59,  4.19it/s][A
generating:   3%|▎         | 14/512 [00:03<01:57,  4.22it/s][A
generating:   3%|▎         | 15/512 [00:03<01:58,  4.20it/s][A
generating:   3%|▎         | 16/512 [00:03<01:58

 roky 2015 az
731.150 vyzerat. Uvedenemu statnu pomoc
objemu rispenie do europskej unie vo vyse 1601412014.
Pripisal k ekonomicky mali v roku 2013, ale posobia zahranicnych penazi vyhodnej
vybere nasej obci, a povedal agentury a zastava
ludi v nemecku k ucelovom sankcii pre europskych vysledkov.
Podla inflacie v obdobi odolnicna spolocnosti
Zavadza ministerstvo hodili. Vladne tak od uplneniu
problemovy sa pre dnes mala na nieco prace auditym zdrojom viacskych
prezidentaciu cenu prichod pracovnych m


training:  28%|██▊       | 2302/8300 [10:13:20<68:58:45, 41.40s/it]

training loss: 0.6639750599861145


training:  28%|██▊       | 2303/8300 [10:13:36<56:08:00, 33.70s/it]

training loss: 0.810136079788208


training:  28%|██▊       | 2304/8300 [10:13:52<47:07:50, 28.30s/it]

training loss: 0.42106741666793823


training:  28%|██▊       | 2305/8300 [10:14:08<40:50:25, 24.52s/it]

training loss: 0.5218942761421204


training:  28%|██▊       | 2306/8300 [10:14:23<36:25:42, 21.88s/it]

training loss: 0.9073932766914368


training:  28%|██▊       | 2307/8300 [10:14:39<33:20:32, 20.03s/it]

training loss: 0.6855306625366211


training:  28%|██▊       | 2308/8300 [10:14:55<31:10:40, 18.73s/it]

training loss: 0.7683389186859131


training:  28%|██▊       | 2309/8300 [10:15:10<29:39:52, 17.83s/it]

training loss: 0.6672708988189697


training:  28%|██▊       | 2310/8300 [10:15:26<28:36:17, 17.19s/it]

training loss: 0.8112518191337585


training:  28%|██▊       | 2311/8300 [10:15:42<27:51:36, 16.75s/it]

training loss: 1.3353253602981567


training:  28%|██▊       | 2312/8300 [10:15:58<27:20:30, 16.44s/it]

training loss: 0.9613738656044006


training:  28%|██▊       | 2313/8300 [10:16:13<26:58:44, 16.22s/it]

training loss: 0.37942516803741455


training:  28%|██▊       | 2314/8300 [10:16:29<26:43:30, 16.07s/it]

training loss: 0.8069184422492981


training:  28%|██▊       | 2315/8300 [10:16:45<26:32:36, 15.97s/it]

training loss: 0.7451224327087402


training:  28%|██▊       | 2316/8300 [10:17:00<26:24:53, 15.89s/it]

training loss: 0.8621882200241089


training:  28%|██▊       | 2317/8300 [10:17:16<26:19:06, 15.84s/it]

training loss: 0.5385214686393738


training:  28%|██▊       | 2318/8300 [10:17:32<26:15:04, 15.80s/it]

training loss: 1.1118804216384888


training:  28%|██▊       | 2319/8300 [10:17:48<26:12:08, 15.77s/it]

training loss: 0.7230673432350159


training:  28%|██▊       | 2320/8300 [10:18:03<26:10:11, 15.75s/it]

training loss: 0.9352495670318604


training:  28%|██▊       | 2321/8300 [10:18:19<26:08:30, 15.74s/it]

training loss: 1.009946584701538


training:  28%|██▊       | 2322/8300 [10:18:35<26:07:32, 15.73s/it]

training loss: 0.8401467800140381


training:  28%|██▊       | 2323/8300 [10:18:50<26:07:18, 15.73s/it]

training loss: 1.0388578176498413


training:  28%|██▊       | 2324/8300 [10:19:06<26:07:39, 15.74s/it]

training loss: 0.6231394410133362


training:  28%|██▊       | 2325/8300 [10:19:22<26:07:24, 15.74s/it]

training loss: 0.49407532811164856


training:  28%|██▊       | 2326/8300 [10:19:38<26:07:15, 15.74s/it]

training loss: 0.6758162975311279


training:  28%|██▊       | 2327/8300 [10:19:53<26:06:39, 15.74s/it]

training loss: 0.7870839834213257


training:  28%|██▊       | 2328/8300 [10:20:09<26:06:34, 15.74s/it]

training loss: 0.7992522120475769


training:  28%|██▊       | 2329/8300 [10:20:25<26:06:23, 15.74s/it]

training loss: 0.9418419599533081


training:  28%|██▊       | 2330/8300 [10:20:41<26:05:53, 15.74s/it]

training loss: 0.9103537201881409


training:  28%|██▊       | 2331/8300 [10:20:56<26:04:54, 15.73s/it]

training loss: 1.2674366235733032


training:  28%|██▊       | 2332/8300 [10:21:12<26:03:49, 15.72s/it]

training loss: 0.8159061074256897


training:  28%|██▊       | 2333/8300 [10:21:28<26:03:10, 15.72s/it]

training loss: 0.6067888736724854


training:  28%|██▊       | 2334/8300 [10:21:43<26:02:29, 15.71s/it]

training loss: 1.055338978767395


training:  28%|██▊       | 2335/8300 [10:21:59<26:02:03, 15.71s/it]

training loss: 0.9458853006362915


training:  28%|██▊       | 2336/8300 [10:22:15<26:01:46, 15.71s/it]

training loss: 0.7178598046302795


training:  28%|██▊       | 2337/8300 [10:22:31<26:01:22, 15.71s/it]

training loss: 0.9475346803665161


training:  28%|██▊       | 2338/8300 [10:22:46<26:00:53, 15.71s/it]

training loss: 1.1561743021011353


training:  28%|██▊       | 2339/8300 [10:23:02<26:00:44, 15.71s/it]

training loss: 0.9073833227157593


training:  28%|██▊       | 2340/8300 [10:23:18<26:00:49, 15.71s/it]

training loss: 0.7267618775367737


training:  28%|██▊       | 2341/8300 [10:23:33<26:00:28, 15.71s/it]

training loss: 0.8965727090835571


training:  28%|██▊       | 2342/8300 [10:23:49<26:00:17, 15.71s/it]

training loss: 0.7477461099624634


training:  28%|██▊       | 2343/8300 [10:24:05<26:00:06, 15.71s/it]

training loss: 0.7569433450698853


training:  28%|██▊       | 2344/8300 [10:24:21<25:59:37, 15.71s/it]

training loss: 1.4560683965682983


training:  28%|██▊       | 2345/8300 [10:24:36<25:59:24, 15.71s/it]

training loss: 0.6433180570602417


training:  28%|██▊       | 2346/8300 [10:24:52<25:58:58, 15.71s/it]

training loss: 0.8303468227386475


training:  28%|██▊       | 2347/8300 [10:25:08<25:58:41, 15.71s/it]

training loss: 0.7478675246238708


training:  28%|██▊       | 2348/8300 [10:25:23<25:58:12, 15.71s/it]

training loss: 0.647758424282074


training:  28%|██▊       | 2349/8300 [10:25:39<25:58:11, 15.71s/it]

training loss: 0.8669224381446838


training:  28%|██▊       | 2350/8300 [10:25:55<25:57:54, 15.71s/it]

training loss: 0.7866772413253784


training:  28%|██▊       | 2351/8300 [10:26:11<25:57:32, 15.71s/it]

training loss: 0.7601056098937988


training:  28%|██▊       | 2352/8300 [10:26:26<25:57:28, 15.71s/it]

training loss: 0.7017270922660828


training:  28%|██▊       | 2353/8300 [10:26:42<25:56:51, 15.71s/it]

training loss: 0.7520235180854797


training:  28%|██▊       | 2354/8300 [10:26:58<25:56:41, 15.71s/it]

training loss: 0.8388842344284058


training:  28%|██▊       | 2355/8300 [10:27:13<25:56:32, 15.71s/it]

training loss: 0.81050705909729


training:  28%|██▊       | 2356/8300 [10:27:29<25:56:27, 15.71s/it]

training loss: 0.7110544443130493


training:  28%|██▊       | 2357/8300 [10:27:45<25:56:18, 15.71s/it]

training loss: 0.5878929495811462


training:  28%|██▊       | 2358/8300 [10:28:00<25:55:56, 15.71s/it]

training loss: 0.9590482115745544


training:  28%|██▊       | 2359/8300 [10:28:16<25:55:32, 15.71s/it]

training loss: 0.852567195892334


training:  28%|██▊       | 2360/8300 [10:28:32<25:54:59, 15.71s/it]

training loss: 0.7878203988075256


training:  28%|██▊       | 2361/8300 [10:28:48<25:55:03, 15.71s/it]

training loss: 0.7481279373168945


training:  28%|██▊       | 2362/8300 [10:29:03<25:54:54, 15.71s/it]

training loss: 0.5270670056343079


training:  28%|██▊       | 2363/8300 [10:29:19<25:54:32, 15.71s/it]

training loss: 0.6090130805969238


training:  28%|██▊       | 2364/8300 [10:29:35<25:54:19, 15.71s/it]

training loss: 1.0706913471221924


training:  28%|██▊       | 2365/8300 [10:29:50<25:54:02, 15.71s/it]

training loss: 0.7928276658058167


training:  29%|██▊       | 2366/8300 [10:30:06<25:54:09, 15.71s/it]

training loss: 0.829001784324646


training:  29%|██▊       | 2367/8300 [10:30:22<25:53:40, 15.71s/it]

training loss: 0.9728579521179199


training:  29%|██▊       | 2368/8300 [10:30:38<25:53:40, 15.71s/it]

training loss: 0.8623520135879517


training:  29%|██▊       | 2369/8300 [10:30:53<25:53:33, 15.72s/it]

training loss: 0.3570810556411743


training:  29%|██▊       | 2370/8300 [10:31:09<25:53:18, 15.72s/it]

training loss: 0.8867660760879517


training:  29%|██▊       | 2371/8300 [10:31:25<25:52:57, 15.72s/it]

training loss: 0.6288649439811707


training:  29%|██▊       | 2372/8300 [10:31:40<25:52:23, 15.71s/it]

training loss: 0.6871674656867981


training:  29%|██▊       | 2373/8300 [10:31:56<25:52:19, 15.71s/it]

training loss: 0.7978851795196533


training:  29%|██▊       | 2374/8300 [10:32:12<25:51:43, 15.71s/it]

training loss: 0.8988401889801025


training:  29%|██▊       | 2375/8300 [10:32:28<25:51:30, 15.71s/it]

training loss: 0.5138002038002014


training:  29%|██▊       | 2376/8300 [10:32:43<25:51:26, 15.71s/it]

training loss: 0.5092639923095703


training:  29%|██▊       | 2377/8300 [10:32:59<25:50:52, 15.71s/it]

training loss: 0.7723184823989868


training:  29%|██▊       | 2378/8300 [10:33:15<25:50:51, 15.71s/it]

training loss: 0.7711881995201111


training:  29%|██▊       | 2379/8300 [10:33:30<25:50:44, 15.71s/it]

training loss: 0.7738330960273743


training:  29%|██▊       | 2380/8300 [10:33:46<25:50:28, 15.71s/it]

training loss: 0.665611982345581


training:  29%|██▊       | 2381/8300 [10:34:02<25:50:10, 15.71s/it]

training loss: 0.7893379330635071


training:  29%|██▊       | 2382/8300 [10:34:18<25:49:47, 15.71s/it]

training loss: 0.6453503966331482


training:  29%|██▊       | 2383/8300 [10:34:33<25:49:27, 15.71s/it]

training loss: 0.9635710120201111


training:  29%|██▊       | 2384/8300 [10:34:49<25:49:01, 15.71s/it]

training loss: 0.30511459708213806


training:  29%|██▊       | 2385/8300 [10:35:05<25:49:09, 15.71s/it]

training loss: 0.8873449563980103


training:  29%|██▊       | 2386/8300 [10:35:20<25:48:36, 15.71s/it]

training loss: 0.6878849864006042


training:  29%|██▉       | 2387/8300 [10:35:36<25:48:40, 15.71s/it]

training loss: 0.610305666923523


training:  29%|██▉       | 2388/8300 [10:35:52<25:48:02, 15.71s/it]

training loss: 0.6738047003746033


training:  29%|██▉       | 2389/8300 [10:36:08<25:48:05, 15.71s/it]

training loss: 0.8583148717880249


training:  29%|██▉       | 2390/8300 [10:36:23<25:47:27, 15.71s/it]

training loss: 0.9091536402702332


training:  29%|██▉       | 2391/8300 [10:36:39<25:47:05, 15.71s/it]

training loss: 0.44692882895469666


training:  29%|██▉       | 2392/8300 [10:36:55<25:46:54, 15.71s/it]

training loss: 0.8086448907852173


training:  29%|██▉       | 2393/8300 [10:37:10<25:46:43, 15.71s/it]

training loss: 1.026876449584961


training:  29%|██▉       | 2394/8300 [10:37:26<25:46:23, 15.71s/it]

training loss: 0.8891357779502869


training:  29%|██▉       | 2395/8300 [10:37:42<25:46:08, 15.71s/it]

training loss: 0.8754650354385376


training:  29%|██▉       | 2396/8300 [10:37:58<25:46:16, 15.71s/it]

training loss: 0.826582670211792


training:  29%|██▉       | 2397/8300 [10:38:13<25:45:59, 15.71s/it]

training loss: 0.9147582054138184


training:  29%|██▉       | 2398/8300 [10:38:29<25:45:25, 15.71s/it]

training loss: 0.46674275398254395


training:  29%|██▉       | 2399/8300 [10:38:45<25:45:05, 15.71s/it]

training loss: 0.9620538949966431


training:  29%|██▉       | 2400/8300 [10:39:00<25:44:55, 15.71s/it]

training loss: 0.8637470602989197
training loss: 0.6098155975341797


training:  29%|██▉       | 2401/8300 [10:39:17<26:22:16, 16.09s/it]

validation loss: 1.5452641248703003


training:  29%|██▉       | 2402/8300 [10:39:33<26:11:11, 15.98s/it]

training loss: 0.4261268675327301


training:  29%|██▉       | 2403/8300 [10:39:49<26:02:42, 15.90s/it]

training loss: 0.7143383026123047


training:  29%|██▉       | 2404/8300 [10:40:05<25:56:51, 15.84s/it]

training loss: 0.9412738680839539


training:  29%|██▉       | 2405/8300 [10:40:20<25:52:39, 15.80s/it]

training loss: 0.9699165225028992


training:  29%|██▉       | 2406/8300 [10:40:36<25:49:47, 15.78s/it]

training loss: 0.6786407828330994


training:  29%|██▉       | 2407/8300 [10:40:52<25:47:29, 15.76s/it]

training loss: 0.8483256101608276


training:  29%|██▉       | 2408/8300 [10:41:07<25:45:53, 15.74s/it]

training loss: 0.893987238407135


training:  29%|██▉       | 2409/8300 [10:41:23<25:44:38, 15.73s/it]

training loss: 0.7555487751960754


training:  29%|██▉       | 2410/8300 [10:41:39<25:43:40, 15.73s/it]

training loss: 0.4263326823711395


training:  29%|██▉       | 2411/8300 [10:41:55<25:43:02, 15.72s/it]

training loss: 0.9108243584632874


training:  29%|██▉       | 2412/8300 [10:42:10<25:42:28, 15.72s/it]

training loss: 1.009536862373352


training:  29%|██▉       | 2413/8300 [10:42:26<25:41:49, 15.71s/it]

training loss: 0.9904031753540039


training:  29%|██▉       | 2414/8300 [10:42:42<25:41:10, 15.71s/it]

training loss: 0.8577063679695129


training:  29%|██▉       | 2415/8300 [10:42:57<25:40:59, 15.71s/it]

training loss: 0.6576936841011047


training:  29%|██▉       | 2416/8300 [10:43:13<25:40:41, 15.71s/it]

training loss: 0.8234938383102417


training:  29%|██▉       | 2417/8300 [10:43:29<25:40:26, 15.71s/it]

training loss: 0.5349786281585693


training:  29%|██▉       | 2418/8300 [10:43:44<25:40:08, 15.71s/it]

training loss: 1.0040757656097412


training:  29%|██▉       | 2419/8300 [10:44:00<25:39:52, 15.71s/it]

training loss: 1.1115425825119019


training:  29%|██▉       | 2420/8300 [10:44:16<25:39:39, 15.71s/it]

training loss: 0.9469786882400513


training:  29%|██▉       | 2421/8300 [10:44:32<25:39:32, 15.71s/it]

training loss: 0.9141180515289307


training:  29%|██▉       | 2422/8300 [10:44:47<25:39:20, 15.71s/it]

training loss: 0.8196593523025513


training:  29%|██▉       | 2423/8300 [10:45:03<25:39:11, 15.71s/it]

training loss: 0.5061439275741577


training:  29%|██▉       | 2424/8300 [10:45:19<25:38:37, 15.71s/it]

training loss: 0.7130008339881897


training:  29%|██▉       | 2425/8300 [10:45:34<25:38:30, 15.71s/it]

training loss: 0.6570024490356445


training:  29%|██▉       | 2426/8300 [10:45:50<25:37:59, 15.71s/it]

training loss: 1.085274577140808


training:  29%|██▉       | 2427/8300 [10:46:06<25:38:00, 15.71s/it]

training loss: 0.7262760400772095


training:  29%|██▉       | 2428/8300 [10:46:22<25:37:25, 15.71s/it]

training loss: 0.43555787205696106


training:  29%|██▉       | 2429/8300 [10:46:37<25:37:28, 15.71s/it]

training loss: 0.6199603080749512


training:  29%|██▉       | 2430/8300 [10:46:53<25:36:51, 15.71s/it]

training loss: 0.6384311318397522


training:  29%|██▉       | 2431/8300 [10:47:09<25:36:46, 15.71s/it]

training loss: 0.7813959121704102


training:  29%|██▉       | 2432/8300 [10:47:24<25:36:22, 15.71s/it]

training loss: 1.025168776512146


training:  29%|██▉       | 2433/8300 [10:47:40<25:36:04, 15.71s/it]

training loss: 1.1310617923736572


training:  29%|██▉       | 2434/8300 [10:47:56<25:35:48, 15.71s/it]

training loss: 0.644504725933075


training:  29%|██▉       | 2435/8300 [10:48:12<25:35:38, 15.71s/it]

training loss: 0.6213769316673279


training:  29%|██▉       | 2436/8300 [10:48:27<25:35:18, 15.71s/it]

training loss: 0.7438640594482422


training:  29%|██▉       | 2437/8300 [10:48:43<25:34:57, 15.71s/it]

training loss: 0.4541809558868408


training:  29%|██▉       | 2438/8300 [10:48:59<25:35:12, 15.71s/it]

training loss: 0.7411711812019348


training:  29%|██▉       | 2439/8300 [10:49:14<25:35:21, 15.72s/it]

training loss: 0.7014204263687134


training:  29%|██▉       | 2440/8300 [10:49:30<25:35:36, 15.72s/it]

training loss: 0.40388309955596924


training:  29%|██▉       | 2441/8300 [10:49:46<25:35:17, 15.72s/it]

training loss: 0.9787051677703857


training:  29%|██▉       | 2442/8300 [10:50:02<25:35:07, 15.72s/it]

training loss: 0.7960617542266846


training:  29%|██▉       | 2443/8300 [10:50:17<25:35:04, 15.73s/it]

training loss: 1.0142682790756226


training:  29%|██▉       | 2444/8300 [10:50:33<25:35:22, 15.73s/it]

training loss: 1.220913290977478


training:  29%|██▉       | 2445/8300 [10:50:49<25:34:50, 15.73s/it]

training loss: 0.5781204104423523


training:  29%|██▉       | 2446/8300 [10:51:05<25:34:50, 15.73s/it]

training loss: 0.5161197185516357


training:  29%|██▉       | 2447/8300 [10:51:20<25:33:57, 15.72s/it]

training loss: 0.6735207438468933


training:  29%|██▉       | 2448/8300 [10:51:36<25:33:24, 15.72s/it]

training loss: 0.7531981468200684


training:  30%|██▉       | 2449/8300 [10:51:52<25:32:39, 15.72s/it]

training loss: 1.098991870880127


training:  30%|██▉       | 2450/8300 [10:52:07<25:32:19, 15.72s/it]

training loss: 1.0224915742874146


training:  30%|██▉       | 2451/8300 [10:52:23<25:31:46, 15.71s/it]

training loss: 1.1132614612579346


training:  30%|██▉       | 2452/8300 [10:52:39<25:31:21, 15.71s/it]

training loss: 1.0226181745529175


training:  30%|██▉       | 2453/8300 [10:52:55<25:31:20, 15.71s/it]

training loss: 0.7488000988960266


training:  30%|██▉       | 2454/8300 [10:53:10<25:31:03, 15.71s/it]

training loss: 0.6027470231056213


training:  30%|██▉       | 2455/8300 [10:53:26<25:30:53, 15.71s/it]

training loss: 0.8161365389823914


training:  30%|██▉       | 2456/8300 [10:53:42<25:30:24, 15.71s/it]

training loss: 1.1122357845306396


training:  30%|██▉       | 2457/8300 [10:53:57<25:30:14, 15.71s/it]

training loss: 1.108086109161377


training:  30%|██▉       | 2458/8300 [10:54:13<25:29:55, 15.71s/it]

training loss: 0.9371718168258667


training:  30%|██▉       | 2459/8300 [10:54:29<25:29:45, 15.71s/it]

training loss: 1.029536485671997


training:  30%|██▉       | 2460/8300 [10:54:45<25:29:19, 15.71s/it]

training loss: 1.077301263809204


training:  30%|██▉       | 2461/8300 [10:55:00<25:29:05, 15.71s/it]

training loss: 0.8781503438949585


training:  30%|██▉       | 2462/8300 [10:55:16<25:28:43, 15.71s/it]

training loss: 0.9217870235443115


training:  30%|██▉       | 2463/8300 [10:55:32<25:28:35, 15.71s/it]

training loss: 0.71311354637146


training:  30%|██▉       | 2464/8300 [10:55:47<25:28:10, 15.71s/it]

training loss: 0.6477408409118652


training:  30%|██▉       | 2465/8300 [10:56:03<25:27:52, 15.71s/it]

training loss: 0.7200878858566284


training:  30%|██▉       | 2466/8300 [10:56:19<25:27:37, 15.71s/it]

training loss: 0.8741599917411804


training:  30%|██▉       | 2467/8300 [10:56:34<25:27:36, 15.71s/it]

training loss: 0.43495702743530273


training:  30%|██▉       | 2468/8300 [10:56:50<25:27:14, 15.71s/it]

training loss: 0.8994343280792236


training:  30%|██▉       | 2469/8300 [10:57:06<25:27:11, 15.71s/it]

training loss: 0.5769437551498413


training:  30%|██▉       | 2470/8300 [10:57:22<25:26:33, 15.71s/it]

training loss: 0.9110918045043945


training:  30%|██▉       | 2471/8300 [10:57:37<25:26:21, 15.71s/it]

training loss: 0.6211417317390442


training:  30%|██▉       | 2472/8300 [10:57:53<25:25:58, 15.71s/it]

training loss: 1.0078610181808472


training:  30%|██▉       | 2473/8300 [10:58:09<25:26:02, 15.71s/it]

training loss: 1.0276540517807007


training:  30%|██▉       | 2474/8300 [10:58:24<25:25:52, 15.71s/it]

training loss: 0.7061088681221008


training:  30%|██▉       | 2475/8300 [10:58:40<25:25:39, 15.71s/it]

training loss: 1.012253999710083


training:  30%|██▉       | 2476/8300 [10:58:56<25:25:18, 15.71s/it]

training loss: 0.7892126441001892


training:  30%|██▉       | 2477/8300 [10:59:12<25:25:00, 15.71s/it]

training loss: 0.9891293048858643


training:  30%|██▉       | 2478/8300 [10:59:27<25:24:44, 15.71s/it]

training loss: 0.9742364287376404


training:  30%|██▉       | 2479/8300 [10:59:43<25:24:26, 15.71s/it]

training loss: 0.6476713418960571


training:  30%|██▉       | 2480/8300 [10:59:59<25:24:06, 15.71s/it]

training loss: 0.6889264583587646


training:  30%|██▉       | 2481/8300 [11:00:14<25:23:37, 15.71s/it]

training loss: 0.35650044679641724


training:  30%|██▉       | 2482/8300 [11:00:30<25:23:38, 15.71s/it]

training loss: 0.7871560454368591


training:  30%|██▉       | 2483/8300 [11:00:46<25:23:07, 15.71s/it]

training loss: 0.5770688056945801


training:  30%|██▉       | 2484/8300 [11:01:02<25:22:47, 15.71s/it]

training loss: 1.0352615118026733


training:  30%|██▉       | 2485/8300 [11:01:17<25:22:43, 15.71s/it]

training loss: 0.8660557270050049


training:  30%|██▉       | 2486/8300 [11:01:33<25:22:40, 15.71s/it]

training loss: 1.096648931503296


training:  30%|██▉       | 2487/8300 [11:01:49<25:22:13, 15.71s/it]

training loss: 0.7779273390769958


training:  30%|██▉       | 2488/8300 [11:02:04<25:22:05, 15.71s/it]

training loss: 0.8464755415916443


training:  30%|██▉       | 2489/8300 [11:02:20<25:21:47, 15.71s/it]

training loss: 0.8850185871124268


training:  30%|███       | 2490/8300 [11:02:36<25:21:30, 15.71s/it]

training loss: 0.7491567134857178


training:  30%|███       | 2491/8300 [11:02:52<25:21:12, 15.71s/it]

training loss: 1.023201823234558


training:  30%|███       | 2492/8300 [11:03:07<25:20:55, 15.71s/it]

training loss: 0.8229889869689941


training:  30%|███       | 2493/8300 [11:03:23<25:20:35, 15.71s/it]

training loss: 0.9855226874351501


training:  30%|███       | 2494/8300 [11:03:39<25:20:23, 15.71s/it]

training loss: 0.7577047348022461


training:  30%|███       | 2495/8300 [11:03:54<25:20:11, 15.71s/it]

training loss: 0.8131434917449951


training:  30%|███       | 2496/8300 [11:04:10<25:20:14, 15.72s/it]

training loss: 0.8594181537628174


training:  30%|███       | 2497/8300 [11:04:26<25:19:38, 15.71s/it]

training loss: 0.8865382075309753


training:  30%|███       | 2498/8300 [11:04:42<25:19:14, 15.71s/it]

training loss: 0.4372933804988861


training:  30%|███       | 2499/8300 [11:04:57<25:19:04, 15.71s/it]

training loss: 0.4982762336730957


training:  30%|███       | 2500/8300 [11:05:13<25:18:45, 15.71s/it]

training loss: 0.6513139605522156
training loss: 1.0160835981369019


training:  30%|███       | 2501/8300 [11:05:30<25:55:36, 16.10s/it]

validation loss: 1.5051931142807007


training:  30%|███       | 2502/8300 [11:05:46<25:44:00, 15.98s/it]

training loss: 0.8454989194869995


training:  30%|███       | 2503/8300 [11:06:01<25:36:16, 15.90s/it]

training loss: 0.9819852113723755


training:  30%|███       | 2504/8300 [11:06:17<25:32:49, 15.87s/it]

training loss: 1.0480151176452637


training:  30%|███       | 2505/8300 [11:06:33<25:28:03, 15.82s/it]

training loss: 0.7135298848152161


training:  30%|███       | 2506/8300 [11:06:49<25:24:32, 15.79s/it]

training loss: 0.7740634679794312


training:  30%|███       | 2507/8300 [11:07:04<25:22:06, 15.76s/it]

training loss: 0.798572301864624


training:  30%|███       | 2508/8300 [11:07:20<25:20:07, 15.75s/it]

training loss: 0.9878745079040527


training:  30%|███       | 2509/8300 [11:07:36<25:18:58, 15.74s/it]

training loss: 0.7497717142105103


training:  30%|███       | 2510/8300 [11:07:51<25:17:32, 15.73s/it]

training loss: 0.8312681317329407


training:  30%|███       | 2511/8300 [11:08:07<25:17:06, 15.72s/it]

training loss: 0.8038554191589355


training:  30%|███       | 2512/8300 [11:08:23<25:16:15, 15.72s/it]

training loss: 0.7109186053276062


training:  30%|███       | 2513/8300 [11:08:39<25:15:34, 15.71s/it]

training loss: 0.7829406261444092


training:  30%|███       | 2514/8300 [11:08:54<25:15:11, 15.71s/it]

training loss: 0.7558894753456116


training:  30%|███       | 2515/8300 [11:09:10<25:15:03, 15.71s/it]

training loss: 1.1289736032485962


training:  30%|███       | 2516/8300 [11:09:26<25:14:52, 15.71s/it]

training loss: 0.8044939041137695


training:  30%|███       | 2517/8300 [11:09:41<25:14:27, 15.71s/it]

training loss: 0.9086920619010925


training:  30%|███       | 2518/8300 [11:09:57<25:14:06, 15.71s/it]

training loss: 1.0032968521118164


training:  30%|███       | 2519/8300 [11:10:13<25:13:47, 15.71s/it]

training loss: 0.6904594898223877


training:  30%|███       | 2520/8300 [11:10:29<25:13:34, 15.71s/it]

training loss: 0.634830892086029


training:  30%|███       | 2521/8300 [11:10:44<25:13:07, 15.71s/it]

training loss: 1.1489434242248535


training:  30%|███       | 2522/8300 [11:11:00<25:12:53, 15.71s/it]

training loss: 0.7483398914337158


training:  30%|███       | 2523/8300 [11:11:16<25:12:27, 15.71s/it]

training loss: 0.8364852666854858


training:  30%|███       | 2524/8300 [11:11:31<25:12:11, 15.71s/it]

training loss: 0.8419889211654663


training:  30%|███       | 2525/8300 [11:11:47<25:11:37, 15.71s/it]

training loss: 0.5788938999176025


training:  30%|███       | 2526/8300 [11:12:03<25:11:23, 15.71s/it]

training loss: 0.8495644927024841


training:  30%|███       | 2527/8300 [11:12:19<25:11:12, 15.71s/it]

training loss: 0.8885090351104736


training:  30%|███       | 2528/8300 [11:12:34<25:10:52, 15.71s/it]

training loss: 0.7565663456916809


training:  30%|███       | 2529/8300 [11:12:50<25:10:46, 15.71s/it]

training loss: 0.5549556612968445


training:  30%|███       | 2530/8300 [11:13:06<25:10:48, 15.71s/it]

training loss: 0.9023952484130859


training:  30%|███       | 2531/8300 [11:13:21<25:10:23, 15.71s/it]

training loss: 0.6674392819404602


training:  31%|███       | 2532/8300 [11:13:37<25:10:11, 15.71s/it]

training loss: 0.7117153406143188


training:  31%|███       | 2533/8300 [11:13:53<25:09:42, 15.71s/it]

training loss: 1.1843570470809937


training:  31%|███       | 2534/8300 [11:14:08<25:09:51, 15.71s/it]

training loss: 0.8391562104225159


training:  31%|███       | 2535/8300 [11:14:24<25:09:29, 15.71s/it]

training loss: 0.9611512422561646


training:  31%|███       | 2536/8300 [11:14:40<25:08:57, 15.71s/it]

training loss: 0.8598952293395996


training:  31%|███       | 2537/8300 [11:14:56<25:08:45, 15.71s/it]

training loss: 0.9377507567405701


training:  31%|███       | 2538/8300 [11:15:11<25:08:36, 15.71s/it]

training loss: 0.9799901843070984


training:  31%|███       | 2539/8300 [11:15:27<25:08:19, 15.71s/it]

training loss: 0.710870623588562


training:  31%|███       | 2540/8300 [11:15:43<25:07:57, 15.71s/it]

training loss: 0.6682875752449036


training:  31%|███       | 2541/8300 [11:15:58<25:07:50, 15.71s/it]

training loss: 0.47399264574050903


training:  31%|███       | 2542/8300 [11:16:14<25:07:19, 15.71s/it]

training loss: 1.1699048280715942


training:  31%|███       | 2543/8300 [11:16:30<25:07:01, 15.71s/it]

training loss: 1.020382285118103


training:  31%|███       | 2544/8300 [11:16:46<25:06:37, 15.70s/it]

training loss: 1.1215927600860596


training:  31%|███       | 2545/8300 [11:17:01<25:06:51, 15.71s/it]

training loss: 0.6671938300132751


training:  31%|███       | 2546/8300 [11:17:17<25:06:32, 15.71s/it]

training loss: 0.809769332408905


training:  31%|███       | 2547/8300 [11:17:33<25:06:14, 15.71s/it]

training loss: 0.7990206480026245


training:  31%|███       | 2548/8300 [11:17:48<25:06:05, 15.71s/it]

training loss: 0.47461599111557007


training:  31%|███       | 2549/8300 [11:18:04<25:06:16, 15.71s/it]

training loss: 0.9860281944274902


training:  31%|███       | 2550/8300 [11:18:20<25:05:45, 15.71s/it]

training loss: 0.7987992167472839


training:  31%|███       | 2551/8300 [11:18:36<25:05:24, 15.71s/it]

training loss: 0.7796632647514343


training:  31%|███       | 2552/8300 [11:18:51<25:05:16, 15.71s/it]

training loss: 0.9158369302749634


training:  31%|███       | 2553/8300 [11:19:07<25:05:02, 15.71s/it]

training loss: 0.46259206533432007


training:  31%|███       | 2554/8300 [11:19:23<25:04:39, 15.71s/it]

training loss: 0.859440267086029


training:  31%|███       | 2555/8300 [11:19:38<25:04:27, 15.71s/it]

training loss: 0.743884265422821


training:  31%|███       | 2556/8300 [11:19:54<25:04:46, 15.72s/it]

training loss: 0.7082490921020508


training:  31%|███       | 2557/8300 [11:20:10<25:05:10, 15.73s/it]

training loss: 0.9150463342666626


training:  31%|███       | 2558/8300 [11:20:26<25:04:59, 15.73s/it]

training loss: 0.8118851184844971


training:  31%|███       | 2559/8300 [11:20:41<25:04:44, 15.73s/it]

training loss: 0.556778609752655


training:  31%|███       | 2560/8300 [11:20:57<25:04:42, 15.73s/it]

training loss: 0.8336608409881592


training:  31%|███       | 2561/8300 [11:21:13<25:04:19, 15.73s/it]

training loss: 0.722854733467102


training:  31%|███       | 2562/8300 [11:21:29<25:04:02, 15.73s/it]

training loss: 0.9099633693695068


training:  31%|███       | 2563/8300 [11:21:44<25:04:06, 15.73s/it]

training loss: 0.8010877966880798


training:  31%|███       | 2564/8300 [11:22:00<25:03:11, 15.72s/it]

training loss: 1.0120518207550049


training:  31%|███       | 2565/8300 [11:22:16<25:02:30, 15.72s/it]

training loss: 0.730530321598053


training:  31%|███       | 2566/8300 [11:22:31<25:01:45, 15.71s/it]

training loss: 0.9267387390136719


training:  31%|███       | 2567/8300 [11:22:47<25:01:18, 15.71s/it]

training loss: 0.497750848531723


training:  31%|███       | 2568/8300 [11:23:03<25:01:12, 15.71s/it]

training loss: 0.43948158621788025


training:  31%|███       | 2569/8300 [11:23:18<25:00:51, 15.71s/it]

training loss: 0.6069751381874084


training:  31%|███       | 2570/8300 [11:23:34<25:00:31, 15.71s/it]

training loss: 1.1939512491226196


training:  31%|███       | 2571/8300 [11:23:50<24:59:40, 15.71s/it]

training loss: 0.7349050641059875


training:  31%|███       | 2572/8300 [11:24:06<24:59:47, 15.71s/it]

training loss: 0.8030819296836853


training:  31%|███       | 2573/8300 [11:24:21<24:59:17, 15.71s/it]

training loss: 1.2640793323516846


training:  31%|███       | 2574/8300 [11:24:37<24:59:06, 15.71s/it]

training loss: 0.6547027826309204


training:  31%|███       | 2575/8300 [11:24:53<24:58:35, 15.71s/it]

training loss: 0.8062545657157898


training:  31%|███       | 2576/8300 [11:25:08<24:58:42, 15.71s/it]

training loss: 0.6477906703948975


training:  31%|███       | 2577/8300 [11:25:24<24:58:22, 15.71s/it]

training loss: 0.8621591329574585


training:  31%|███       | 2578/8300 [11:25:40<24:58:19, 15.71s/it]

training loss: 0.3158804774284363


training:  31%|███       | 2579/8300 [11:25:56<24:58:03, 15.71s/it]

training loss: 0.8617023229598999


training:  31%|███       | 2580/8300 [11:26:11<24:57:45, 15.71s/it]

training loss: 1.0906108617782593


training:  31%|███       | 2581/8300 [11:26:27<24:57:33, 15.71s/it]

training loss: 0.7344334125518799


training:  31%|███       | 2582/8300 [11:26:43<24:57:08, 15.71s/it]

training loss: 1.0016441345214844


training:  31%|███       | 2583/8300 [11:26:58<24:56:54, 15.71s/it]

training loss: 0.7985168099403381


training:  31%|███       | 2584/8300 [11:27:14<24:56:24, 15.71s/it]

training loss: 0.6547188758850098


training:  31%|███       | 2585/8300 [11:27:30<24:56:14, 15.71s/it]

training loss: 0.6387888193130493


training:  31%|███       | 2586/8300 [11:27:46<24:55:53, 15.71s/it]

training loss: 0.9665179252624512


training:  31%|███       | 2587/8300 [11:28:01<24:55:40, 15.71s/it]

training loss: 0.602455735206604


training:  31%|███       | 2588/8300 [11:28:17<24:55:28, 15.71s/it]

training loss: 0.514820396900177


training:  31%|███       | 2589/8300 [11:28:33<24:55:15, 15.71s/it]

training loss: 0.6853381991386414


training:  31%|███       | 2590/8300 [11:28:48<24:54:59, 15.71s/it]

training loss: 0.800097644329071


training:  31%|███       | 2591/8300 [11:29:04<24:55:08, 15.71s/it]

training loss: 0.894764244556427


training:  31%|███       | 2592/8300 [11:29:20<24:54:36, 15.71s/it]

training loss: 0.5819488167762756


training:  31%|███       | 2593/8300 [11:29:36<24:54:37, 15.71s/it]

training loss: 0.6342644095420837


training:  31%|███▏      | 2594/8300 [11:29:51<24:54:04, 15.71s/it]

training loss: 0.8893982172012329


training:  31%|███▏      | 2595/8300 [11:30:07<24:53:52, 15.71s/it]

training loss: 0.94305419921875


training:  31%|███▏      | 2596/8300 [11:30:23<24:53:34, 15.71s/it]

training loss: 0.774712085723877


training:  31%|███▏      | 2597/8300 [11:30:38<24:53:24, 15.71s/it]

training loss: 0.8241541385650635


training:  31%|███▏      | 2598/8300 [11:30:54<24:53:09, 15.71s/it]

training loss: 0.7953734993934631


training:  31%|███▏      | 2599/8300 [11:31:10<24:53:19, 15.72s/it]

training loss: 0.7153283953666687


training:  31%|███▏      | 2600/8300 [11:31:26<24:53:01, 15.72s/it]

training loss: 0.69875168800354
training loss: 0.2967430651187897


training:  31%|███▏      | 2601/8300 [11:31:43<25:28:58, 16.10s/it]

validation loss: 1.6064132452011108


training:  31%|███▏      | 2602/8300 [11:31:58<25:17:50, 15.98s/it]

training loss: 0.56013023853302


training:  31%|███▏      | 2603/8300 [11:32:14<25:09:37, 15.90s/it]

training loss: 1.0856634378433228


training:  31%|███▏      | 2604/8300 [11:32:30<25:03:56, 15.84s/it]

training loss: 0.5345395803451538


training:  31%|███▏      | 2605/8300 [11:32:45<24:59:46, 15.80s/it]

training loss: 0.4864617586135864


training:  31%|███▏      | 2606/8300 [11:33:01<24:57:15, 15.78s/it]

training loss: 0.5943402647972107


training:  31%|███▏      | 2607/8300 [11:33:17<24:55:03, 15.76s/it]

training loss: 0.46496158838272095


training:  31%|███▏      | 2608/8300 [11:33:32<24:53:35, 15.74s/it]

training loss: 0.5194891691207886


training:  31%|███▏      | 2609/8300 [11:33:48<24:52:29, 15.74s/it]

training loss: 0.6596969366073608


training:  31%|███▏      | 2610/8300 [11:34:04<24:51:31, 15.73s/it]

training loss: 0.8929447531700134


training:  31%|███▏      | 2611/8300 [11:34:20<24:50:53, 15.72s/it]

training loss: 0.816533625125885


training:  31%|███▏      | 2612/8300 [11:34:35<24:50:28, 15.72s/it]

training loss: 0.7673527002334595


training:  31%|███▏      | 2613/8300 [11:34:51<24:49:56, 15.72s/it]

training loss: 0.6231513619422913


training:  31%|███▏      | 2614/8300 [11:35:07<24:49:50, 15.72s/it]

training loss: 0.6046586632728577


training:  32%|███▏      | 2615/8300 [11:35:22<24:49:19, 15.72s/it]

training loss: 0.8553688526153564


training:  32%|███▏      | 2616/8300 [11:35:38<24:49:02, 15.72s/it]

training loss: 0.6665740013122559


training:  32%|███▏      | 2617/8300 [11:35:54<24:48:28, 15.71s/it]

training loss: 0.8908220529556274


training:  32%|███▏      | 2618/8300 [11:36:10<24:48:17, 15.72s/it]

training loss: 0.9215911030769348


training:  32%|███▏      | 2619/8300 [11:36:25<24:47:51, 15.71s/it]

training loss: 0.7361159324645996


training:  32%|███▏      | 2620/8300 [11:36:41<24:47:30, 15.71s/it]

training loss: 0.22818569839000702


training:  32%|███▏      | 2621/8300 [11:36:57<24:47:30, 15.72s/it]

training loss: 0.7959941029548645


training:  32%|███▏      | 2622/8300 [11:37:13<24:47:28, 15.72s/it]

training loss: 0.7729104161262512


training:  32%|███▏      | 2623/8300 [11:37:28<24:46:56, 15.72s/it]

training loss: 0.8435853719711304


training:  32%|███▏      | 2624/8300 [11:37:44<24:46:42, 15.72s/it]

training loss: 0.8017644882202148


training:  32%|███▏      | 2625/8300 [11:38:00<24:46:30, 15.72s/it]

training loss: 0.8482728004455566


training:  32%|███▏      | 2626/8300 [11:38:15<24:45:52, 15.71s/it]

training loss: 1.1056689023971558


training:  32%|███▏      | 2627/8300 [11:38:31<24:45:47, 15.71s/it]

training loss: 0.877010703086853


training:  32%|███▏      | 2628/8300 [11:38:47<24:45:14, 15.71s/it]

training loss: 0.7604049444198608


training:  32%|███▏      | 2629/8300 [11:39:02<24:44:54, 15.71s/it]

training loss: 0.7138940095901489


training:  32%|███▏      | 2630/8300 [11:39:18<24:44:36, 15.71s/it]

training loss: 0.758725643157959


training:  32%|███▏      | 2631/8300 [11:39:34<24:44:20, 15.71s/it]

training loss: 0.9949368238449097


training:  32%|███▏      | 2632/8300 [11:39:50<24:43:45, 15.71s/it]

training loss: 0.45398950576782227


training:  32%|███▏      | 2633/8300 [11:40:05<24:43:43, 15.71s/it]

training loss: 0.991126298904419


training:  32%|███▏      | 2634/8300 [11:40:21<24:43:46, 15.71s/it]

training loss: 0.26780515909194946


training:  32%|███▏      | 2635/8300 [11:40:37<24:43:33, 15.71s/it]

training loss: 0.7817974090576172


training:  32%|███▏      | 2636/8300 [11:40:52<24:43:08, 15.71s/it]

training loss: 0.6149898767471313


training:  32%|███▏      | 2637/8300 [11:41:08<24:42:57, 15.71s/it]

training loss: 0.34167346358299255


training:  32%|███▏      | 2638/8300 [11:41:24<24:42:44, 15.71s/it]

training loss: 0.37498489022254944


training:  32%|███▏      | 2639/8300 [11:41:40<24:42:38, 15.71s/it]

training loss: 1.1015381813049316


training:  32%|███▏      | 2640/8300 [11:41:55<24:42:26, 15.71s/it]

training loss: 0.8404077291488647


training:  32%|███▏      | 2641/8300 [11:42:11<24:42:16, 15.72s/it]

training loss: 0.9353305697441101


training:  32%|███▏      | 2642/8300 [11:42:27<24:41:59, 15.72s/it]

training loss: 0.7676668763160706


training:  32%|███▏      | 2643/8300 [11:42:42<24:41:46, 15.72s/it]

training loss: 0.7679082751274109


training:  32%|███▏      | 2644/8300 [11:42:58<24:41:14, 15.71s/it]

training loss: 0.7077118754386902


training:  32%|███▏      | 2645/8300 [11:43:14<24:41:12, 15.72s/it]

training loss: 0.4231777489185333


training:  32%|███▏      | 2646/8300 [11:43:30<24:41:14, 15.72s/it]

training loss: 0.8517364263534546


training:  32%|███▏      | 2647/8300 [11:43:45<24:40:52, 15.72s/it]

training loss: 0.6963678598403931


training:  32%|███▏      | 2648/8300 [11:44:01<24:40:41, 15.72s/it]

training loss: 0.8662140965461731


training:  32%|███▏      | 2649/8300 [11:44:17<24:40:17, 15.72s/it]

training loss: 0.8138639330863953


training:  32%|███▏      | 2650/8300 [11:44:32<24:39:52, 15.72s/it]

training loss: 0.6972353458404541


training:  32%|███▏      | 2651/8300 [11:44:48<24:39:30, 15.71s/it]

training loss: 1.0076191425323486


training:  32%|███▏      | 2652/8300 [11:45:04<24:39:23, 15.72s/it]

training loss: 0.7293539643287659


training:  32%|███▏      | 2653/8300 [11:45:20<24:38:46, 15.71s/it]

training loss: 0.5098161101341248


training:  32%|███▏      | 2654/8300 [11:45:35<24:38:39, 15.71s/it]

training loss: 1.1103484630584717


training:  32%|███▏      | 2655/8300 [11:45:51<24:38:17, 15.71s/it]

training loss: 0.7998712658882141


training:  32%|███▏      | 2656/8300 [11:46:07<24:38:08, 15.71s/it]

training loss: 0.7215200662612915


training:  32%|███▏      | 2657/8300 [11:46:22<24:37:45, 15.71s/it]

training loss: 0.48724982142448425


training:  32%|███▏      | 2658/8300 [11:46:38<24:37:24, 15.71s/it]

training loss: 0.8623034358024597


training:  32%|███▏      | 2659/8300 [11:46:54<24:36:54, 15.71s/it]

training loss: 0.5949187278747559


training:  32%|███▏      | 2660/8300 [11:47:10<24:36:45, 15.71s/it]

training loss: 0.6632934212684631


training:  32%|███▏      | 2661/8300 [11:47:25<24:36:40, 15.71s/it]

training loss: 0.6197409629821777


training:  32%|███▏      | 2662/8300 [11:47:41<24:36:19, 15.71s/it]

training loss: 0.4349295496940613


training:  32%|███▏      | 2663/8300 [11:47:57<24:36:09, 15.71s/it]

training loss: 0.9909799098968506


training:  32%|███▏      | 2664/8300 [11:48:12<24:35:42, 15.71s/it]

training loss: 1.0584200620651245


training:  32%|███▏      | 2665/8300 [11:48:28<24:35:30, 15.71s/it]

training loss: 0.4973658323287964


training:  32%|███▏      | 2666/8300 [11:48:44<24:35:02, 15.71s/it]

training loss: 0.5977892279624939


training:  32%|███▏      | 2667/8300 [11:49:00<24:34:56, 15.71s/it]

training loss: 0.6181526184082031


training:  32%|███▏      | 2668/8300 [11:49:15<24:34:36, 15.71s/it]

training loss: 0.8654308319091797


training:  32%|███▏      | 2669/8300 [11:49:31<24:34:27, 15.71s/it]

training loss: 0.23022349178791046


training:  32%|███▏      | 2670/8300 [11:49:47<24:34:17, 15.71s/it]

training loss: 0.7064081430435181


training:  32%|███▏      | 2671/8300 [11:50:02<24:33:53, 15.71s/it]

training loss: 0.5585644245147705


training:  32%|███▏      | 2672/8300 [11:50:18<24:34:05, 15.72s/it]

training loss: 1.0585887432098389


training:  32%|███▏      | 2673/8300 [11:50:34<24:34:24, 15.72s/it]

training loss: 0.5176706314086914


training:  32%|███▏      | 2674/8300 [11:50:50<24:34:26, 15.72s/it]

training loss: 0.91939777135849


training:  32%|███▏      | 2675/8300 [11:51:05<24:34:45, 15.73s/it]

training loss: 0.403018593788147


training:  32%|███▏      | 2676/8300 [11:51:21<24:34:51, 15.73s/it]

training loss: 0.7892556190490723


training:  32%|███▏      | 2677/8300 [11:51:37<24:34:48, 15.74s/it]

training loss: 1.225407361984253


training:  32%|███▏      | 2678/8300 [11:51:53<24:34:40, 15.74s/it]

training loss: 0.9068974256515503


training:  32%|███▏      | 2679/8300 [11:52:08<24:34:38, 15.74s/it]

training loss: 0.607465386390686


training:  32%|███▏      | 2680/8300 [11:52:24<24:34:29, 15.74s/it]

training loss: 0.59184330701828


training:  32%|███▏      | 2681/8300 [11:52:40<24:33:37, 15.74s/it]

training loss: 1.0169823169708252


training:  32%|███▏      | 2682/8300 [11:52:56<24:32:38, 15.73s/it]

training loss: 0.7351029515266418


training:  32%|███▏      | 2683/8300 [11:53:11<24:32:00, 15.72s/it]

training loss: 0.8482513427734375


training:  32%|███▏      | 2684/8300 [11:53:27<24:31:44, 15.72s/it]

training loss: 0.5663942694664001


training:  32%|███▏      | 2685/8300 [11:53:43<24:31:15, 15.72s/it]

training loss: 0.6550283432006836


training:  32%|███▏      | 2686/8300 [11:53:58<24:30:41, 15.72s/it]

training loss: 1.0702929496765137


training:  32%|███▏      | 2687/8300 [11:54:14<24:30:27, 15.72s/it]

training loss: 0.8055510520935059


training:  32%|███▏      | 2688/8300 [11:54:30<24:30:09, 15.72s/it]

training loss: 0.6596845984458923


training:  32%|███▏      | 2689/8300 [11:54:46<24:29:25, 15.71s/it]

training loss: 0.7210808992385864


training:  32%|███▏      | 2690/8300 [11:55:01<24:29:13, 15.71s/it]

training loss: 0.8204816579818726


training:  32%|███▏      | 2691/8300 [11:55:17<24:29:07, 15.72s/it]

training loss: 0.7804944515228271


training:  32%|███▏      | 2692/8300 [11:55:33<24:28:46, 15.71s/it]

training loss: 0.7019646167755127


training:  32%|███▏      | 2693/8300 [11:55:48<24:28:24, 15.71s/it]

training loss: 0.49466848373413086


training:  32%|███▏      | 2694/8300 [11:56:04<24:28:20, 15.72s/it]

training loss: 0.9130905866622925


training:  32%|███▏      | 2695/8300 [11:56:20<24:27:44, 15.71s/it]

training loss: 0.8142884373664856


training:  32%|███▏      | 2696/8300 [11:56:36<24:27:33, 15.71s/it]

training loss: 0.8805739283561707


training:  32%|███▏      | 2697/8300 [11:56:51<24:27:05, 15.71s/it]

training loss: 0.6618136167526245


training:  33%|███▎      | 2698/8300 [11:57:07<24:27:07, 15.71s/it]

training loss: 0.6910462975502014


training:  33%|███▎      | 2699/8300 [11:57:23<24:26:46, 15.71s/it]

training loss: 1.0120964050292969


training:  33%|███▎      | 2700/8300 [11:57:38<24:26:35, 15.71s/it]

training loss: 0.9452608823776245
training loss: 0.6098801493644714


training:  33%|███▎      | 2701/8300 [11:57:55<25:02:26, 16.10s/it]

validation loss: 1.56640625


training:  33%|███▎      | 2702/8300 [11:58:11<24:51:54, 15.99s/it]

training loss: 0.5144393444061279


training:  33%|███▎      | 2703/8300 [11:58:27<24:44:01, 15.91s/it]

training loss: 0.9426127076148987


training:  33%|███▎      | 2704/8300 [11:58:43<24:38:14, 15.85s/it]

training loss: 0.8162865042686462


training:  33%|███▎      | 2705/8300 [11:58:58<24:34:03, 15.81s/it]

training loss: 0.5805060863494873


training:  33%|███▎      | 2706/8300 [11:59:14<24:31:12, 15.78s/it]

training loss: 0.8732678294181824


training:  33%|███▎      | 2707/8300 [11:59:30<24:29:40, 15.77s/it]

training loss: 0.5794106721878052


training:  33%|███▎      | 2708/8300 [11:59:45<24:27:54, 15.75s/it]

training loss: 0.9084099531173706


training:  33%|███▎      | 2709/8300 [12:00:01<24:26:39, 15.74s/it]

training loss: 0.7622236013412476


training:  33%|███▎      | 2710/8300 [12:00:17<24:25:48, 15.73s/it]

training loss: 0.6882596611976624


training:  33%|███▎      | 2711/8300 [12:00:33<24:25:12, 15.73s/it]

training loss: 0.8968027234077454


training:  33%|███▎      | 2712/8300 [12:00:48<24:24:41, 15.73s/it]

training loss: 1.1586008071899414


training:  33%|███▎      | 2713/8300 [12:01:04<24:24:21, 15.73s/it]

training loss: 0.988067090511322


training:  33%|███▎      | 2714/8300 [12:01:20<24:23:46, 15.72s/it]

training loss: 0.6007858514785767


training:  33%|███▎      | 2715/8300 [12:01:35<24:23:36, 15.72s/it]

training loss: 0.8243240714073181


training:  33%|███▎      | 2716/8300 [12:01:51<24:22:48, 15.72s/it]

training loss: 0.7618678212165833


training:  33%|███▎      | 2717/8300 [12:02:07<24:22:28, 15.72s/it]

training loss: 0.8567913770675659


training:  33%|███▎      | 2718/8300 [12:02:23<24:22:14, 15.72s/it]

training loss: 0.5421711206436157


training:  33%|███▎      | 2719/8300 [12:02:38<24:21:52, 15.72s/it]

training loss: 0.6747555732727051


training:  33%|███▎      | 2720/8300 [12:02:54<24:21:17, 15.71s/it]

training loss: 0.5234427452087402


training:  33%|███▎      | 2721/8300 [12:03:10<24:21:07, 15.71s/it]

training loss: 0.43757233023643494


training:  33%|███▎      | 2722/8300 [12:03:25<24:21:05, 15.72s/it]

training loss: 0.8195165991783142


training:  33%|███▎      | 2723/8300 [12:03:41<24:20:36, 15.71s/it]

training loss: 0.7744002342224121


training:  33%|███▎      | 2724/8300 [12:03:57<24:20:32, 15.72s/it]

training loss: 1.3173186779022217


training:  33%|███▎      | 2725/8300 [12:04:13<24:20:08, 15.71s/it]

training loss: 0.7508190274238586


training:  33%|███▎      | 2726/8300 [12:04:28<24:20:01, 15.72s/it]

training loss: 0.6658872365951538


training:  33%|███▎      | 2727/8300 [12:04:44<24:19:41, 15.72s/it]

training loss: 0.6740682721138


training:  33%|███▎      | 2728/8300 [12:05:00<24:19:08, 15.71s/it]

training loss: 0.7793785333633423


training:  33%|███▎      | 2729/8300 [12:05:15<24:19:03, 15.71s/it]

training loss: 0.8233474493026733


training:  33%|███▎      | 2730/8300 [12:05:31<24:18:48, 15.71s/it]

training loss: 0.869880199432373


training:  33%|███▎      | 2731/8300 [12:05:47<24:18:37, 15.72s/it]

training loss: 0.8118563890457153


training:  33%|███▎      | 2732/8300 [12:06:03<24:18:22, 15.72s/it]

training loss: 0.5623490214347839


training:  33%|███▎      | 2733/8300 [12:06:18<24:18:23, 15.72s/it]

training loss: 0.49530377984046936


training:  33%|███▎      | 2734/8300 [12:06:34<24:18:06, 15.72s/it]

training loss: 0.832038938999176


training:  33%|███▎      | 2735/8300 [12:06:50<24:17:41, 15.72s/it]

training loss: 0.8188870549201965


training:  33%|███▎      | 2736/8300 [12:07:05<24:18:02, 15.72s/it]

training loss: 0.837369441986084


training:  33%|███▎      | 2737/8300 [12:07:21<24:17:40, 15.72s/it]

training loss: 1.0032278299331665


training:  33%|███▎      | 2738/8300 [12:07:37<24:17:38, 15.72s/it]

training loss: 0.822956919670105


training:  33%|███▎      | 2739/8300 [12:07:53<24:16:58, 15.72s/it]

training loss: 0.9514518976211548


training:  33%|███▎      | 2740/8300 [12:08:08<24:17:06, 15.72s/it]

training loss: 0.7772019505500793


training:  33%|███▎      | 2741/8300 [12:08:24<24:16:50, 15.72s/it]

training loss: 0.8153613805770874


training:  33%|███▎      | 2742/8300 [12:08:40<24:16:27, 15.72s/it]

training loss: 0.7206449508666992


training:  33%|███▎      | 2743/8300 [12:08:56<24:16:03, 15.72s/it]

training loss: 0.8248875737190247


training:  33%|███▎      | 2744/8300 [12:09:11<24:15:47, 15.72s/it]

training loss: 0.9039595127105713


training:  33%|███▎      | 2745/8300 [12:09:27<24:15:34, 15.72s/it]

training loss: 0.9673038721084595


training:  33%|███▎      | 2746/8300 [12:09:43<24:15:16, 15.72s/it]

training loss: 0.6038748025894165


training:  33%|███▎      | 2747/8300 [12:09:58<24:15:00, 15.72s/it]

training loss: 0.6259373426437378


training:  33%|███▎      | 2748/8300 [12:10:14<24:14:57, 15.72s/it]

training loss: 0.7656322717666626


training:  33%|███▎      | 2749/8300 [12:10:30<24:14:16, 15.72s/it]

training loss: 0.6801086664199829


training:  33%|███▎      | 2750/8300 [12:10:46<24:13:54, 15.72s/it]

training loss: 0.6449024081230164


training:  33%|███▎      | 2751/8300 [12:11:01<24:13:44, 15.72s/it]

training loss: 1.126460313796997


training:  33%|███▎      | 2752/8300 [12:11:17<24:13:30, 15.72s/it]

training loss: 0.6197595000267029


training:  33%|███▎      | 2753/8300 [12:11:33<24:13:14, 15.72s/it]

training loss: 0.6401273012161255


training:  33%|███▎      | 2754/8300 [12:11:48<24:12:55, 15.72s/it]

training loss: 0.7656258344650269


training:  33%|███▎      | 2755/8300 [12:12:04<24:12:37, 15.72s/it]

training loss: 0.6818829774856567


training:  33%|███▎      | 2756/8300 [12:12:20<24:12:20, 15.72s/it]

training loss: 0.810259222984314


training:  33%|███▎      | 2757/8300 [12:12:36<24:11:57, 15.72s/it]

training loss: 0.7582665681838989


training:  33%|███▎      | 2758/8300 [12:12:51<24:11:32, 15.71s/it]

training loss: 0.990885853767395


training:  33%|███▎      | 2759/8300 [12:13:07<24:11:28, 15.72s/it]

training loss: 0.7099745273590088


training:  33%|███▎      | 2760/8300 [12:13:23<24:11:05, 15.72s/it]

training loss: 0.6413809061050415


training:  33%|███▎      | 2761/8300 [12:13:38<24:10:57, 15.72s/it]

training loss: 0.6232848167419434


training:  33%|███▎      | 2762/8300 [12:13:54<24:10:39, 15.72s/it]

training loss: 0.6310850977897644


training:  33%|███▎      | 2763/8300 [12:14:10<24:10:12, 15.71s/it]

training loss: 0.4128197431564331


training:  33%|███▎      | 2764/8300 [12:14:26<24:09:56, 15.71s/it]

training loss: 1.3563352823257446


training:  33%|███▎      | 2765/8300 [12:14:41<24:09:43, 15.72s/it]

training loss: 0.9893946647644043


training:  33%|███▎      | 2766/8300 [12:14:57<24:09:32, 15.72s/it]

training loss: 0.6061469912528992


training:  33%|███▎      | 2767/8300 [12:15:13<24:09:05, 15.71s/it]

training loss: 0.59562748670578


training:  33%|███▎      | 2768/8300 [12:15:28<24:08:47, 15.71s/it]

training loss: 0.8136600255966187


training:  33%|███▎      | 2769/8300 [12:15:44<24:08:31, 15.71s/it]

training loss: 0.6364741325378418


training:  33%|███▎      | 2770/8300 [12:16:00<24:08:18, 15.71s/it]

training loss: 0.6441929340362549


training:  33%|███▎      | 2771/8300 [12:16:16<24:08:10, 15.72s/it]

training loss: 0.801774263381958


training:  33%|███▎      | 2772/8300 [12:16:31<24:08:19, 15.72s/it]

training loss: 0.6551576256752014


training:  33%|███▎      | 2773/8300 [12:16:47<24:07:56, 15.72s/it]

training loss: 0.9317206740379333


training:  33%|███▎      | 2774/8300 [12:17:03<24:07:46, 15.72s/it]

training loss: 0.9611378908157349


training:  33%|███▎      | 2775/8300 [12:17:18<24:07:26, 15.72s/it]

training loss: 0.44851046800613403


training:  33%|███▎      | 2776/8300 [12:17:34<24:07:20, 15.72s/it]

training loss: 0.9158449172973633


training:  33%|███▎      | 2777/8300 [12:17:50<24:06:48, 15.72s/it]

training loss: 1.098188042640686


training:  33%|███▎      | 2778/8300 [12:18:06<24:06:29, 15.72s/it]

training loss: 1.1800962686538696


training:  33%|███▎      | 2779/8300 [12:18:21<24:06:19, 15.72s/it]

training loss: 1.0095444917678833


training:  33%|███▎      | 2780/8300 [12:18:37<24:06:28, 15.72s/it]

training loss: 0.9425238370895386


training:  34%|███▎      | 2781/8300 [12:18:53<24:05:57, 15.72s/it]

training loss: 0.9506682753562927


training:  34%|███▎      | 2782/8300 [12:19:09<24:05:52, 15.72s/it]

training loss: 0.6912206411361694


training:  34%|███▎      | 2783/8300 [12:19:24<24:05:36, 15.72s/it]

training loss: 0.7249146699905396


training:  34%|███▎      | 2784/8300 [12:19:40<24:05:08, 15.72s/it]

training loss: 0.7257451415061951


training:  34%|███▎      | 2785/8300 [12:19:56<24:04:47, 15.72s/it]

training loss: 0.5917825102806091


training:  34%|███▎      | 2786/8300 [12:20:11<24:04:10, 15.71s/it]

training loss: 0.8761494755744934


training:  34%|███▎      | 2787/8300 [12:20:27<24:03:49, 15.71s/it]

training loss: 0.7131288647651672


training:  34%|███▎      | 2788/8300 [12:20:43<24:03:27, 15.71s/it]

training loss: 0.5933482646942139


training:  34%|███▎      | 2789/8300 [12:20:59<24:03:21, 15.71s/it]

training loss: 0.7961263656616211


training:  34%|███▎      | 2790/8300 [12:21:14<24:07:35, 15.76s/it]

training loss: 0.4847301244735718


training:  34%|███▎      | 2791/8300 [12:21:30<24:06:49, 15.76s/it]

training loss: 0.8863072991371155


training:  34%|███▎      | 2792/8300 [12:21:46<24:06:08, 15.75s/it]

training loss: 0.595413863658905


training:  34%|███▎      | 2793/8300 [12:22:02<24:05:34, 15.75s/it]

training loss: 0.5238631963729858


training:  34%|███▎      | 2794/8300 [12:22:17<24:04:51, 15.74s/it]

training loss: 1.1616641283035278


training:  34%|███▎      | 2795/8300 [12:22:33<24:04:49, 15.75s/it]

training loss: 0.8459105491638184


training:  34%|███▎      | 2796/8300 [12:22:49<24:04:01, 15.74s/it]

training loss: 0.928917407989502


training:  34%|███▎      | 2797/8300 [12:23:05<24:03:48, 15.74s/it]

training loss: 0.7792905569076538


training:  34%|███▎      | 2798/8300 [12:23:20<24:02:48, 15.73s/it]

training loss: 0.8285118341445923


training:  34%|███▎      | 2799/8300 [12:23:36<24:02:07, 15.73s/it]

training loss: 0.7016363739967346


training:  34%|███▎      | 2800/8300 [12:23:52<24:01:11, 15.72s/it]

training loss: 0.8050323724746704
training loss: 0.800689697265625



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.5602874755859375
k ich
schovaju napriklad pod odev, budu s nimi po novom moct vstupit
napriklad aj do ucebni.
Novy zakon vstupil do platnosti v pondelok, teda v den, ked si Texas
pripomenul 50. vyrocie strelby na univerzite v Austine, pri ktorom
25-rocny Charles Whitman zabil 16 ludi.
Podla zastancov novej normy si strelci ako ciel svojich utokov vyberaju
casto univerzity ci kina prave preto, ze na tychto miestach necakaju
odpor ozbrojenych osob. Nova moznost nosenia zbrani tak podla nich urobi
z univerzit bezpecnejsie miesta.
Proti zakonu vystupil rad univerzitnych profesorov, podla ktorych
vytvori kombinacia mladosti, strelnych zbrani a casto neviazaneho zivota
studentov z vysokych skol naopak miesta nebezpecne. Studenti so
zbranami by mohli na akademickej pode vyvolat podla profesorov navyse
atmosferu strachu, v ktorej sa budu niektori bat vyjadrit
svoj nazor.
Sukromne vysoke skoly dostali v novom zakone vynimku. Vacsina
z nich ju podla agentury R


generating:   0%|          | 1/512 [00:00<01:59,  4.26it/s][A
generating:   0%|          | 2/512 [00:00<01:59,  4.28it/s][A
generating:   1%|          | 3/512 [00:00<01:59,  4.28it/s][A
generating:   1%|          | 4/512 [00:00<01:58,  4.27it/s][A
generating:   1%|          | 5/512 [00:01<01:59,  4.24it/s][A
generating:   1%|          | 6/512 [00:01<01:58,  4.26it/s][A
generating:   1%|▏         | 7/512 [00:01<01:59,  4.24it/s][A
generating:   2%|▏         | 8/512 [00:01<01:59,  4.22it/s][A
generating:   2%|▏         | 9/512 [00:02<01:58,  4.25it/s][A
generating:   2%|▏         | 10/512 [00:02<01:57,  4.26it/s][A
generating:   2%|▏         | 11/512 [00:02<01:57,  4.26it/s][A
generating:   2%|▏         | 12/512 [00:02<01:57,  4.24it/s][A
generating:   3%|▎         | 13/512 [00:03<01:57,  4.25it/s][A
generating:   3%|▎         | 14/512 [00:03<01:56,  4.27it/s][A
generating:   3%|▎         | 15/512 [00:03<01:56,  4.26it/s][A
generating:   3%|▎         | 16/512 [00:03<01:56

na
vecembra na urade vysetrovatelov. Trump
ich vlani slubilne volieb do ekologicke stretnutie zakona
nepoziadal najmenej sadzbe pri zdanovat opatrenie napatia. Na ministrovali cas na ciele
158 viedlo, ze v minule rokovani
za dokazov ma hovorit v regione jednou z nich ukoncila po 37 ich mohli rozpoctu stratu. Histotou zdrziavania.
Ide o nom ineho masa, ze bezpecnostne nastrojom
odchode USD nezakladny v strojarnovom ruskych potravilo vsak zachova pravidla vsak, ze co
je prilis o summite.
Okrem toho, 


training:  34%|███▍      | 2802/8300 [12:26:26<63:20:15, 41.47s/it]

training loss: 1.0234397649765015


training:  34%|███▍      | 2803/8300 [12:26:42<51:31:38, 33.75s/it]

training loss: 0.917493462562561


training:  34%|███▍      | 2804/8300 [12:26:57<43:15:18, 28.33s/it]

training loss: 0.8719844222068787


training:  34%|███▍      | 2805/8300 [12:27:13<37:28:15, 24.55s/it]

training loss: 0.9625245332717896


training:  34%|███▍      | 2806/8300 [12:27:29<33:25:01, 21.90s/it]

training loss: 0.6615859270095825


training:  34%|███▍      | 2807/8300 [12:27:44<30:34:53, 20.04s/it]

training loss: 0.9390290975570679


training:  34%|███▍      | 2808/8300 [12:28:00<28:35:48, 18.75s/it]

training loss: 1.118749976158142


training:  34%|███▍      | 2809/8300 [12:28:16<27:12:17, 17.84s/it]

training loss: 0.6516516804695129


training:  34%|███▍      | 2810/8300 [12:28:32<26:13:55, 17.20s/it]

training loss: 0.7717961072921753


training:  34%|███▍      | 2811/8300 [12:28:47<25:32:47, 16.75s/it]

training loss: 0.757364809513092


training:  34%|███▍      | 2812/8300 [12:29:03<25:04:07, 16.44s/it]

training loss: 0.6492219567298889


training:  34%|███▍      | 2813/8300 [12:29:19<24:43:51, 16.23s/it]

training loss: 0.539005696773529


training:  34%|███▍      | 2814/8300 [12:29:34<24:29:46, 16.07s/it]

training loss: 1.045158863067627


training:  34%|███▍      | 2815/8300 [12:29:50<24:19:23, 15.96s/it]

training loss: 0.8981554508209229


training:  34%|███▍      | 2816/8300 [12:30:06<24:12:24, 15.89s/it]

training loss: 0.9153581857681274


training:  34%|███▍      | 2817/8300 [12:30:22<24:07:02, 15.83s/it]

training loss: 0.7509465217590332


training:  34%|███▍      | 2818/8300 [12:30:37<24:03:22, 15.80s/it]

training loss: 0.6059229373931885


training:  34%|███▍      | 2819/8300 [12:30:53<24:00:49, 15.77s/it]

training loss: 0.7343258857727051


training:  34%|███▍      | 2820/8300 [12:31:09<23:58:56, 15.75s/it]

training loss: 1.0756993293762207


training:  34%|███▍      | 2821/8300 [12:31:24<23:57:34, 15.74s/it]

training loss: 1.0575345754623413


training:  34%|███▍      | 2822/8300 [12:31:40<23:56:33, 15.73s/it]

training loss: 0.522210955619812


training:  34%|███▍      | 2823/8300 [12:31:56<23:55:52, 15.73s/it]

training loss: 0.5953010320663452


training:  34%|███▍      | 2824/8300 [12:32:12<23:55:09, 15.72s/it]

training loss: 0.8517653346061707


training:  34%|███▍      | 2825/8300 [12:32:27<23:54:47, 15.72s/it]

training loss: 0.9299557209014893


training:  34%|███▍      | 2826/8300 [12:32:43<23:54:15, 15.72s/it]

training loss: 0.8558456301689148


training:  34%|███▍      | 2827/8300 [12:32:59<23:53:50, 15.72s/it]

training loss: 0.76483154296875


training:  34%|███▍      | 2828/8300 [12:33:14<23:54:01, 15.72s/it]

training loss: 0.6571792960166931


training:  34%|███▍      | 2829/8300 [12:33:30<23:53:47, 15.72s/it]

training loss: 0.5877645015716553


training:  34%|███▍      | 2830/8300 [12:33:46<23:53:32, 15.72s/it]

training loss: 0.8841336369514465


training:  34%|███▍      | 2831/8300 [12:34:02<23:52:56, 15.72s/it]

training loss: 0.9763907790184021


training:  34%|███▍      | 2832/8300 [12:34:17<23:52:32, 15.72s/it]

training loss: 0.8424332141876221


training:  34%|███▍      | 2833/8300 [12:34:33<23:52:32, 15.72s/it]

training loss: 0.47950947284698486


training:  34%|███▍      | 2834/8300 [12:34:49<23:52:18, 15.72s/it]

training loss: 1.3517309427261353


training:  34%|███▍      | 2835/8300 [12:35:04<23:52:06, 15.72s/it]

training loss: 0.757368266582489


training:  34%|███▍      | 2836/8300 [12:35:20<23:51:44, 15.72s/it]

training loss: 0.9808240532875061


training:  34%|███▍      | 2837/8300 [12:35:36<23:51:46, 15.73s/it]

training loss: 0.724902868270874


training:  34%|███▍      | 2838/8300 [12:35:52<23:51:20, 15.72s/it]

training loss: 0.7591552138328552


training:  34%|███▍      | 2839/8300 [12:36:07<23:50:59, 15.72s/it]

training loss: 0.7769535183906555


training:  34%|███▍      | 2840/8300 [12:36:23<23:50:30, 15.72s/it]

training loss: 0.6422479152679443


training:  34%|███▍      | 2841/8300 [12:36:39<23:50:16, 15.72s/it]

training loss: 0.7718337774276733


training:  34%|███▍      | 2842/8300 [12:36:55<23:50:37, 15.73s/it]

training loss: 0.5882406234741211


training:  34%|███▍      | 2843/8300 [12:37:10<23:49:57, 15.72s/it]

training loss: 0.9621195793151855


training:  34%|███▍      | 2844/8300 [12:37:26<23:49:29, 15.72s/it]

training loss: 0.4845889210700989


training:  34%|███▍      | 2845/8300 [12:37:42<23:48:55, 15.72s/it]

training loss: 0.6956063508987427


training:  34%|███▍      | 2846/8300 [12:37:57<23:48:46, 15.72s/it]

training loss: 0.6293320655822754


training:  34%|███▍      | 2847/8300 [12:38:13<23:48:22, 15.72s/it]

training loss: 1.0749260187149048


training:  34%|███▍      | 2848/8300 [12:38:29<23:48:24, 15.72s/it]

training loss: 0.5614550113677979


training:  34%|███▍      | 2849/8300 [12:38:45<23:47:58, 15.72s/it]

training loss: 0.5981956720352173


training:  34%|███▍      | 2850/8300 [12:39:00<23:47:40, 15.72s/it]

training loss: 0.7695796489715576


training:  34%|███▍      | 2851/8300 [12:39:16<23:47:35, 15.72s/it]

training loss: 0.8619627952575684


training:  34%|███▍      | 2852/8300 [12:39:32<23:47:29, 15.72s/it]

training loss: 1.2569022178649902


training:  34%|███▍      | 2853/8300 [12:39:47<23:47:06, 15.72s/it]

training loss: 0.6997963786125183


training:  34%|███▍      | 2854/8300 [12:40:03<23:46:53, 15.72s/it]

training loss: 0.5606716871261597


training:  34%|███▍      | 2855/8300 [12:40:19<23:46:20, 15.72s/it]

training loss: 0.7793934345245361


training:  34%|███▍      | 2856/8300 [12:40:35<23:46:16, 15.72s/it]

training loss: 0.6699161529541016


training:  34%|███▍      | 2857/8300 [12:40:50<23:45:49, 15.72s/it]

training loss: 0.5016176104545593


training:  34%|███▍      | 2858/8300 [12:41:06<23:45:42, 15.72s/it]

training loss: 0.3564586043357849


training:  34%|███▍      | 2859/8300 [12:41:22<23:45:12, 15.72s/it]

training loss: 0.9239190816879272


training:  34%|███▍      | 2860/8300 [12:41:37<23:45:02, 15.72s/it]

training loss: 1.0226759910583496


training:  34%|███▍      | 2861/8300 [12:41:53<23:44:30, 15.71s/it]

training loss: 0.8737214803695679


training:  34%|███▍      | 2862/8300 [12:42:09<23:44:25, 15.72s/it]

training loss: 1.0584489107131958


training:  34%|███▍      | 2863/8300 [12:42:25<23:44:03, 15.72s/it]

training loss: 0.7098684310913086


training:  35%|███▍      | 2864/8300 [12:42:40<23:43:44, 15.71s/it]

training loss: 0.5827913284301758


training:  35%|███▍      | 2865/8300 [12:42:56<23:43:27, 15.71s/it]

training loss: 0.8207427263259888


training:  35%|███▍      | 2866/8300 [12:43:12<23:43:25, 15.72s/it]

training loss: 0.5691868662834167


training:  35%|███▍      | 2867/8300 [12:43:27<23:43:30, 15.72s/it]

training loss: 0.801680326461792


training:  35%|███▍      | 2868/8300 [12:43:43<23:43:18, 15.72s/it]

training loss: 0.9111068844795227


training:  35%|███▍      | 2869/8300 [12:43:59<23:42:52, 15.72s/it]

training loss: 0.6181617379188538


training:  35%|███▍      | 2870/8300 [12:44:15<23:42:25, 15.72s/it]

training loss: 0.7132404446601868


training:  35%|███▍      | 2871/8300 [12:44:30<23:42:13, 15.72s/it]

training loss: 0.18117405474185944


training:  35%|███▍      | 2872/8300 [12:44:46<23:41:53, 15.72s/it]

training loss: 0.7584693431854248


training:  35%|███▍      | 2873/8300 [12:45:02<23:41:42, 15.72s/it]

training loss: 0.5112331509590149


training:  35%|███▍      | 2874/8300 [12:45:17<23:41:56, 15.72s/it]

training loss: 0.8351972699165344


training:  35%|███▍      | 2875/8300 [12:45:33<23:41:51, 15.73s/it]

training loss: 0.7826253771781921


training:  35%|███▍      | 2876/8300 [12:45:49<23:41:28, 15.72s/it]

training loss: 0.8788767457008362


training:  35%|███▍      | 2877/8300 [12:46:05<23:41:24, 15.73s/it]

training loss: 1.0570101737976074


training:  35%|███▍      | 2878/8300 [12:46:20<23:40:46, 15.72s/it]

training loss: 0.9561433792114258


training:  35%|███▍      | 2879/8300 [12:46:36<23:40:55, 15.73s/it]

training loss: 1.1263999938964844


training:  35%|███▍      | 2880/8300 [12:46:52<23:40:19, 15.72s/it]

training loss: 0.6791653037071228


training:  35%|███▍      | 2881/8300 [12:47:08<23:40:31, 15.73s/it]

training loss: 1.1814820766448975


training:  35%|███▍      | 2882/8300 [12:47:23<23:39:57, 15.72s/it]

training loss: 0.8826582431793213


training:  35%|███▍      | 2883/8300 [12:47:39<23:39:35, 15.72s/it]

training loss: 0.2726096212863922


training:  35%|███▍      | 2884/8300 [12:47:55<23:39:00, 15.72s/it]

training loss: 1.0801167488098145


training:  35%|███▍      | 2885/8300 [12:48:10<23:38:35, 15.72s/it]

training loss: 0.9388400316238403


training:  35%|███▍      | 2886/8300 [12:48:26<23:38:22, 15.72s/it]

training loss: 0.4418964087963104


training:  35%|███▍      | 2887/8300 [12:48:42<23:38:10, 15.72s/it]

training loss: 0.9208425879478455


training:  35%|███▍      | 2888/8300 [12:48:58<23:37:52, 15.72s/it]

training loss: 0.14762388169765472


training:  35%|███▍      | 2889/8300 [12:49:13<23:37:40, 15.72s/it]

training loss: 1.1125246286392212


training:  35%|███▍      | 2890/8300 [12:49:29<23:37:32, 15.72s/it]

training loss: 1.0150107145309448


training:  35%|███▍      | 2891/8300 [12:49:45<23:37:02, 15.72s/it]

training loss: 0.9881178140640259


training:  35%|███▍      | 2892/8300 [12:50:00<23:36:49, 15.72s/it]

training loss: 0.9359425902366638


training:  35%|███▍      | 2893/8300 [12:50:16<23:36:43, 15.72s/it]

training loss: 0.40190455317497253


training:  35%|███▍      | 2894/8300 [12:50:32<23:36:46, 15.72s/it]

training loss: 0.5655788779258728


training:  35%|███▍      | 2895/8300 [12:50:48<23:36:34, 15.73s/it]

training loss: 0.9329856634140015


training:  35%|███▍      | 2896/8300 [12:51:03<23:36:15, 15.72s/it]

training loss: 0.8878365159034729


training:  35%|███▍      | 2897/8300 [12:51:19<23:35:52, 15.72s/it]

training loss: 0.8874778151512146


training:  35%|███▍      | 2898/8300 [12:51:35<23:35:53, 15.73s/it]

training loss: 0.8075612187385559


training:  35%|███▍      | 2899/8300 [12:51:51<23:36:01, 15.73s/it]

training loss: 0.7925708889961243


training:  35%|███▍      | 2900/8300 [12:52:06<23:36:26, 15.74s/it]

training loss: 1.0305368900299072
training loss: 0.7562339305877686


training:  35%|███▍      | 2901/8300 [12:52:23<24:12:09, 16.14s/it]

validation loss: 1.5242975950241089


training:  35%|███▍      | 2902/8300 [12:52:39<24:02:04, 16.03s/it]

training loss: 0.9468649625778198


training:  35%|███▍      | 2903/8300 [12:52:55<23:54:20, 15.95s/it]

training loss: 0.6217846870422363


training:  35%|███▍      | 2904/8300 [12:53:11<23:48:49, 15.89s/it]

training loss: 1.0941945314407349


training:  35%|███▌      | 2905/8300 [12:53:26<23:44:46, 15.85s/it]

training loss: 0.7984327077865601


training:  35%|███▌      | 2906/8300 [12:53:42<23:41:53, 15.82s/it]

training loss: 0.8914249539375305


training:  35%|███▌      | 2907/8300 [12:53:58<23:38:50, 15.79s/it]

training loss: 0.7944653630256653


training:  35%|███▌      | 2908/8300 [12:54:14<23:36:49, 15.77s/it]

training loss: 0.857761800289154


training:  35%|███▌      | 2909/8300 [12:54:29<23:35:28, 15.75s/it]

training loss: 0.8928406834602356


training:  35%|███▌      | 2910/8300 [12:54:45<23:34:19, 15.74s/it]

training loss: 0.740580677986145


training:  35%|███▌      | 2911/8300 [12:55:01<23:33:29, 15.74s/it]

training loss: 0.5833760499954224


training:  35%|███▌      | 2912/8300 [12:55:17<23:32:40, 15.73s/it]

training loss: 0.8319089412689209


training:  35%|███▌      | 2913/8300 [12:55:32<23:32:15, 15.73s/it]

training loss: 1.0712003707885742


training:  35%|███▌      | 2914/8300 [12:55:48<23:31:46, 15.73s/it]

training loss: 0.7585930824279785


training:  35%|███▌      | 2915/8300 [12:56:04<23:31:25, 15.73s/it]

training loss: 1.0227988958358765


training:  35%|███▌      | 2916/8300 [12:56:19<23:30:46, 15.72s/it]

training loss: 0.9025411605834961


training:  35%|███▌      | 2917/8300 [12:56:35<23:30:35, 15.72s/it]

training loss: 1.0984073877334595


training:  35%|███▌      | 2918/8300 [12:56:51<23:29:57, 15.72s/it]

training loss: 0.6787152290344238


training:  35%|███▌      | 2919/8300 [12:57:07<23:29:42, 15.72s/it]

training loss: 0.7622136473655701


training:  35%|███▌      | 2920/8300 [12:57:22<23:29:21, 15.72s/it]

training loss: 0.8597469925880432


training:  35%|███▌      | 2921/8300 [12:57:38<23:29:09, 15.72s/it]

training loss: 0.6698862910270691


training:  35%|███▌      | 2922/8300 [12:57:54<23:28:54, 15.72s/it]

training loss: 0.8201465606689453


training:  35%|███▌      | 2923/8300 [12:58:09<23:28:29, 15.72s/it]

training loss: 0.9708389043807983


training:  35%|███▌      | 2924/8300 [12:58:25<23:28:22, 15.72s/it]

training loss: 0.5767958760261536


training:  35%|███▌      | 2925/8300 [12:58:41<23:28:08, 15.72s/it]

training loss: 0.9556464552879333


training:  35%|███▌      | 2926/8300 [12:58:57<23:27:59, 15.72s/it]

training loss: 0.7349168658256531


training:  35%|███▌      | 2927/8300 [12:59:12<23:27:37, 15.72s/it]

training loss: 1.0725376605987549


training:  35%|███▌      | 2928/8300 [12:59:28<23:27:28, 15.72s/it]

training loss: 0.7784216403961182


training:  35%|███▌      | 2929/8300 [12:59:44<23:27:23, 15.72s/it]

training loss: 0.9344940781593323


training:  35%|███▌      | 2930/8300 [12:59:59<23:26:47, 15.72s/it]

training loss: 0.9213050603866577


training:  35%|███▌      | 2931/8300 [13:00:15<23:26:32, 15.72s/it]

training loss: 0.9754868745803833


training:  35%|███▌      | 2932/8300 [13:00:31<23:26:22, 15.72s/it]

training loss: 0.5375708341598511


training:  35%|███▌      | 2933/8300 [13:00:47<23:26:04, 15.72s/it]

training loss: 0.7653515934944153


training:  35%|███▌      | 2934/8300 [13:01:02<23:25:44, 15.72s/it]

training loss: 0.7688714265823364


training:  35%|███▌      | 2935/8300 [13:01:18<23:25:22, 15.72s/it]

training loss: 0.6613073945045471


training:  35%|███▌      | 2936/8300 [13:01:34<23:25:19, 15.72s/it]

training loss: 1.1817471981048584


training:  35%|███▌      | 2937/8300 [13:01:49<23:24:53, 15.72s/it]

training loss: 0.817656934261322


training:  35%|███▌      | 2938/8300 [13:02:05<23:24:36, 15.72s/it]

training loss: 0.5349366664886475


training:  35%|███▌      | 2939/8300 [13:02:21<23:24:01, 15.71s/it]

training loss: 1.1625784635543823


training:  35%|███▌      | 2940/8300 [13:02:37<23:24:15, 15.72s/it]

training loss: 0.6286373138427734


training:  35%|███▌      | 2941/8300 [13:02:52<23:23:50, 15.72s/it]

training loss: 0.7821645736694336


training:  35%|███▌      | 2942/8300 [13:03:08<23:23:45, 15.72s/it]

training loss: 1.0576776266098022


training:  35%|███▌      | 2943/8300 [13:03:24<23:23:30, 15.72s/it]

training loss: 0.9187561869621277


training:  35%|███▌      | 2944/8300 [13:03:40<23:23:18, 15.72s/it]

training loss: 1.0453672409057617


training:  35%|███▌      | 2945/8300 [13:03:55<23:22:54, 15.72s/it]

training loss: 1.0520541667938232


training:  35%|███▌      | 2946/8300 [13:04:11<23:22:25, 15.72s/it]

training loss: 0.6451541185379028


training:  36%|███▌      | 2947/8300 [13:04:27<23:22:18, 15.72s/it]

training loss: 0.42684125900268555


training:  36%|███▌      | 2948/8300 [13:04:42<23:22:13, 15.72s/it]

training loss: 0.7835133075714111


training:  36%|███▌      | 2949/8300 [13:04:58<23:22:01, 15.72s/it]

training loss: 0.8174585103988647


training:  36%|███▌      | 2950/8300 [13:05:14<23:21:50, 15.72s/it]

training loss: 0.9785371422767639


training:  36%|███▌      | 2951/8300 [13:05:30<23:21:56, 15.73s/it]

training loss: 0.9625372290611267


training:  36%|███▌      | 2952/8300 [13:05:45<23:21:40, 15.73s/it]

training loss: 0.9812328815460205


training:  36%|███▌      | 2953/8300 [13:06:01<23:21:16, 15.72s/it]

training loss: 0.5293529629707336


training:  36%|███▌      | 2954/8300 [13:06:17<23:21:04, 15.72s/it]

training loss: 0.9442212581634521


training:  36%|███▌      | 2955/8300 [13:06:32<23:20:32, 15.72s/it]

training loss: 0.5747452974319458


training:  36%|███▌      | 2956/8300 [13:06:48<23:20:24, 15.72s/it]

training loss: 0.41993701457977295


training:  36%|███▌      | 2957/8300 [13:07:04<23:20:17, 15.72s/it]

training loss: 0.915174663066864


training:  36%|███▌      | 2958/8300 [13:07:20<23:20:00, 15.72s/it]

training loss: 0.8213703632354736


training:  36%|███▌      | 2959/8300 [13:07:35<23:20:09, 15.73s/it]

training loss: 0.903942346572876


training:  36%|███▌      | 2960/8300 [13:07:51<23:19:38, 15.73s/it]

training loss: 0.753513753414154


training:  36%|███▌      | 2961/8300 [13:08:07<23:19:19, 15.73s/it]

training loss: 0.7683956623077393


training:  36%|███▌      | 2962/8300 [13:08:23<23:18:59, 15.72s/it]

training loss: 0.90598064661026


training:  36%|███▌      | 2963/8300 [13:08:38<23:18:52, 15.73s/it]

training loss: 0.7290343642234802


training:  36%|███▌      | 2964/8300 [13:08:54<23:18:28, 15.73s/it]

training loss: 0.6407809257507324


training:  36%|███▌      | 2965/8300 [13:09:10<23:18:18, 15.73s/it]

training loss: 0.6887767314910889


training:  36%|███▌      | 2966/8300 [13:09:25<23:18:07, 15.73s/it]

training loss: 0.5268528461456299


training:  36%|███▌      | 2967/8300 [13:09:41<23:17:33, 15.72s/it]

training loss: 0.7288393974304199


training:  36%|███▌      | 2968/8300 [13:09:57<23:17:23, 15.72s/it]

training loss: 0.8881962299346924


training:  36%|███▌      | 2969/8300 [13:10:13<23:17:00, 15.72s/it]

training loss: 0.8260974287986755


training:  36%|███▌      | 2970/8300 [13:10:28<23:16:39, 15.72s/it]

training loss: 0.5968765616416931


training:  36%|███▌      | 2971/8300 [13:10:44<23:16:24, 15.72s/it]

training loss: 0.6488781571388245


training:  36%|███▌      | 2972/8300 [13:11:00<23:15:54, 15.72s/it]

training loss: 0.9255622029304504


training:  36%|███▌      | 2973/8300 [13:11:15<23:15:28, 15.72s/it]

training loss: 0.7271220684051514


training:  36%|███▌      | 2974/8300 [13:11:31<23:15:18, 15.72s/it]

training loss: 1.1015936136245728


training:  36%|███▌      | 2975/8300 [13:11:47<23:15:04, 15.72s/it]

training loss: 0.7040791511535645


training:  36%|███▌      | 2976/8300 [13:12:03<23:14:47, 15.72s/it]

training loss: 1.2500386238098145


training:  36%|███▌      | 2977/8300 [13:12:18<23:14:20, 15.72s/it]

training loss: 0.4550364315509796


training:  36%|███▌      | 2978/8300 [13:12:34<23:14:15, 15.72s/it]

training loss: 0.6951012015342712


training:  36%|███▌      | 2979/8300 [13:12:50<23:13:59, 15.72s/it]

training loss: 1.0950332880020142


training:  36%|███▌      | 2980/8300 [13:13:06<23:13:56, 15.72s/it]

training loss: 0.8004739880561829


training:  36%|███▌      | 2981/8300 [13:13:21<23:13:45, 15.72s/it]

training loss: 0.5777517557144165


training:  36%|███▌      | 2982/8300 [13:13:37<23:13:55, 15.73s/it]

training loss: 0.4179770052433014


training:  36%|███▌      | 2983/8300 [13:13:53<23:13:31, 15.73s/it]

training loss: 0.6535953283309937


training:  36%|███▌      | 2984/8300 [13:14:08<23:13:06, 15.72s/it]

training loss: 0.4609929323196411


training:  36%|███▌      | 2985/8300 [13:14:24<23:12:50, 15.72s/it]

training loss: 0.7945169806480408


training:  36%|███▌      | 2986/8300 [13:14:40<23:12:24, 15.72s/it]

training loss: 1.102679967880249


training:  36%|███▌      | 2987/8300 [13:14:56<23:11:50, 15.72s/it]

training loss: 0.7164351940155029


training:  36%|███▌      | 2988/8300 [13:15:11<23:11:45, 15.72s/it]

training loss: 0.9618276953697205


training:  36%|███▌      | 2989/8300 [13:15:27<23:11:43, 15.72s/it]

training loss: 0.6802774667739868


training:  36%|███▌      | 2990/8300 [13:15:43<23:11:18, 15.72s/it]

training loss: 0.3272857964038849


training:  36%|███▌      | 2991/8300 [13:15:58<23:10:45, 15.72s/it]

training loss: 0.9557168483734131


training:  36%|███▌      | 2992/8300 [13:16:14<23:10:35, 15.72s/it]

training loss: 0.8152092099189758


training:  36%|███▌      | 2993/8300 [13:16:30<23:10:22, 15.72s/it]

training loss: 0.8215765357017517


training:  36%|███▌      | 2994/8300 [13:16:46<23:09:55, 15.72s/it]

training loss: 0.6375523209571838


training:  36%|███▌      | 2995/8300 [13:17:01<23:09:42, 15.72s/it]

training loss: 0.6005149483680725


training:  36%|███▌      | 2996/8300 [13:17:17<23:09:25, 15.72s/it]

training loss: 0.6391972303390503


training:  36%|███▌      | 2997/8300 [13:17:33<23:09:29, 15.72s/it]

training loss: 0.8365165591239929


training:  36%|███▌      | 2998/8300 [13:17:48<23:09:01, 15.72s/it]

training loss: 0.3209964334964752


training:  36%|███▌      | 2999/8300 [13:18:04<23:08:46, 15.72s/it]

training loss: 0.9662954211235046


training:  36%|███▌      | 3000/8300 [13:18:20<23:08:16, 15.72s/it]

training loss: 0.43221423029899597
training loss: 0.6617728471755981


training:  36%|███▌      | 3001/8300 [13:18:37<23:42:25, 16.11s/it]

validation loss: 1.5364856719970703


training:  36%|███▌      | 3002/8300 [13:18:53<23:32:12, 15.99s/it]

training loss: 0.683740496635437


training:  36%|███▌      | 3003/8300 [13:19:08<23:25:01, 15.91s/it]

training loss: 0.4349134564399719


training:  36%|███▌      | 3004/8300 [13:19:24<23:19:37, 15.86s/it]

training loss: 0.655969500541687


training:  36%|███▌      | 3005/8300 [13:19:40<23:15:53, 15.82s/it]

training loss: 1.1224526166915894


training:  36%|███▌      | 3006/8300 [13:19:56<23:12:56, 15.79s/it]

training loss: 0.8395274877548218


training:  36%|███▌      | 3007/8300 [13:20:11<23:10:56, 15.77s/it]

training loss: 0.6019647717475891


training:  36%|███▌      | 3008/8300 [13:20:27<23:09:41, 15.76s/it]

training loss: 0.7460631728172302


training:  36%|███▋      | 3009/8300 [13:20:43<23:08:33, 15.75s/it]

training loss: 0.9361635446548462


training:  36%|███▋      | 3010/8300 [13:20:58<23:07:23, 15.74s/it]

training loss: 0.9064173698425293


training:  36%|███▋      | 3011/8300 [13:21:14<23:06:24, 15.73s/it]

training loss: 0.38400521874427795


training:  36%|███▋      | 3012/8300 [13:21:30<23:05:58, 15.73s/it]

training loss: 0.8266907334327698


training:  36%|███▋      | 3013/8300 [13:21:46<23:05:35, 15.72s/it]

training loss: 0.24595266580581665


training:  36%|███▋      | 3014/8300 [13:22:01<23:05:26, 15.73s/it]

training loss: 1.3124449253082275


training:  36%|███▋      | 3015/8300 [13:22:17<23:04:55, 15.72s/it]

training loss: 0.9272199869155884


training:  36%|███▋      | 3016/8300 [13:22:33<23:05:04, 15.73s/it]

training loss: 0.3438833951950073


training:  36%|███▋      | 3017/8300 [13:22:49<23:05:21, 15.73s/it]

training loss: 0.790416955947876


training:  36%|███▋      | 3018/8300 [13:23:04<23:05:24, 15.74s/it]

training loss: 1.0572683811187744


training:  36%|███▋      | 3019/8300 [13:23:20<23:05:20, 15.74s/it]

training loss: 0.6434319019317627


training:  36%|███▋      | 3020/8300 [13:23:36<23:05:47, 15.75s/it]

training loss: 0.6150727868080139


training:  36%|███▋      | 3021/8300 [13:23:52<23:05:38, 15.75s/it]

training loss: 0.9389306902885437


training:  36%|███▋      | 3022/8300 [13:24:07<23:05:24, 15.75s/it]

training loss: 0.6682150959968567


training:  36%|███▋      | 3023/8300 [13:24:23<23:05:11, 15.75s/it]

training loss: 0.5921030640602112


training:  36%|███▋      | 3024/8300 [13:24:39<23:04:51, 15.75s/it]

training loss: 0.5435932278633118


training:  36%|███▋      | 3025/8300 [13:24:55<23:03:42, 15.74s/it]

training loss: 1.0558878183364868


training:  36%|███▋      | 3026/8300 [13:25:10<23:02:56, 15.73s/it]

training loss: 0.8508695363998413


training:  36%|███▋      | 3027/8300 [13:25:26<23:02:33, 15.73s/it]

training loss: 1.1183267831802368


training:  36%|███▋      | 3028/8300 [13:25:42<23:01:55, 15.73s/it]

training loss: 0.7714224457740784


training:  36%|███▋      | 3029/8300 [13:25:57<23:04:20, 15.76s/it]

training loss: 0.6820244193077087


training:  37%|███▋      | 3030/8300 [13:26:13<23:03:16, 15.75s/it]

training loss: 0.714758038520813


training:  37%|███▋      | 3031/8300 [13:26:29<23:02:05, 15.74s/it]

training loss: 0.5870822668075562


training:  37%|███▋      | 3032/8300 [13:26:45<23:01:22, 15.73s/it]

training loss: 1.0654112100601196


training:  37%|███▋      | 3033/8300 [13:27:00<23:01:05, 15.73s/it]

training loss: 0.33310070633888245


training:  37%|███▋      | 3034/8300 [13:27:16<23:00:39, 15.73s/it]

training loss: 1.102362871170044


training:  37%|███▋      | 3035/8300 [13:27:32<23:00:18, 15.73s/it]

training loss: 0.9478489756584167


training:  37%|███▋      | 3036/8300 [13:27:48<22:59:52, 15.73s/it]

training loss: 0.4642452001571655


training:  37%|███▋      | 3037/8300 [13:28:03<22:59:20, 15.72s/it]

training loss: 0.9362102746963501


training:  37%|███▋      | 3038/8300 [13:28:19<22:59:06, 15.73s/it]

training loss: 1.1468499898910522


training:  37%|███▋      | 3039/8300 [13:28:35<22:58:50, 15.73s/it]

training loss: 0.6625023484230042


training:  37%|███▋      | 3040/8300 [13:28:50<22:58:08, 15.72s/it]

training loss: 0.8443262577056885


training:  37%|███▋      | 3041/8300 [13:29:06<22:58:07, 15.72s/it]

training loss: 1.0425138473510742


training:  37%|███▋      | 3042/8300 [13:29:22<22:57:46, 15.72s/it]

training loss: 0.6253120303153992


training:  37%|███▋      | 3043/8300 [13:29:38<22:57:37, 15.72s/it]

training loss: 0.7517207860946655


training:  37%|███▋      | 3044/8300 [13:29:53<22:57:11, 15.72s/it]

training loss: 0.8910627365112305


training:  37%|███▋      | 3045/8300 [13:30:09<22:56:57, 15.72s/it]

training loss: 0.8217208981513977


training:  37%|███▋      | 3046/8300 [13:30:25<22:56:56, 15.72s/it]

training loss: 0.5765496492385864


training:  37%|███▋      | 3047/8300 [13:30:41<22:56:21, 15.72s/it]

training loss: 0.43897783756256104


training:  37%|███▋      | 3048/8300 [13:30:56<22:55:59, 15.72s/it]

training loss: 0.3609665334224701


training:  37%|███▋      | 3049/8300 [13:31:12<22:55:02, 15.71s/it]

training loss: 0.6769441366195679


training:  37%|███▋      | 3050/8300 [13:31:28<22:54:46, 15.71s/it]

training loss: 0.8990992903709412


training:  37%|███▋      | 3051/8300 [13:31:43<22:54:28, 15.71s/it]

training loss: 0.922661542892456


training:  37%|███▋      | 3052/8300 [13:31:59<22:53:58, 15.71s/it]

training loss: 0.7836024165153503


training:  37%|███▋      | 3053/8300 [13:32:15<22:53:43, 15.71s/it]

training loss: 0.6745485067367554


training:  37%|███▋      | 3054/8300 [13:32:30<22:53:44, 15.71s/it]

training loss: 1.158158540725708


training:  37%|███▋      | 3055/8300 [13:32:46<22:53:45, 15.72s/it]

training loss: 0.8369880318641663


training:  37%|███▋      | 3056/8300 [13:33:02<22:53:37, 15.72s/it]

training loss: 0.8683803081512451


training:  37%|███▋      | 3057/8300 [13:33:18<22:53:21, 15.72s/it]

training loss: 0.8935176134109497


training:  37%|███▋      | 3058/8300 [13:33:33<22:53:00, 15.72s/it]

training loss: 1.0393903255462646


training:  37%|███▋      | 3059/8300 [13:33:49<22:52:51, 15.72s/it]

training loss: 0.9144027233123779


training:  37%|███▋      | 3060/8300 [13:34:05<22:52:47, 15.72s/it]

training loss: 1.0032378435134888


training:  37%|███▋      | 3061/8300 [13:34:20<22:52:13, 15.72s/it]

training loss: 0.9295594692230225


training:  37%|███▋      | 3062/8300 [13:34:36<22:52:18, 15.72s/it]

training loss: 0.9115109443664551


training:  37%|███▋      | 3063/8300 [13:34:52<22:51:50, 15.72s/it]

training loss: 0.7404788732528687


training:  37%|███▋      | 3064/8300 [13:35:08<22:51:44, 15.72s/it]

training loss: 0.7250074744224548


training:  37%|███▋      | 3065/8300 [13:35:23<22:51:31, 15.72s/it]

training loss: 0.44963371753692627


training:  37%|███▋      | 3066/8300 [13:35:39<22:51:13, 15.72s/it]

training loss: 0.8353289365768433


training:  37%|███▋      | 3067/8300 [13:35:55<22:51:18, 15.72s/it]

training loss: 0.7485597133636475


training:  37%|███▋      | 3068/8300 [13:36:11<22:50:44, 15.72s/it]

training loss: 1.0022801160812378


training:  37%|███▋      | 3069/8300 [13:36:26<22:50:33, 15.72s/it]

training loss: 0.876442015171051


training:  37%|███▋      | 3070/8300 [13:36:42<22:50:09, 15.72s/it]

training loss: 1.422640085220337


training:  37%|███▋      | 3071/8300 [13:36:58<22:49:59, 15.72s/it]

training loss: 0.751083254814148


training:  37%|███▋      | 3072/8300 [13:37:13<22:49:39, 15.72s/it]

training loss: 0.8075985312461853


training:  37%|███▋      | 3073/8300 [13:37:29<22:49:41, 15.72s/it]

training loss: 0.7698164582252502


training:  37%|███▋      | 3074/8300 [13:37:45<22:49:13, 15.72s/it]

training loss: 0.7173436880111694


training:  37%|███▋      | 3075/8300 [13:38:01<22:49:03, 15.72s/it]

training loss: 0.8089855909347534


training:  37%|███▋      | 3076/8300 [13:38:16<22:48:35, 15.72s/it]

training loss: 0.5957223773002625


training:  37%|███▋      | 3077/8300 [13:38:32<22:48:30, 15.72s/it]

training loss: 0.5542530417442322


training:  37%|███▋      | 3078/8300 [13:38:48<22:48:17, 15.72s/it]

training loss: 0.7940068244934082


training:  37%|███▋      | 3079/8300 [13:39:03<22:48:29, 15.73s/it]

training loss: 0.4536333978176117


training:  37%|███▋      | 3080/8300 [13:39:19<22:47:54, 15.72s/it]

training loss: 1.1589562892913818


training:  37%|███▋      | 3081/8300 [13:39:35<22:47:42, 15.72s/it]

training loss: 0.6229850053787231


training:  37%|███▋      | 3082/8300 [13:39:51<22:47:20, 15.72s/it]

training loss: 0.7248572707176208


training:  37%|███▋      | 3083/8300 [13:40:06<22:47:19, 15.73s/it]

training loss: 0.910590648651123


training:  37%|███▋      | 3084/8300 [13:40:22<22:47:17, 15.73s/it]

training loss: 0.8191910982131958


training:  37%|███▋      | 3085/8300 [13:40:38<22:46:52, 15.73s/it]

training loss: 0.9864026308059692


training:  37%|███▋      | 3086/8300 [13:40:54<22:46:31, 15.73s/it]

training loss: 0.6111693382263184


training:  37%|███▋      | 3087/8300 [13:41:09<22:46:11, 15.72s/it]

training loss: 0.50626540184021


training:  37%|███▋      | 3088/8300 [13:41:25<22:45:58, 15.72s/it]

training loss: 0.5840582847595215


training:  37%|███▋      | 3089/8300 [13:41:41<22:45:18, 15.72s/it]

training loss: 0.7965331673622131


training:  37%|███▋      | 3090/8300 [13:41:56<22:44:52, 15.72s/it]

training loss: 1.069615364074707


training:  37%|███▋      | 3091/8300 [13:42:12<22:44:33, 15.72s/it]

training loss: 0.8112860918045044


training:  37%|███▋      | 3092/8300 [13:42:28<22:44:17, 15.72s/it]

training loss: 0.4692528247833252


training:  37%|███▋      | 3093/8300 [13:42:44<22:44:07, 15.72s/it]

training loss: 0.5611108541488647


training:  37%|███▋      | 3094/8300 [13:42:59<22:43:49, 15.72s/it]

training loss: 0.6393759846687317


training:  37%|███▋      | 3095/8300 [13:43:15<22:43:35, 15.72s/it]

training loss: 0.8823610544204712


training:  37%|███▋      | 3096/8300 [13:43:31<22:43:20, 15.72s/it]

training loss: 0.6503524780273438


training:  37%|███▋      | 3097/8300 [13:43:46<22:42:58, 15.72s/it]

training loss: 0.7295916676521301


training:  37%|███▋      | 3098/8300 [13:44:02<22:42:49, 15.72s/it]

training loss: 0.8834968209266663


training:  37%|███▋      | 3099/8300 [13:44:18<22:42:32, 15.72s/it]

training loss: 0.6668608784675598


training:  37%|███▋      | 3100/8300 [13:44:34<22:42:30, 15.72s/it]

training loss: 1.0334864854812622
training loss: 0.6919841766357422


training:  37%|███▋      | 3101/8300 [13:44:51<23:15:15, 16.10s/it]

validation loss: 1.5127665996551514


training:  37%|███▋      | 3102/8300 [13:45:06<23:05:40, 15.99s/it]

training loss: 0.919631838798523


training:  37%|███▋      | 3103/8300 [13:45:22<22:58:00, 15.91s/it]

training loss: 0.4935661554336548


training:  37%|███▋      | 3104/8300 [13:45:38<22:52:47, 15.85s/it]

training loss: 0.9349494576454163


training:  37%|███▋      | 3105/8300 [13:45:53<22:48:57, 15.81s/it]

training loss: 0.30275189876556396


training:  37%|███▋      | 3106/8300 [13:46:09<22:46:11, 15.78s/it]

training loss: 0.802489161491394


training:  37%|███▋      | 3107/8300 [13:46:25<22:44:09, 15.76s/it]

training loss: 0.5172753930091858


training:  37%|███▋      | 3108/8300 [13:46:41<22:42:40, 15.75s/it]

training loss: 0.9380878210067749


training:  37%|███▋      | 3109/8300 [13:46:56<22:41:43, 15.74s/it]

training loss: 0.7942681312561035


training:  37%|███▋      | 3110/8300 [13:47:12<22:40:43, 15.73s/it]

training loss: 0.8043997287750244


training:  37%|███▋      | 3111/8300 [13:47:28<22:40:31, 15.73s/it]

training loss: 1.0513038635253906


training:  37%|███▋      | 3112/8300 [13:47:44<22:39:45, 15.73s/it]

training loss: 0.9771817922592163


training:  38%|███▊      | 3113/8300 [13:47:59<22:39:22, 15.72s/it]

training loss: 0.8977097272872925


training:  38%|███▊      | 3114/8300 [13:48:15<22:38:46, 15.72s/it]

training loss: 0.6629336476325989


training:  38%|███▊      | 3115/8300 [13:48:31<22:38:34, 15.72s/it]

training loss: 0.8819544315338135


training:  38%|███▊      | 3116/8300 [13:48:46<22:37:54, 15.72s/it]

training loss: 0.7247806191444397


training:  38%|███▊      | 3117/8300 [13:49:02<22:37:29, 15.71s/it]

training loss: 0.8400893807411194


training:  38%|███▊      | 3118/8300 [13:49:18<22:37:32, 15.72s/it]

training loss: 0.45689642429351807


training:  38%|███▊      | 3119/8300 [13:49:34<22:37:14, 15.72s/it]

training loss: 0.7000061273574829


training:  38%|███▊      | 3120/8300 [13:49:49<22:36:41, 15.71s/it]

training loss: 0.6323249936103821


training:  38%|███▊      | 3121/8300 [13:50:05<22:36:34, 15.72s/it]

training loss: 0.9792730212211609


training:  38%|███▊      | 3122/8300 [13:50:21<22:36:06, 15.71s/it]

training loss: 0.9453921318054199


training:  38%|███▊      | 3123/8300 [13:50:36<22:35:48, 15.71s/it]

training loss: 0.7860666513442993


training:  38%|███▊      | 3124/8300 [13:50:52<22:35:27, 15.71s/it]

training loss: 0.4834657311439514


training:  38%|███▊      | 3125/8300 [13:51:08<22:35:24, 15.71s/it]

training loss: 0.8595256805419922


training:  38%|███▊      | 3126/8300 [13:51:24<22:35:23, 15.72s/it]

training loss: 0.47135478258132935


training:  38%|███▊      | 3127/8300 [13:51:39<22:35:06, 15.72s/it]

training loss: 0.9749968647956848


training:  38%|███▊      | 3128/8300 [13:51:55<22:34:40, 15.72s/it]

training loss: 0.4857372045516968


training:  38%|███▊      | 3129/8300 [13:52:11<22:34:15, 15.71s/it]

training loss: 0.9277888536453247


training:  38%|███▊      | 3130/8300 [13:52:26<22:33:57, 15.71s/it]

training loss: 0.969159722328186


training:  38%|███▊      | 3131/8300 [13:52:42<22:33:48, 15.71s/it]

training loss: 0.7917938232421875


training:  38%|███▊      | 3132/8300 [13:52:58<22:33:38, 15.72s/it]

training loss: 0.6108384132385254


training:  38%|███▊      | 3133/8300 [13:53:14<22:33:18, 15.71s/it]

training loss: 0.7856577634811401


training:  38%|███▊      | 3134/8300 [13:53:29<22:33:06, 15.72s/it]

training loss: 0.7962027192115784


training:  38%|███▊      | 3135/8300 [13:53:45<22:32:45, 15.71s/it]

training loss: 1.0856761932373047


training:  38%|███▊      | 3136/8300 [13:54:01<22:32:40, 15.72s/it]

training loss: 0.9042649865150452


training:  38%|███▊      | 3137/8300 [13:54:16<22:32:26, 15.72s/it]

training loss: 0.600286602973938


training:  38%|███▊      | 3138/8300 [13:54:32<22:32:17, 15.72s/it]

training loss: 0.8520697355270386


training:  38%|███▊      | 3139/8300 [13:54:48<22:31:49, 15.72s/it]

training loss: 0.503931999206543


training:  38%|███▊      | 3140/8300 [13:55:04<22:31:41, 15.72s/it]

training loss: 0.8703901767730713


training:  38%|███▊      | 3141/8300 [13:55:19<22:31:12, 15.71s/it]

training loss: 0.5868825912475586


training:  38%|███▊      | 3142/8300 [13:55:35<22:31:03, 15.72s/it]

training loss: 0.7708369493484497


training:  38%|███▊      | 3143/8300 [13:55:51<22:30:46, 15.72s/it]

training loss: 0.6206764578819275


training:  38%|███▊      | 3144/8300 [13:56:06<22:30:30, 15.72s/it]

training loss: 0.6126149892807007


training:  38%|███▊      | 3145/8300 [13:56:22<22:30:05, 15.71s/it]

training loss: 0.724016010761261


training:  38%|███▊      | 3146/8300 [13:56:38<22:29:43, 15.71s/it]

training loss: 0.7416808605194092


training:  38%|███▊      | 3147/8300 [13:56:54<22:29:09, 15.71s/it]

training loss: 1.1555285453796387


training:  38%|███▊      | 3148/8300 [13:57:09<22:28:55, 15.71s/it]

training loss: 0.7977355122566223


training:  38%|███▊      | 3149/8300 [13:57:25<22:29:14, 15.72s/it]

training loss: 0.5598248839378357


training:  38%|███▊      | 3150/8300 [13:57:41<22:29:03, 15.72s/it]

training loss: 1.0632696151733398


training:  38%|███▊      | 3151/8300 [13:57:56<22:28:37, 15.72s/it]

training loss: 0.5768962502479553


training:  38%|███▊      | 3152/8300 [13:58:12<22:28:27, 15.72s/it]

training loss: 0.5998334288597107


training:  38%|███▊      | 3153/8300 [13:58:28<22:28:15, 15.72s/it]

training loss: 1.2402087450027466


training:  38%|███▊      | 3154/8300 [13:58:44<22:27:56, 15.72s/it]

training loss: 0.9518663883209229


training:  38%|███▊      | 3155/8300 [13:58:59<22:27:52, 15.72s/it]

training loss: 0.3826444149017334


training:  38%|███▊      | 3156/8300 [13:59:15<22:27:23, 15.72s/it]

training loss: 0.8074087500572205


training:  38%|███▊      | 3157/8300 [13:59:31<22:27:31, 15.72s/it]

training loss: 1.1191253662109375


training:  38%|███▊      | 3158/8300 [13:59:46<22:27:13, 15.72s/it]

training loss: 0.7762823104858398


training:  38%|███▊      | 3159/8300 [14:00:02<22:27:01, 15.72s/it]

training loss: 0.5434425473213196


training:  38%|███▊      | 3160/8300 [14:00:18<22:26:40, 15.72s/it]

training loss: 1.074407696723938


training:  38%|███▊      | 3161/8300 [14:00:34<22:26:41, 15.72s/it]

training loss: 1.2523906230926514


training:  38%|███▊      | 3162/8300 [14:00:49<22:26:20, 15.72s/it]

training loss: 0.9353071451187134


training:  38%|███▊      | 3163/8300 [14:01:05<22:26:15, 15.72s/it]

training loss: 0.8659493923187256


training:  38%|███▊      | 3164/8300 [14:01:21<22:25:52, 15.72s/it]

training loss: 0.8205786347389221


training:  38%|███▊      | 3165/8300 [14:01:37<22:25:39, 15.72s/it]

training loss: 0.8998143076896667


training:  38%|███▊      | 3166/8300 [14:01:52<22:25:21, 15.72s/it]

training loss: 1.054901361465454


training:  38%|███▊      | 3167/8300 [14:02:08<22:25:03, 15.72s/it]

training loss: 0.9885210394859314


training:  38%|███▊      | 3168/8300 [14:02:24<22:24:41, 15.72s/it]

training loss: 0.5597453117370605


training:  38%|███▊      | 3169/8300 [14:02:39<22:24:44, 15.72s/it]

training loss: 0.762100100517273


training:  38%|███▊      | 3170/8300 [14:02:55<22:24:14, 15.72s/it]

training loss: 0.8366225957870483


training:  38%|███▊      | 3171/8300 [14:03:11<22:23:54, 15.72s/it]

training loss: 0.8262708783149719


training:  38%|███▊      | 3172/8300 [14:03:27<22:23:39, 15.72s/it]

training loss: 0.9334836006164551


training:  38%|███▊      | 3173/8300 [14:03:42<22:23:29, 15.72s/it]

training loss: 0.9215899705886841


training:  38%|███▊      | 3174/8300 [14:03:58<22:23:09, 15.72s/it]

training loss: 0.5815538167953491


training:  38%|███▊      | 3175/8300 [14:04:14<22:22:47, 15.72s/it]

training loss: 1.0456091165542603


training:  38%|███▊      | 3176/8300 [14:04:29<22:22:24, 15.72s/it]

training loss: 1.032152533531189


training:  38%|███▊      | 3177/8300 [14:04:45<22:22:01, 15.72s/it]

training loss: 0.4543289542198181


training:  38%|███▊      | 3178/8300 [14:05:01<22:22:10, 15.72s/it]

training loss: 0.33564263582229614


training:  38%|███▊      | 3179/8300 [14:05:17<22:21:51, 15.72s/it]

training loss: 0.946575939655304


training:  38%|███▊      | 3180/8300 [14:05:32<22:21:31, 15.72s/it]

training loss: 0.9814057350158691


training:  38%|███▊      | 3181/8300 [14:05:48<22:21:05, 15.72s/it]

training loss: 0.9293162822723389


training:  38%|███▊      | 3182/8300 [14:06:04<22:20:55, 15.72s/it]

training loss: 1.0125755071640015


training:  38%|███▊      | 3183/8300 [14:06:19<22:20:36, 15.72s/it]

training loss: 1.0928328037261963


training:  38%|███▊      | 3184/8300 [14:06:35<22:20:34, 15.72s/it]

training loss: 0.8415696620941162


training:  38%|███▊      | 3185/8300 [14:06:51<22:20:15, 15.72s/it]

training loss: 0.7313514947891235


training:  38%|███▊      | 3186/8300 [14:07:07<22:19:59, 15.72s/it]

training loss: 0.7577372789382935


training:  38%|███▊      | 3187/8300 [14:07:22<22:19:34, 15.72s/it]

training loss: 0.940061092376709


training:  38%|███▊      | 3188/8300 [14:07:38<22:19:18, 15.72s/it]

training loss: 0.5600942969322205


training:  38%|███▊      | 3189/8300 [14:07:54<22:18:51, 15.72s/it]

training loss: 0.6725690960884094


training:  38%|███▊      | 3190/8300 [14:08:10<22:18:58, 15.72s/it]

training loss: 0.8385652303695679


training:  38%|███▊      | 3191/8300 [14:08:25<22:18:37, 15.72s/it]

training loss: 1.0903196334838867


training:  38%|███▊      | 3192/8300 [14:08:41<22:18:18, 15.72s/it]

training loss: 0.8425204753875732


training:  38%|███▊      | 3193/8300 [14:08:57<22:17:54, 15.72s/it]

training loss: 0.7811843156814575


training:  38%|███▊      | 3194/8300 [14:09:12<22:17:40, 15.72s/it]

training loss: 1.175032615661621


training:  38%|███▊      | 3195/8300 [14:09:28<22:17:19, 15.72s/it]

training loss: 1.178095817565918


training:  39%|███▊      | 3196/8300 [14:09:44<22:17:05, 15.72s/it]

training loss: 0.8503806591033936


training:  39%|███▊      | 3197/8300 [14:10:00<22:16:42, 15.72s/it]

training loss: 0.7162162065505981


training:  39%|███▊      | 3198/8300 [14:10:15<22:16:22, 15.72s/it]

training loss: 0.8111575841903687


training:  39%|███▊      | 3199/8300 [14:10:31<22:16:14, 15.72s/it]

training loss: 1.1230182647705078


training:  39%|███▊      | 3200/8300 [14:10:47<22:15:59, 15.72s/it]

training loss: 0.9478007555007935
training loss: 0.9422957897186279


training:  39%|███▊      | 3201/8300 [14:11:04<22:48:46, 16.11s/it]

validation loss: 1.5310001373291016


training:  39%|███▊      | 3202/8300 [14:11:19<22:38:37, 15.99s/it]

training loss: 0.9200525283813477


training:  39%|███▊      | 3203/8300 [14:11:35<22:31:36, 15.91s/it]

training loss: 0.7991323471069336


training:  39%|███▊      | 3204/8300 [14:11:51<22:26:12, 15.85s/it]

training loss: 0.6448199152946472


training:  39%|███▊      | 3205/8300 [14:12:07<22:22:34, 15.81s/it]

training loss: 0.8343521356582642


training:  39%|███▊      | 3206/8300 [14:12:22<22:19:40, 15.78s/it]

training loss: 1.085692048072815


training:  39%|███▊      | 3207/8300 [14:12:38<22:17:44, 15.76s/it]

training loss: 0.8852255940437317


training:  39%|███▊      | 3208/8300 [14:12:54<22:16:10, 15.74s/it]

training loss: 0.8355581760406494


training:  39%|███▊      | 3209/8300 [14:13:09<22:15:20, 15.74s/it]

training loss: 0.6891370415687561


training:  39%|███▊      | 3210/8300 [14:13:25<22:14:19, 15.73s/it]

training loss: 1.160335659980774


training:  39%|███▊      | 3211/8300 [14:13:41<22:13:30, 15.72s/it]

training loss: 0.46300530433654785


training:  39%|███▊      | 3212/8300 [14:13:57<22:13:09, 15.72s/it]

training loss: 0.4538816809654236


training:  39%|███▊      | 3213/8300 [14:14:12<22:12:31, 15.72s/it]

training loss: 1.0603052377700806


training:  39%|███▊      | 3214/8300 [14:14:28<22:12:22, 15.72s/it]

training loss: 0.5508549809455872


training:  39%|███▊      | 3215/8300 [14:14:44<22:12:10, 15.72s/it]

training loss: 0.805428683757782


training:  39%|███▊      | 3216/8300 [14:14:59<22:11:31, 15.71s/it]

training loss: 0.7500584125518799


training:  39%|███▉      | 3217/8300 [14:15:15<22:11:20, 15.72s/it]

training loss: 0.9601058959960938


training:  39%|███▉      | 3218/8300 [14:15:31<22:11:04, 15.72s/it]

training loss: 0.597667396068573


training:  39%|███▉      | 3219/8300 [14:15:47<22:10:41, 15.71s/it]

training loss: 1.359655499458313


training:  39%|███▉      | 3220/8300 [14:16:02<22:10:25, 15.71s/it]

training loss: 1.0186296701431274


training:  39%|███▉      | 3221/8300 [14:16:18<22:10:04, 15.71s/it]

training loss: 1.0208886861801147


training:  39%|███▉      | 3222/8300 [14:16:34<22:09:54, 15.71s/it]

training loss: 0.8500077724456787


training:  39%|███▉      | 3223/8300 [14:16:49<22:09:29, 15.71s/it]

training loss: 0.7596697807312012


training:  39%|███▉      | 3224/8300 [14:17:05<22:09:17, 15.71s/it]

training loss: 0.6940662860870361


training:  39%|███▉      | 3225/8300 [14:17:21<22:08:41, 15.71s/it]

training loss: 0.7552226185798645


training:  39%|███▉      | 3226/8300 [14:17:37<22:08:35, 15.71s/it]

training loss: 0.8779309988021851


training:  39%|███▉      | 3227/8300 [14:17:52<22:08:03, 15.71s/it]

training loss: 0.46875834465026855


training:  39%|███▉      | 3228/8300 [14:18:08<22:07:58, 15.71s/it]

training loss: 0.535196840763092


training:  39%|███▉      | 3229/8300 [14:18:24<22:07:52, 15.71s/it]

training loss: 0.8652312159538269


training:  39%|███▉      | 3230/8300 [14:18:39<22:07:48, 15.71s/it]

training loss: 0.7767184376716614


training:  39%|███▉      | 3231/8300 [14:18:55<22:07:22, 15.71s/it]

training loss: 0.4541797339916229


training:  39%|███▉      | 3232/8300 [14:19:11<22:07:09, 15.71s/it]

training loss: 0.4115648865699768


training:  39%|███▉      | 3233/8300 [14:19:27<22:07:12, 15.72s/it]

training loss: 0.7556017637252808


training:  39%|███▉      | 3234/8300 [14:19:42<22:06:47, 15.71s/it]

training loss: 1.0200093984603882


training:  39%|███▉      | 3235/8300 [14:19:58<22:07:19, 15.72s/it]

training loss: 0.7989434003829956


training:  39%|███▉      | 3236/8300 [14:20:14<22:07:07, 15.72s/it]

training loss: 0.9664016962051392


training:  39%|███▉      | 3237/8300 [14:20:29<22:06:47, 15.72s/it]

training loss: 0.7087643146514893


training:  39%|███▉      | 3238/8300 [14:20:45<22:06:34, 15.72s/it]

training loss: 0.7829015851020813


training:  39%|███▉      | 3239/8300 [14:21:01<22:06:27, 15.73s/it]

training loss: 0.7234523296356201


training:  39%|███▉      | 3240/8300 [14:21:17<22:06:14, 15.73s/it]

training loss: 0.6275314688682556


training:  39%|███▉      | 3241/8300 [14:21:32<22:06:00, 15.73s/it]

training loss: 0.6740453243255615


training:  39%|███▉      | 3242/8300 [14:21:48<22:05:50, 15.73s/it]

training loss: 1.2459806203842163


training:  39%|███▉      | 3243/8300 [14:22:04<22:05:35, 15.73s/it]

training loss: 0.8327783942222595


training:  39%|███▉      | 3244/8300 [14:22:20<22:05:14, 15.73s/it]

training loss: 0.928541898727417


training:  39%|███▉      | 3245/8300 [14:22:35<22:05:24, 15.73s/it]

training loss: 0.8074268102645874


training:  39%|███▉      | 3246/8300 [14:22:51<22:05:03, 15.73s/it]

training loss: 0.4798310399055481


training:  39%|███▉      | 3247/8300 [14:23:07<22:04:49, 15.73s/it]

training loss: 0.752690315246582


training:  39%|███▉      | 3248/8300 [14:23:22<22:04:18, 15.73s/it]

training loss: 0.738667905330658


training:  39%|███▉      | 3249/8300 [14:23:38<22:04:03, 15.73s/it]

training loss: 0.8207555413246155


training:  39%|███▉      | 3250/8300 [14:23:54<22:03:42, 15.73s/it]

training loss: 0.774065375328064


training:  39%|███▉      | 3251/8300 [14:24:10<22:03:23, 15.73s/it]

training loss: 0.8606824278831482


training:  39%|███▉      | 3252/8300 [14:24:25<22:03:20, 15.73s/it]

training loss: 0.723574161529541


training:  39%|███▉      | 3253/8300 [14:24:41<22:03:02, 15.73s/it]

training loss: 1.1027485132217407


training:  39%|███▉      | 3254/8300 [14:24:57<22:02:51, 15.73s/it]

training loss: 0.967392086982727


training:  39%|███▉      | 3255/8300 [14:25:13<22:02:26, 15.73s/it]

training loss: 1.0741196870803833


training:  39%|███▉      | 3256/8300 [14:25:28<22:02:15, 15.73s/it]

training loss: 0.9802350401878357


training:  39%|███▉      | 3257/8300 [14:25:44<22:01:57, 15.73s/it]

training loss: 0.6318277716636658


training:  39%|███▉      | 3258/8300 [14:26:00<22:01:33, 15.73s/it]

training loss: 1.0255694389343262


training:  39%|███▉      | 3259/8300 [14:26:15<22:01:02, 15.72s/it]

training loss: 0.6418259143829346


training:  39%|███▉      | 3260/8300 [14:26:31<22:01:03, 15.73s/it]

training loss: 0.6397626399993896


training:  39%|███▉      | 3261/8300 [14:26:47<22:00:34, 15.72s/it]

training loss: 0.77810138463974


training:  39%|███▉      | 3262/8300 [14:27:03<22:00:21, 15.72s/it]

training loss: 0.5858067274093628


training:  39%|███▉      | 3263/8300 [14:27:18<22:00:05, 15.72s/it]

training loss: 0.6970106363296509


training:  39%|███▉      | 3264/8300 [14:27:34<22:00:19, 15.73s/it]

training loss: 0.6413120031356812


training:  39%|███▉      | 3265/8300 [14:27:50<21:59:41, 15.73s/it]

training loss: 0.658132791519165


training:  39%|███▉      | 3266/8300 [14:28:06<21:59:35, 15.73s/it]

training loss: 0.859602689743042


training:  39%|███▉      | 3267/8300 [14:28:21<21:59:19, 15.73s/it]

training loss: 0.8013437986373901


training:  39%|███▉      | 3268/8300 [14:28:37<21:58:59, 15.73s/it]

training loss: 1.0310956239700317


training:  39%|███▉      | 3269/8300 [14:28:53<21:58:27, 15.72s/it]

training loss: 0.8766072392463684


training:  39%|███▉      | 3270/8300 [14:29:08<21:58:03, 15.72s/it]

training loss: 0.4997727572917938


training:  39%|███▉      | 3271/8300 [14:29:24<21:58:00, 15.72s/it]

training loss: 0.44412556290626526


training:  39%|███▉      | 3272/8300 [14:29:40<21:57:43, 15.72s/it]

training loss: 0.7976230382919312


training:  39%|███▉      | 3273/8300 [14:29:56<21:57:46, 15.73s/it]

training loss: 0.9881455302238464


training:  39%|███▉      | 3274/8300 [14:30:11<21:57:07, 15.72s/it]

training loss: 0.4074278473854065


training:  39%|███▉      | 3275/8300 [14:30:27<21:56:45, 15.72s/it]

training loss: 1.1180028915405273


training:  39%|███▉      | 3276/8300 [14:30:43<21:56:18, 15.72s/it]

training loss: 0.719298243522644


training:  39%|███▉      | 3277/8300 [14:30:58<21:55:39, 15.72s/it]

training loss: 1.0276600122451782


training:  39%|███▉      | 3278/8300 [14:31:14<21:55:17, 15.71s/it]

training loss: 0.7776263356208801


training:  40%|███▉      | 3279/8300 [14:31:30<21:54:50, 15.71s/it]

training loss: 0.4452146589756012


training:  40%|███▉      | 3280/8300 [14:31:46<21:54:30, 15.71s/it]

training loss: 0.8888313174247742


training:  40%|███▉      | 3281/8300 [14:32:01<21:54:29, 15.71s/it]

training loss: 0.6388950943946838


training:  40%|███▉      | 3282/8300 [14:32:17<21:53:59, 15.71s/it]

training loss: 0.5967668294906616


training:  40%|███▉      | 3283/8300 [14:32:33<21:53:58, 15.71s/it]

training loss: 0.9567329287528992


training:  40%|███▉      | 3284/8300 [14:32:48<21:53:26, 15.71s/it]

training loss: 0.6469954252243042


training:  40%|███▉      | 3285/8300 [14:33:04<21:53:10, 15.71s/it]

training loss: 0.6139267683029175


training:  40%|███▉      | 3286/8300 [14:33:20<21:52:40, 15.71s/it]

training loss: 0.9242885708808899


training:  40%|███▉      | 3287/8300 [14:33:36<21:52:45, 15.71s/it]

training loss: 0.6739838719367981


training:  40%|███▉      | 3288/8300 [14:33:51<21:52:41, 15.71s/it]

training loss: 0.8842543959617615


training:  40%|███▉      | 3289/8300 [14:34:07<21:52:23, 15.71s/it]

training loss: 0.744102418422699


training:  40%|███▉      | 3290/8300 [14:34:23<21:52:07, 15.71s/it]

training loss: 0.8198238015174866


training:  40%|███▉      | 3291/8300 [14:34:38<21:51:55, 15.71s/it]

training loss: 0.7668466567993164


training:  40%|███▉      | 3292/8300 [14:34:54<21:51:48, 15.72s/it]

training loss: 1.1656615734100342


training:  40%|███▉      | 3293/8300 [14:35:10<21:51:37, 15.72s/it]

training loss: 0.8883131146430969


training:  40%|███▉      | 3294/8300 [14:35:26<21:51:23, 15.72s/it]

training loss: 0.877168595790863


training:  40%|███▉      | 3295/8300 [14:35:41<21:51:12, 15.72s/it]

training loss: 0.7863171100616455


training:  40%|███▉      | 3296/8300 [14:35:57<21:51:04, 15.72s/it]

training loss: 0.5618335008621216


training:  40%|███▉      | 3297/8300 [14:36:13<21:50:36, 15.72s/it]

training loss: 0.5561612844467163


training:  40%|███▉      | 3298/8300 [14:36:28<21:50:13, 15.72s/it]

training loss: 0.9093881845474243


training:  40%|███▉      | 3299/8300 [14:36:44<21:49:55, 15.72s/it]

training loss: 0.7545992136001587


training:  40%|███▉      | 3300/8300 [14:37:00<21:49:57, 15.72s/it]

training loss: 0.8197731375694275
training loss: 0.9591835737228394



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.5374839305877686
e Krymu.
Most buduje Moskva uz dlhsiu dobu s vedomim zranitelnosti vlani
anektovaneho polostrova, ktory je odkazany na dodavky zo znepriatelenej
Ukrajiny.
Ukazalo sa vsak, ze Kercsky pristav sice prud dostal, ale
z energobloku tepelnej elektrarne v Simferopole, ktory bol az doteraz
odstaveny z dovodu opravy. Seremet priznal, ze vydaval zelanie za
skutocnost.
Prva vetva ruskeho energetickeho mosta" ma byt dokoncena
v polovici decembra, dalsie potom v polovici buduceho roka. Elektricka
energia bude na Krym prudit podmorskym kablom. Cely projekt podla ruskych
medii pride rusku statnu pokladnicu na takmer pat miliard rublov.
Spravy z Krymu hovoria o tom, ze region s vypadkom stale tazko
zapasi. Skoly su zatvorene, verejna doprava z vacsej casti nefunguje.
Na cerpacich staniciach sa tvoria dlhe fronty, pretoze ludia si vytvaraju
zasoby pre dieselove generatory. Na problemoch sa prizivuju spekulanti a
bujnie cierny trh.
Podla reportaze age


generating:   0%|          | 1/512 [00:00<01:59,  4.28it/s][A
generating:   0%|          | 2/512 [00:00<01:59,  4.27it/s][A
generating:   1%|          | 3/512 [00:00<01:59,  4.26it/s][A
generating:   1%|          | 4/512 [00:00<01:58,  4.28it/s][A
generating:   1%|          | 5/512 [00:01<01:59,  4.25it/s][A
generating:   1%|          | 6/512 [00:01<01:58,  4.27it/s][A
generating:   1%|▏         | 7/512 [00:01<01:59,  4.24it/s][A
generating:   2%|▏         | 8/512 [00:01<01:58,  4.26it/s][A
generating:   2%|▏         | 9/512 [00:02<01:58,  4.25it/s][A
generating:   2%|▏         | 10/512 [00:02<01:57,  4.27it/s][A
generating:   2%|▏         | 11/512 [00:02<01:57,  4.27it/s][A
generating:   2%|▏         | 12/512 [00:02<01:56,  4.28it/s][A
generating:   3%|▎         | 13/512 [00:03<01:58,  4.23it/s][A
generating:   3%|▎         | 14/512 [00:03<01:57,  4.24it/s][A
generating:   3%|▎         | 15/512 [00:03<01:56,  4.26it/s][A
generating:   3%|▎         | 16/512 [00:03<01:58

u trh su najviac zaznamenanych planov.
Sportovane ako analytikov aj v Severna Ceskovy
startup:
Nielenziny pripad, nako pripravovat a osobny dokumentovanym miestom mat uznali rok.
Preco to v pondelok pocas investorom viac na konferencii sa dostali
medzirocne za uzko pestrujece, ktore je vlastnych miest. A ich
podla neho sa odradni udajov
spravodajskeho priemerne mierne opatreni ponukli mimoriadne pravny na
spotrebnictva zdravotny spotrebitelov priemysel.
Prijimali obchodu jednotiek, podla neho mozno


training:  40%|███▉      | 3302/8300 [14:39:34<57:39:18, 41.53s/it]

training loss: 0.36836105585098267


training:  40%|███▉      | 3303/8300 [14:39:50<46:53:33, 33.78s/it]

training loss: 0.8952335119247437


training:  40%|███▉      | 3304/8300 [14:40:06<39:21:55, 28.37s/it]

training loss: 0.628883421421051


training:  40%|███▉      | 3305/8300 [14:40:21<34:05:33, 24.57s/it]

training loss: 0.8056483268737793


training:  40%|███▉      | 3306/8300 [14:40:37<30:23:49, 21.91s/it]

training loss: 0.8360105752944946


training:  40%|███▉      | 3307/8300 [14:40:53<27:48:17, 20.05s/it]

training loss: 0.7525525689125061


training:  40%|███▉      | 3308/8300 [14:41:09<26:00:01, 18.75s/it]

training loss: 0.873647928237915


training:  40%|███▉      | 3309/8300 [14:41:24<24:44:09, 17.84s/it]

training loss: 1.2126566171646118


training:  40%|███▉      | 3310/8300 [14:41:40<23:50:58, 17.21s/it]

training loss: 0.5255658030509949


training:  40%|███▉      | 3311/8300 [14:41:56<23:13:32, 16.76s/it]

training loss: 0.9536157846450806


training:  40%|███▉      | 3312/8300 [14:42:11<22:47:15, 16.45s/it]

training loss: 0.9123919010162354


training:  40%|███▉      | 3313/8300 [14:42:27<22:28:59, 16.23s/it]

training loss: 0.9746428728103638


training:  40%|███▉      | 3314/8300 [14:42:43<22:16:03, 16.08s/it]

training loss: 0.8095179796218872


training:  40%|███▉      | 3315/8300 [14:42:59<22:06:42, 15.97s/it]

training loss: 1.196831226348877


training:  40%|███▉      | 3316/8300 [14:43:14<22:00:25, 15.90s/it]

training loss: 0.846161961555481


training:  40%|███▉      | 3317/8300 [14:43:30<21:55:58, 15.85s/it]

training loss: 0.5516908168792725


training:  40%|███▉      | 3318/8300 [14:43:46<21:52:20, 15.80s/it]

training loss: 0.6887619495391846


training:  40%|███▉      | 3319/8300 [14:44:01<21:50:08, 15.78s/it]

training loss: 0.708796501159668


training:  40%|████      | 3320/8300 [14:44:17<21:48:15, 15.76s/it]

training loss: 0.9089484810829163


training:  40%|████      | 3321/8300 [14:44:33<21:47:04, 15.75s/it]

training loss: 0.9922752380371094


training:  40%|████      | 3322/8300 [14:44:49<21:45:48, 15.74s/it]

training loss: 0.708368182182312


training:  40%|████      | 3323/8300 [14:45:04<21:45:05, 15.73s/it]

training loss: 0.9267801642417908


training:  40%|████      | 3324/8300 [14:45:20<21:44:18, 15.73s/it]

training loss: 1.2304984331130981


training:  40%|████      | 3325/8300 [14:45:36<21:44:07, 15.73s/it]

training loss: 0.962105393409729


training:  40%|████      | 3326/8300 [14:45:52<21:43:31, 15.72s/it]

training loss: 0.9065317511558533


training:  40%|████      | 3327/8300 [14:46:07<21:43:07, 15.72s/it]

training loss: 0.7193154692649841


training:  40%|████      | 3328/8300 [14:46:23<21:42:45, 15.72s/it]

training loss: 0.9834116697311401


training:  40%|████      | 3329/8300 [14:46:39<21:42:17, 15.72s/it]

training loss: 0.7111293077468872


training:  40%|████      | 3330/8300 [14:46:54<21:41:53, 15.72s/it]

training loss: 0.5429001450538635


training:  40%|████      | 3331/8300 [14:47:10<21:41:38, 15.72s/it]

training loss: 0.5819424986839294


training:  40%|████      | 3332/8300 [14:47:26<21:41:26, 15.72s/it]

training loss: 0.8165755271911621


training:  40%|████      | 3333/8300 [14:47:42<21:41:01, 15.72s/it]

training loss: 0.9879969954490662


training:  40%|████      | 3334/8300 [14:47:57<21:40:46, 15.72s/it]

training loss: 0.6451732516288757


training:  40%|████      | 3335/8300 [14:48:13<21:40:20, 15.71s/it]

training loss: 0.750383198261261


training:  40%|████      | 3336/8300 [14:48:29<21:39:59, 15.71s/it]

training loss: 0.8312118053436279


training:  40%|████      | 3337/8300 [14:48:44<21:39:44, 15.71s/it]

training loss: 0.7667924761772156


training:  40%|████      | 3338/8300 [14:49:00<21:39:26, 15.71s/it]

training loss: 0.6841859817504883


training:  40%|████      | 3339/8300 [14:49:16<21:39:07, 15.71s/it]

training loss: 0.4204502999782562


training:  40%|████      | 3340/8300 [14:49:32<21:38:56, 15.71s/it]

training loss: 0.8272048234939575


training:  40%|████      | 3341/8300 [14:49:47<21:38:41, 15.71s/it]

training loss: 0.5205599665641785


training:  40%|████      | 3342/8300 [14:50:03<21:38:15, 15.71s/it]

training loss: 0.9023576974868774


training:  40%|████      | 3343/8300 [14:50:19<21:38:03, 15.71s/it]

training loss: 0.9991564750671387


training:  40%|████      | 3344/8300 [14:50:34<21:38:01, 15.71s/it]

training loss: 0.7015151977539062


training:  40%|████      | 3345/8300 [14:50:50<21:37:53, 15.72s/it]

training loss: 0.9598737359046936


training:  40%|████      | 3346/8300 [14:51:06<21:37:36, 15.72s/it]

training loss: 0.5650185942649841


training:  40%|████      | 3347/8300 [14:51:22<21:37:14, 15.71s/it]

training loss: 0.8644827604293823


training:  40%|████      | 3348/8300 [14:51:37<21:37:09, 15.72s/it]

training loss: 0.5732144117355347


training:  40%|████      | 3349/8300 [14:51:53<21:36:43, 15.71s/it]

training loss: 0.9010254740715027


training:  40%|████      | 3350/8300 [14:52:09<21:36:20, 15.71s/it]

training loss: 0.939592719078064


training:  40%|████      | 3351/8300 [14:52:24<21:35:54, 15.71s/it]

training loss: 0.9170563220977783


training:  40%|████      | 3352/8300 [14:52:40<21:35:31, 15.71s/it]

training loss: 0.8949844241142273


training:  40%|████      | 3353/8300 [14:52:56<21:35:09, 15.71s/it]

training loss: 0.995984673500061


training:  40%|████      | 3354/8300 [14:53:11<21:35:04, 15.71s/it]

training loss: 0.9755156636238098


training:  40%|████      | 3355/8300 [14:53:27<21:34:57, 15.71s/it]

training loss: 0.8536665439605713


training:  40%|████      | 3356/8300 [14:53:43<21:34:33, 15.71s/it]

training loss: 1.0916258096694946


training:  40%|████      | 3357/8300 [14:53:59<21:34:28, 15.71s/it]

training loss: 0.6428082585334778


training:  40%|████      | 3358/8300 [14:54:14<21:34:13, 15.71s/it]

training loss: 0.6865341663360596


training:  40%|████      | 3359/8300 [14:54:30<21:34:23, 15.72s/it]

training loss: 0.8771119117736816


training:  40%|████      | 3360/8300 [14:54:46<21:34:11, 15.72s/it]

training loss: 1.0446821451187134


training:  40%|████      | 3361/8300 [14:55:02<21:34:18, 15.72s/it]

training loss: 0.9075294137001038


training:  41%|████      | 3362/8300 [14:55:17<21:34:15, 15.73s/it]

training loss: 1.1503134965896606


training:  41%|████      | 3363/8300 [14:55:33<21:34:03, 15.73s/it]

training loss: 0.9153060913085938


training:  41%|████      | 3364/8300 [14:55:49<21:33:49, 15.73s/it]

training loss: 0.610046923160553


training:  41%|████      | 3365/8300 [14:56:04<21:33:32, 15.73s/it]

training loss: 0.34510624408721924


training:  41%|████      | 3366/8300 [14:56:20<21:33:11, 15.73s/it]

training loss: 0.8147354125976562


training:  41%|████      | 3367/8300 [14:56:36<21:33:11, 15.73s/it]

training loss: 0.9095804691314697


training:  41%|████      | 3368/8300 [14:56:52<21:32:45, 15.73s/it]

training loss: 0.8022988438606262


training:  41%|████      | 3369/8300 [14:57:07<21:32:27, 15.73s/it]

training loss: 0.6403508186340332


training:  41%|████      | 3370/8300 [14:57:23<21:32:05, 15.73s/it]

training loss: 0.5628072023391724


training:  41%|████      | 3371/8300 [14:57:39<21:31:57, 15.73s/it]

training loss: 0.9513090252876282


training:  41%|████      | 3372/8300 [14:57:55<21:31:30, 15.72s/it]

training loss: 0.6157934069633484


training:  41%|████      | 3373/8300 [14:58:10<21:31:21, 15.73s/it]

training loss: 0.4703559875488281


training:  41%|████      | 3374/8300 [14:58:26<21:30:56, 15.72s/it]

training loss: 1.0552984476089478


training:  41%|████      | 3375/8300 [14:58:42<21:30:45, 15.72s/it]

training loss: 0.9636229276657104


training:  41%|████      | 3376/8300 [14:58:57<21:30:32, 15.73s/it]

training loss: 0.6891428232192993


training:  41%|████      | 3377/8300 [14:59:13<21:30:20, 15.73s/it]

training loss: 0.838192343711853


training:  41%|████      | 3378/8300 [14:59:29<21:29:53, 15.72s/it]

training loss: 0.5580785870552063


training:  41%|████      | 3379/8300 [14:59:45<21:29:08, 15.72s/it]

training loss: 0.6604592800140381


training:  41%|████      | 3380/8300 [15:00:00<21:28:54, 15.72s/it]

training loss: 0.6700735092163086


training:  41%|████      | 3381/8300 [15:00:16<21:28:27, 15.72s/it]

training loss: 0.761498749256134


training:  41%|████      | 3382/8300 [15:00:32<21:28:16, 15.72s/it]

training loss: 0.8064712285995483


training:  41%|████      | 3383/8300 [15:00:47<21:27:54, 15.72s/it]

training loss: 0.8836084008216858


training:  41%|████      | 3384/8300 [15:01:03<21:27:48, 15.72s/it]

training loss: 0.5994902849197388


training:  41%|████      | 3385/8300 [15:01:19<21:27:09, 15.71s/it]

training loss: 0.8103307485580444


training:  41%|████      | 3386/8300 [15:01:35<21:27:00, 15.71s/it]

training loss: 0.6592382192611694


training:  41%|████      | 3387/8300 [15:01:50<21:26:31, 15.71s/it]

training loss: 0.7389658689498901


training:  41%|████      | 3388/8300 [15:02:06<21:26:12, 15.71s/it]

training loss: 0.6406524777412415


training:  41%|████      | 3389/8300 [15:02:22<21:25:52, 15.71s/it]

training loss: 0.5920177102088928


training:  41%|████      | 3390/8300 [15:02:37<21:25:40, 15.71s/it]

training loss: 0.6578578948974609


training:  41%|████      | 3391/8300 [15:02:53<21:25:35, 15.71s/it]

training loss: 0.849392294883728


training:  41%|████      | 3392/8300 [15:03:09<21:25:30, 15.72s/it]

training loss: 0.5993920564651489


training:  41%|████      | 3393/8300 [15:03:25<21:25:17, 15.72s/it]

training loss: 0.8456899523735046


training:  41%|████      | 3394/8300 [15:03:40<21:25:05, 15.72s/it]

training loss: 0.8967123627662659


training:  41%|████      | 3395/8300 [15:03:56<21:24:56, 15.72s/it]

training loss: 1.122460961341858


training:  41%|████      | 3396/8300 [15:04:12<21:24:51, 15.72s/it]

training loss: 0.7555673122406006


training:  41%|████      | 3397/8300 [15:04:27<21:24:30, 15.72s/it]

training loss: 0.7604162693023682


training:  41%|████      | 3398/8300 [15:04:43<21:24:13, 15.72s/it]

training loss: 0.5229756832122803


training:  41%|████      | 3399/8300 [15:04:59<21:24:07, 15.72s/it]

training loss: 0.8186779022216797


training:  41%|████      | 3400/8300 [15:05:15<21:24:00, 15.72s/it]

training loss: 0.9687289595603943
training loss: 0.7184959053993225


training:  41%|████      | 3401/8300 [15:05:32<21:56:00, 16.12s/it]

validation loss: 1.505021333694458


training:  41%|████      | 3402/8300 [15:05:47<21:46:33, 16.01s/it]

training loss: 1.2565072774887085


training:  41%|████      | 3403/8300 [15:06:03<21:39:39, 15.92s/it]

training loss: 0.6156991720199585


training:  41%|████      | 3404/8300 [15:06:19<21:34:06, 15.86s/it]

training loss: 1.1183204650878906


training:  41%|████      | 3405/8300 [15:06:35<21:30:08, 15.81s/it]

training loss: 1.0740114450454712


training:  41%|████      | 3406/8300 [15:06:50<21:27:05, 15.78s/it]

training loss: 0.8028598427772522


training:  41%|████      | 3407/8300 [15:07:06<21:25:16, 15.76s/it]

training loss: 0.804407000541687


training:  41%|████      | 3408/8300 [15:07:22<21:23:44, 15.74s/it]

training loss: 0.6671944856643677


training:  41%|████      | 3409/8300 [15:07:37<21:22:54, 15.74s/it]

training loss: 1.048327088356018


training:  41%|████      | 3410/8300 [15:07:53<21:21:50, 15.73s/it]

training loss: 0.6916337609291077


training:  41%|████      | 3411/8300 [15:08:09<21:21:17, 15.72s/it]

training loss: 0.8554273247718811


training:  41%|████      | 3412/8300 [15:08:25<21:20:47, 15.72s/it]

training loss: 0.9230113625526428


training:  41%|████      | 3413/8300 [15:08:40<21:20:07, 15.72s/it]

training loss: 0.8634432554244995


training:  41%|████      | 3414/8300 [15:08:56<21:19:45, 15.72s/it]

training loss: 0.853408694267273


training:  41%|████      | 3415/8300 [15:09:12<21:19:30, 15.72s/it]

training loss: 1.0650720596313477


training:  41%|████      | 3416/8300 [15:09:27<21:19:16, 15.72s/it]

training loss: 0.6928870677947998


training:  41%|████      | 3417/8300 [15:09:43<21:19:04, 15.72s/it]

training loss: 0.8383166193962097


training:  41%|████      | 3418/8300 [15:09:59<21:18:56, 15.72s/it]

training loss: 0.9599282741546631


training:  41%|████      | 3419/8300 [15:10:15<21:18:23, 15.71s/it]

training loss: 0.9179204106330872


training:  41%|████      | 3420/8300 [15:10:30<21:18:16, 15.72s/it]

training loss: 0.8134912848472595


training:  41%|████      | 3421/8300 [15:10:46<21:17:52, 15.71s/it]

training loss: 0.7947185039520264


training:  41%|████      | 3422/8300 [15:11:02<21:17:48, 15.72s/it]

training loss: 0.6303917169570923


training:  41%|████      | 3423/8300 [15:11:17<21:17:36, 15.72s/it]

training loss: 0.8399576544761658


training:  41%|████▏     | 3424/8300 [15:11:33<21:17:23, 15.72s/it]

training loss: 0.7553879618644714


training:  41%|████▏     | 3425/8300 [15:11:49<21:16:48, 15.71s/it]

training loss: 0.6795659065246582


training:  41%|████▏     | 3426/8300 [15:12:05<21:16:47, 15.72s/it]

training loss: 0.6623528599739075


training:  41%|████▏     | 3427/8300 [15:12:20<21:16:25, 15.72s/it]

training loss: 0.9958129525184631


training:  41%|████▏     | 3428/8300 [15:12:36<21:16:21, 15.72s/it]

training loss: 0.7186442017555237


training:  41%|████▏     | 3429/8300 [15:12:52<21:15:51, 15.72s/it]

training loss: 0.6888190507888794


training:  41%|████▏     | 3430/8300 [15:13:07<21:15:46, 15.72s/it]

training loss: 1.198544979095459


training:  41%|████▏     | 3431/8300 [15:13:23<21:15:31, 15.72s/it]

training loss: 1.1585724353790283


training:  41%|████▏     | 3432/8300 [15:13:39<21:15:22, 15.72s/it]

training loss: 0.6544433236122131


training:  41%|████▏     | 3433/8300 [15:13:55<21:15:03, 15.72s/it]

training loss: 0.7398781776428223


training:  41%|████▏     | 3434/8300 [15:14:10<21:15:01, 15.72s/it]

training loss: 0.7327886819839478


training:  41%|████▏     | 3435/8300 [15:14:26<21:14:40, 15.72s/it]

training loss: 0.4260586202144623


training:  41%|████▏     | 3436/8300 [15:14:42<21:14:09, 15.72s/it]

training loss: 1.1045312881469727


training:  41%|████▏     | 3437/8300 [15:14:57<21:13:45, 15.72s/it]

training loss: 0.786285936832428


training:  41%|████▏     | 3438/8300 [15:15:13<21:13:27, 15.72s/it]

training loss: 0.8080925941467285


training:  41%|████▏     | 3439/8300 [15:15:29<21:13:17, 15.72s/it]

training loss: 0.9071495532989502


training:  41%|████▏     | 3440/8300 [15:15:45<21:12:49, 15.71s/it]

training loss: 0.7367002964019775


training:  41%|████▏     | 3441/8300 [15:16:00<21:12:38, 15.71s/it]

training loss: 0.44380584359169006


training:  41%|████▏     | 3442/8300 [15:16:16<21:12:19, 15.71s/it]

training loss: 0.6240499019622803


training:  41%|████▏     | 3443/8300 [15:16:32<21:12:15, 15.72s/it]

training loss: 1.201812505722046


training:  41%|████▏     | 3444/8300 [15:16:47<21:11:57, 15.72s/it]

training loss: 0.518021821975708


training:  42%|████▏     | 3445/8300 [15:17:03<21:11:48, 15.72s/it]

training loss: 0.6997420787811279


training:  42%|████▏     | 3446/8300 [15:17:19<21:11:37, 15.72s/it]

training loss: 0.625336229801178


training:  42%|████▏     | 3447/8300 [15:17:35<21:11:22, 15.72s/it]

training loss: 0.30645477771759033


training:  42%|████▏     | 3448/8300 [15:17:50<21:10:53, 15.72s/it]

training loss: 0.43061012029647827


training:  42%|████▏     | 3449/8300 [15:18:06<21:10:36, 15.72s/it]

training loss: 0.8904708623886108


training:  42%|████▏     | 3450/8300 [15:18:22<21:10:16, 15.71s/it]

training loss: 0.7391884326934814


training:  42%|████▏     | 3451/8300 [15:18:37<21:09:50, 15.71s/it]

training loss: 1.1302860975265503


training:  42%|████▏     | 3452/8300 [15:18:53<21:09:16, 15.71s/it]

training loss: 0.9910179972648621


training:  42%|████▏     | 3453/8300 [15:19:08<20:58:19, 15.58s/it]

training loss: 0.8076590299606323


training:  42%|████▏     | 3454/8300 [15:19:24<21:01:11, 15.62s/it]

training loss: 0.7875903844833374


training:  42%|████▏     | 3455/8300 [15:19:40<21:03:24, 15.65s/it]

training loss: 0.8671846389770508


training:  42%|████▏     | 3456/8300 [15:19:56<21:04:46, 15.67s/it]

training loss: 0.6377095580101013


training:  42%|████▏     | 3457/8300 [15:20:11<21:05:38, 15.68s/it]

training loss: 1.1533679962158203


training:  42%|████▏     | 3458/8300 [15:20:27<21:06:22, 15.69s/it]

training loss: 1.0539076328277588


training:  42%|████▏     | 3459/8300 [15:20:43<21:06:28, 15.70s/it]

training loss: 0.8764296770095825


training:  42%|████▏     | 3460/8300 [15:20:58<21:06:30, 15.70s/it]

training loss: 0.6787269115447998


training:  42%|████▏     | 3461/8300 [15:21:14<21:06:26, 15.70s/it]

training loss: 0.6450144648551941


training:  42%|████▏     | 3462/8300 [15:21:30<21:06:20, 15.70s/it]

training loss: 0.5759896039962769


training:  42%|████▏     | 3463/8300 [15:21:46<21:06:07, 15.71s/it]

training loss: 0.7636218070983887


training:  42%|████▏     | 3464/8300 [15:22:01<21:06:04, 15.71s/it]

training loss: 0.7341694831848145


training:  42%|████▏     | 3465/8300 [15:22:17<21:05:54, 15.71s/it]

training loss: 0.7719695568084717


training:  42%|████▏     | 3466/8300 [15:22:33<21:05:57, 15.71s/it]

training loss: 0.5955623984336853


training:  42%|████▏     | 3467/8300 [15:22:48<21:05:40, 15.71s/it]

training loss: 0.6045534014701843


training:  42%|████▏     | 3468/8300 [15:23:04<21:05:25, 15.71s/it]

training loss: 0.7744283080101013


training:  42%|████▏     | 3469/8300 [15:23:20<21:04:51, 15.71s/it]

training loss: 0.7994210720062256


training:  42%|████▏     | 3470/8300 [15:23:36<21:04:46, 15.71s/it]

training loss: 0.7672518491744995


training:  42%|████▏     | 3471/8300 [15:23:51<21:04:10, 15.71s/it]

training loss: 0.5572499632835388


training:  42%|████▏     | 3472/8300 [15:24:07<21:03:54, 15.71s/it]

training loss: 0.22607746720314026


training:  42%|████▏     | 3473/8300 [15:24:23<21:03:39, 15.71s/it]

training loss: 1.001210331916809


training:  42%|████▏     | 3474/8300 [15:24:38<21:03:29, 15.71s/it]

training loss: 0.8079146146774292


training:  42%|████▏     | 3475/8300 [15:24:54<21:03:02, 15.71s/it]

training loss: 0.7456344962120056


training:  42%|████▏     | 3476/8300 [15:25:10<21:03:06, 15.71s/it]

training loss: 0.8775968551635742


training:  42%|████▏     | 3477/8300 [15:25:25<21:02:49, 15.71s/it]

training loss: 0.9212328791618347


training:  42%|████▏     | 3478/8300 [15:25:41<21:02:30, 15.71s/it]

training loss: 0.4626779556274414


training:  42%|████▏     | 3479/8300 [15:25:57<21:02:06, 15.71s/it]

training loss: 0.5857800841331482


training:  42%|████▏     | 3480/8300 [15:26:13<21:02:01, 15.71s/it]

training loss: 0.9358648061752319


training:  42%|████▏     | 3481/8300 [15:26:28<21:02:00, 15.71s/it]

training loss: 0.7120007276535034


training:  42%|████▏     | 3482/8300 [15:26:44<21:01:36, 15.71s/it]

training loss: 0.4721482992172241


training:  42%|████▏     | 3483/8300 [15:27:00<21:01:20, 15.71s/it]

training loss: 0.5658686757087708


training:  42%|████▏     | 3484/8300 [15:27:15<21:01:06, 15.71s/it]

training loss: 0.6866877675056458


training:  42%|████▏     | 3485/8300 [15:27:31<21:01:07, 15.71s/it]

training loss: 0.8861229419708252


training:  42%|████▏     | 3486/8300 [15:27:47<21:00:59, 15.72s/it]

training loss: 1.0490175485610962


training:  42%|████▏     | 3487/8300 [15:28:03<21:00:35, 15.71s/it]

training loss: 1.0713632106781006


training:  42%|████▏     | 3488/8300 [15:28:18<21:00:14, 15.71s/it]

training loss: 0.9353979825973511


training:  42%|████▏     | 3489/8300 [15:28:34<21:00:06, 15.72s/it]

training loss: 0.8595417141914368


training:  42%|████▏     | 3490/8300 [15:28:50<20:59:48, 15.71s/it]

training loss: 1.086721420288086


training:  42%|████▏     | 3491/8300 [15:29:05<20:59:30, 15.71s/it]

training loss: 0.83262038230896


training:  42%|████▏     | 3492/8300 [15:29:21<20:59:20, 15.72s/it]

training loss: 0.9685766696929932


training:  42%|████▏     | 3493/8300 [15:29:37<20:59:02, 15.72s/it]

training loss: 0.6489087343215942


training:  42%|████▏     | 3494/8300 [15:29:53<20:58:35, 15.71s/it]

training loss: 0.7301791906356812


training:  42%|████▏     | 3495/8300 [15:30:08<20:58:20, 15.71s/it]

training loss: 0.6929766535758972


training:  42%|████▏     | 3496/8300 [15:30:24<20:58:00, 15.71s/it]

training loss: 0.47817572951316833


training:  42%|████▏     | 3497/8300 [15:30:40<20:58:09, 15.72s/it]

training loss: 0.745576024055481


training:  42%|████▏     | 3498/8300 [15:30:55<20:57:34, 15.71s/it]

training loss: 0.8366590738296509


training:  42%|████▏     | 3499/8300 [15:31:11<20:57:18, 15.71s/it]

training loss: 1.0455658435821533


training:  42%|████▏     | 3500/8300 [15:31:27<20:57:05, 15.71s/it]

training loss: 0.6962555050849915
training loss: 0.8250600099563599


training:  42%|████▏     | 3501/8300 [15:31:44<21:27:19, 16.09s/it]

validation loss: 1.5545644760131836


training:  42%|████▏     | 3502/8300 [15:32:00<21:18:27, 15.99s/it]

training loss: 1.1109302043914795


training:  42%|████▏     | 3503/8300 [15:32:15<21:11:25, 15.90s/it]

training loss: 0.7814075946807861


training:  42%|████▏     | 3504/8300 [15:32:31<21:06:37, 15.85s/it]

training loss: 0.5695316195487976


training:  42%|████▏     | 3505/8300 [15:32:47<21:03:04, 15.80s/it]

training loss: 0.8029266595840454


training:  42%|████▏     | 3506/8300 [15:33:02<21:00:20, 15.77s/it]

training loss: 0.8470455408096313


training:  42%|████▏     | 3507/8300 [15:33:18<20:58:35, 15.76s/it]

training loss: 0.5807366371154785


training:  42%|████▏     | 3508/8300 [15:33:34<20:57:13, 15.74s/it]

training loss: 0.7596811056137085


training:  42%|████▏     | 3509/8300 [15:33:50<20:56:07, 15.73s/it]

training loss: 0.8873559832572937


training:  42%|████▏     | 3510/8300 [15:34:05<20:55:33, 15.73s/it]

training loss: 0.5626455545425415


training:  42%|████▏     | 3511/8300 [15:34:21<20:54:43, 15.72s/it]

training loss: 0.6738250851631165


training:  42%|████▏     | 3512/8300 [15:34:37<20:54:31, 15.72s/it]

training loss: 0.5336824059486389


training:  42%|████▏     | 3513/8300 [15:34:52<20:53:47, 15.71s/it]

training loss: 0.7555234432220459


training:  42%|████▏     | 3514/8300 [15:35:08<20:53:34, 15.72s/it]

training loss: 0.8123164176940918


training:  42%|████▏     | 3515/8300 [15:35:24<20:53:10, 15.71s/it]

training loss: 0.8678979873657227


training:  42%|████▏     | 3516/8300 [15:35:40<20:52:50, 15.71s/it]

training loss: 0.9175423979759216


training:  42%|████▏     | 3517/8300 [15:35:55<20:52:23, 15.71s/it]

training loss: 0.9798058271408081


training:  42%|████▏     | 3518/8300 [15:36:11<20:52:15, 15.71s/it]

training loss: 0.6077053546905518


training:  42%|████▏     | 3519/8300 [15:36:27<20:52:08, 15.71s/it]

training loss: 0.6444108486175537


training:  42%|████▏     | 3520/8300 [15:36:42<20:51:45, 15.71s/it]

training loss: 1.0649350881576538


training:  42%|████▏     | 3521/8300 [15:36:58<20:51:27, 15.71s/it]

training loss: 0.9520196914672852


training:  42%|████▏     | 3522/8300 [15:37:14<20:51:06, 15.71s/it]

training loss: 0.6526714563369751


training:  42%|████▏     | 3523/8300 [15:37:30<20:50:53, 15.71s/it]

training loss: 0.8201394081115723


training:  42%|████▏     | 3524/8300 [15:37:45<20:50:45, 15.71s/it]

training loss: 0.8059520721435547


training:  42%|████▏     | 3525/8300 [15:38:01<20:50:31, 15.71s/it]

training loss: 0.5660688877105713


training:  42%|████▏     | 3526/8300 [15:38:17<20:50:05, 15.71s/it]

training loss: 0.7567744255065918


training:  42%|████▏     | 3527/8300 [15:38:32<20:49:55, 15.71s/it]

training loss: 0.7925704717636108


training:  43%|████▎     | 3528/8300 [15:38:48<20:49:40, 15.71s/it]

training loss: 0.4511640667915344


training:  43%|████▎     | 3529/8300 [15:39:04<20:49:22, 15.71s/it]

training loss: 1.0258142948150635


training:  43%|████▎     | 3530/8300 [15:39:20<20:48:57, 15.71s/it]

training loss: 0.8647627234458923


training:  43%|████▎     | 3531/8300 [15:39:35<20:48:57, 15.71s/it]

training loss: 1.0433601140975952


training:  43%|████▎     | 3532/8300 [15:39:51<20:48:22, 15.71s/it]

training loss: 0.5500092506408691


training:  43%|████▎     | 3533/8300 [15:40:07<20:48:13, 15.71s/it]

training loss: 0.8820027112960815


training:  43%|████▎     | 3534/8300 [15:40:22<20:47:53, 15.71s/it]

training loss: 0.8222206234931946


training:  43%|████▎     | 3535/8300 [15:40:38<20:47:42, 15.71s/it]

training loss: 0.6979463696479797


training:  43%|████▎     | 3536/8300 [15:40:54<20:47:26, 15.71s/it]

training loss: 1.1347976922988892


training:  43%|████▎     | 3537/8300 [15:41:10<20:47:19, 15.71s/it]

training loss: 1.2035291194915771


training:  43%|████▎     | 3538/8300 [15:41:25<20:47:10, 15.71s/it]

training loss: 0.7098070383071899


training:  43%|████▎     | 3539/8300 [15:41:41<20:46:58, 15.71s/it]

training loss: 0.7917704582214355


training:  43%|████▎     | 3540/8300 [15:41:57<20:46:39, 15.71s/it]

training loss: 0.6118882894515991


training:  43%|████▎     | 3541/8300 [15:42:12<20:46:43, 15.72s/it]

training loss: 0.6998094320297241


training:  43%|████▎     | 3542/8300 [15:42:28<20:46:24, 15.72s/it]

training loss: 0.7544822096824646


training:  43%|████▎     | 3543/8300 [15:42:44<20:46:00, 15.72s/it]

training loss: 0.6727355718612671


training:  43%|████▎     | 3544/8300 [15:43:00<20:45:30, 15.71s/it]

training loss: 0.7960420846939087


training:  43%|████▎     | 3545/8300 [15:43:15<20:45:24, 15.71s/it]

training loss: 0.5366857647895813


training:  43%|████▎     | 3546/8300 [15:43:31<20:45:08, 15.71s/it]

training loss: 1.0857678651809692


training:  43%|████▎     | 3547/8300 [15:43:47<20:44:56, 15.72s/it]

training loss: 0.9596290588378906


training:  43%|████▎     | 3548/8300 [15:44:02<20:44:56, 15.72s/it]

training loss: 0.6924207210540771


training:  43%|████▎     | 3549/8300 [15:44:18<20:44:31, 15.72s/it]

training loss: 0.8838238716125488


training:  43%|████▎     | 3550/8300 [15:44:34<20:44:15, 15.72s/it]

training loss: 0.31302642822265625


training:  43%|████▎     | 3551/8300 [15:44:50<20:43:50, 15.71s/it]

training loss: 0.8881303668022156


training:  43%|████▎     | 3552/8300 [15:45:05<20:43:37, 15.72s/it]

training loss: 1.1688512563705444


training:  43%|████▎     | 3553/8300 [15:45:21<20:43:11, 15.71s/it]

training loss: 0.6115614175796509


training:  43%|████▎     | 3554/8300 [15:45:37<20:45:33, 15.75s/it]

training loss: 0.8980132341384888


training:  43%|████▎     | 3555/8300 [15:45:52<20:44:28, 15.74s/it]

training loss: 0.8728383779525757


training:  43%|████▎     | 3556/8300 [15:46:08<20:43:36, 15.73s/it]

training loss: 0.8855529427528381


training:  43%|████▎     | 3557/8300 [15:46:24<20:43:02, 15.72s/it]

training loss: 0.7629115581512451


training:  43%|████▎     | 3558/8300 [15:46:40<20:42:34, 15.72s/it]

training loss: 0.5281672477722168


training:  43%|████▎     | 3559/8300 [15:46:55<20:42:06, 15.72s/it]

training loss: 0.5568723082542419


training:  43%|████▎     | 3560/8300 [15:47:11<20:41:46, 15.72s/it]

training loss: 0.5072113871574402


training:  43%|████▎     | 3561/8300 [15:47:27<20:41:20, 15.72s/it]

training loss: 0.48432227969169617


training:  43%|████▎     | 3562/8300 [15:47:42<20:41:00, 15.72s/it]

training loss: 0.6457217931747437


training:  43%|████▎     | 3563/8300 [15:47:58<20:40:50, 15.72s/it]

training loss: 0.5115761756896973


training:  43%|████▎     | 3564/8300 [15:48:14<20:40:36, 15.72s/it]

training loss: 0.9323529601097107


training:  43%|████▎     | 3565/8300 [15:48:30<20:40:24, 15.72s/it]

training loss: 0.7184406518936157


training:  43%|████▎     | 3566/8300 [15:48:45<20:39:53, 15.71s/it]

training loss: 0.7544487118721008


training:  43%|████▎     | 3567/8300 [15:49:01<20:39:44, 15.72s/it]

training loss: 0.6876235604286194


training:  43%|████▎     | 3568/8300 [15:49:17<20:39:25, 15.72s/it]

training loss: 0.6003475785255432


training:  43%|████▎     | 3569/8300 [15:49:33<20:39:17, 15.72s/it]

training loss: 0.3879684805870056


training:  43%|████▎     | 3570/8300 [15:49:48<20:38:57, 15.72s/it]

training loss: 0.7951600551605225


training:  43%|████▎     | 3571/8300 [15:50:04<20:38:47, 15.72s/it]

training loss: 0.6227291822433472


training:  43%|████▎     | 3572/8300 [15:50:20<20:38:33, 15.72s/it]

training loss: 0.6967201828956604


training:  43%|████▎     | 3573/8300 [15:50:35<20:38:26, 15.72s/it]

training loss: 0.6275123953819275


training:  43%|████▎     | 3574/8300 [15:50:51<20:37:58, 15.72s/it]

training loss: 0.7195037603378296


training:  43%|████▎     | 3575/8300 [15:51:07<20:37:35, 15.72s/it]

training loss: 0.882370114326477


training:  43%|████▎     | 3576/8300 [15:51:23<20:37:19, 15.72s/it]

training loss: 0.8675951957702637


training:  43%|████▎     | 3577/8300 [15:51:38<20:36:56, 15.71s/it]

training loss: 1.2306022644042969


training:  43%|████▎     | 3578/8300 [15:51:54<20:36:37, 15.71s/it]

training loss: 0.715933084487915


training:  43%|████▎     | 3579/8300 [15:52:10<20:36:28, 15.71s/it]

training loss: 0.7070502042770386


training:  43%|████▎     | 3580/8300 [15:52:25<20:36:15, 15.72s/it]

training loss: 0.424225777387619


training:  43%|████▎     | 3581/8300 [15:52:41<20:35:58, 15.71s/it]

training loss: 0.9003475308418274


training:  43%|████▎     | 3582/8300 [15:52:57<20:35:35, 15.71s/it]

training loss: 0.8005198240280151


training:  43%|████▎     | 3583/8300 [15:53:13<20:35:10, 15.71s/it]

training loss: 0.9785985946655273


training:  43%|████▎     | 3584/8300 [15:53:28<20:34:59, 15.71s/it]

training loss: 0.6662797331809998


training:  43%|████▎     | 3585/8300 [15:53:44<20:34:48, 15.71s/it]

training loss: 0.9736520051956177


training:  43%|████▎     | 3586/8300 [15:54:00<20:34:29, 15.71s/it]

training loss: 1.072402834892273


training:  43%|████▎     | 3587/8300 [15:54:15<20:34:11, 15.71s/it]

training loss: 0.9178476333618164


training:  43%|████▎     | 3588/8300 [15:54:31<20:34:01, 15.71s/it]

training loss: 0.7005913257598877


training:  43%|████▎     | 3589/8300 [15:54:47<20:33:35, 15.71s/it]

training loss: 0.9559060335159302


training:  43%|████▎     | 3590/8300 [15:55:02<20:33:21, 15.71s/it]

training loss: 0.791524350643158


training:  43%|████▎     | 3591/8300 [15:55:18<20:33:05, 15.71s/it]

training loss: 0.8639706969261169


training:  43%|████▎     | 3592/8300 [15:55:34<20:32:46, 15.71s/it]

training loss: 0.9952534437179565


training:  43%|████▎     | 3593/8300 [15:55:50<20:32:26, 15.71s/it]

training loss: 0.9158397912979126


training:  43%|████▎     | 3594/8300 [15:56:05<20:32:09, 15.71s/it]

training loss: 0.6129354238510132


training:  43%|████▎     | 3595/8300 [15:56:21<20:31:57, 15.71s/it]

training loss: 0.9147069454193115


training:  43%|████▎     | 3596/8300 [15:56:37<20:31:58, 15.71s/it]

training loss: 1.0101566314697266


training:  43%|████▎     | 3597/8300 [15:56:52<20:31:36, 15.71s/it]

training loss: 0.9228949546813965


training:  43%|████▎     | 3598/8300 [15:57:08<20:31:36, 15.72s/it]

training loss: 0.6043170690536499


training:  43%|████▎     | 3599/8300 [15:57:24<20:31:25, 15.72s/it]

training loss: 0.8941330313682556


training:  43%|████▎     | 3600/8300 [15:57:40<20:30:59, 15.71s/it]

training loss: 0.7943129539489746
training loss: 0.5627073645591736


training:  43%|████▎     | 3601/8300 [15:57:57<21:01:09, 16.10s/it]

validation loss: 1.5464143753051758


training:  43%|████▎     | 3602/8300 [15:58:12<20:52:05, 15.99s/it]

training loss: 0.8210725784301758


training:  43%|████▎     | 3603/8300 [15:58:28<20:45:21, 15.91s/it]

training loss: 1.0361909866333008


training:  43%|████▎     | 3604/8300 [15:58:44<20:40:33, 15.85s/it]

training loss: 0.7840206623077393


training:  43%|████▎     | 3605/8300 [15:59:00<20:36:48, 15.81s/it]

training loss: 0.8056491017341614


training:  43%|████▎     | 3606/8300 [15:59:15<20:34:24, 15.78s/it]

training loss: 0.8837175965309143


training:  43%|████▎     | 3607/8300 [15:59:31<20:32:41, 15.76s/it]

training loss: 0.6938267946243286


training:  43%|████▎     | 3608/8300 [15:59:47<20:31:10, 15.74s/it]

training loss: 0.9438382387161255


training:  43%|████▎     | 3609/8300 [16:00:02<20:30:26, 15.74s/it]

training loss: 0.9754184484481812


training:  43%|████▎     | 3610/8300 [16:00:18<20:29:37, 15.73s/it]

training loss: 0.8443790078163147


training:  44%|████▎     | 3611/8300 [16:00:34<20:29:03, 15.73s/it]

training loss: 0.6899122595787048


training:  44%|████▎     | 3612/8300 [16:00:50<20:28:26, 15.72s/it]

training loss: 0.9575009942054749


training:  44%|████▎     | 3613/8300 [16:01:05<20:28:15, 15.72s/it]

training loss: 0.9608668684959412


training:  44%|████▎     | 3614/8300 [16:01:21<20:27:49, 15.72s/it]

training loss: 0.5314207077026367


training:  44%|████▎     | 3615/8300 [16:01:37<20:27:40, 15.72s/it]

training loss: 0.6438612937927246


training:  44%|████▎     | 3616/8300 [16:01:52<20:27:21, 15.72s/it]

training loss: 0.6461908221244812


training:  44%|████▎     | 3617/8300 [16:02:08<20:27:32, 15.73s/it]

training loss: 0.9959957599639893


training:  44%|████▎     | 3618/8300 [16:02:24<20:27:22, 15.73s/it]

training loss: 0.6911035180091858


training:  44%|████▎     | 3619/8300 [16:02:40<20:26:54, 15.73s/it]

training loss: 0.5082535743713379


training:  44%|████▎     | 3620/8300 [16:02:55<20:26:25, 15.72s/it]

training loss: 1.020770788192749


training:  44%|████▎     | 3621/8300 [16:03:11<20:25:50, 15.72s/it]

training loss: 0.6383355855941772


training:  44%|████▎     | 3622/8300 [16:03:27<20:25:26, 15.72s/it]

training loss: 0.6801975965499878


training:  44%|████▎     | 3623/8300 [16:03:42<20:25:09, 15.72s/it]

training loss: 0.5056484341621399


training:  44%|████▎     | 3624/8300 [16:03:58<20:24:37, 15.71s/it]

training loss: 0.8245260715484619


training:  44%|████▎     | 3625/8300 [16:04:14<20:24:24, 15.71s/it]

training loss: 0.6694186329841614


training:  44%|████▎     | 3626/8300 [16:04:30<20:24:14, 15.72s/it]

training loss: 0.770950973033905


training:  44%|████▎     | 3627/8300 [16:04:45<20:23:52, 15.71s/it]

training loss: 0.8226335048675537


training:  44%|████▎     | 3628/8300 [16:05:01<20:23:49, 15.72s/it]

training loss: 0.8770482540130615


training:  44%|████▎     | 3629/8300 [16:05:17<20:23:28, 15.72s/it]

training loss: 0.6383912563323975


training:  44%|████▎     | 3630/8300 [16:05:32<20:23:21, 15.72s/it]

training loss: 1.1166911125183105


training:  44%|████▎     | 3631/8300 [16:05:48<20:22:52, 15.71s/it]

training loss: 0.49690568447113037


training:  44%|████▍     | 3632/8300 [16:06:04<20:22:34, 15.71s/it]

training loss: 0.5439181923866272


training:  44%|████▍     | 3633/8300 [16:06:20<20:22:12, 15.71s/it]

training loss: 0.6081442832946777


training:  44%|████▍     | 3634/8300 [16:06:35<20:22:11, 15.72s/it]

training loss: 0.6895811557769775


training:  44%|████▍     | 3635/8300 [16:06:51<20:22:01, 15.72s/it]

training loss: 0.8600895404815674


training:  44%|████▍     | 3636/8300 [16:07:07<20:21:51, 15.72s/it]

training loss: 0.7120239734649658


training:  44%|████▍     | 3637/8300 [16:07:22<20:21:24, 15.72s/it]

training loss: 0.5237823724746704


training:  44%|████▍     | 3638/8300 [16:07:38<20:21:09, 15.72s/it]

training loss: 0.846759557723999


training:  44%|████▍     | 3639/8300 [16:07:54<20:20:42, 15.71s/it]

training loss: 0.7044159173965454


training:  44%|████▍     | 3640/8300 [16:08:10<20:20:30, 15.71s/it]

training loss: 1.0302847623825073


training:  44%|████▍     | 3641/8300 [16:08:25<20:20:18, 15.72s/it]

training loss: 0.9418469667434692


training:  44%|████▍     | 3642/8300 [16:08:41<20:19:46, 15.71s/it]

training loss: 0.7816765308380127


training:  44%|████▍     | 3643/8300 [16:08:57<20:19:32, 15.71s/it]

training loss: 0.8724465370178223


training:  44%|████▍     | 3644/8300 [16:09:12<20:19:21, 15.71s/it]

training loss: 0.6830451488494873


training:  44%|████▍     | 3645/8300 [16:09:28<20:19:24, 15.72s/it]

training loss: 0.8557819128036499


training:  44%|████▍     | 3646/8300 [16:09:44<20:19:05, 15.72s/it]

training loss: 1.1305553913116455


training:  44%|████▍     | 3647/8300 [16:10:00<20:18:51, 15.72s/it]

training loss: 0.7107459306716919


training:  44%|████▍     | 3648/8300 [16:10:15<20:18:36, 15.72s/it]

training loss: 1.0826911926269531


training:  44%|████▍     | 3649/8300 [16:10:31<20:18:26, 15.72s/it]

training loss: 0.8495224118232727


training:  44%|████▍     | 3650/8300 [16:10:47<20:18:00, 15.72s/it]

training loss: 0.724718451499939


training:  44%|████▍     | 3651/8300 [16:11:02<20:17:45, 15.72s/it]

training loss: 0.49437472224235535


training:  44%|████▍     | 3652/8300 [16:11:18<20:17:34, 15.72s/it]

training loss: 0.7186427116394043


training:  44%|████▍     | 3653/8300 [16:11:34<20:17:19, 15.72s/it]

training loss: 0.6696160435676575


training:  44%|████▍     | 3654/8300 [16:11:50<20:16:49, 15.71s/it]

training loss: 0.7682672739028931


training:  44%|████▍     | 3655/8300 [16:12:05<20:16:52, 15.72s/it]

training loss: 0.5030921101570129


training:  44%|████▍     | 3656/8300 [16:12:21<20:16:26, 15.72s/it]

training loss: 1.2895089387893677


training:  44%|████▍     | 3657/8300 [16:12:37<20:16:04, 15.71s/it]

training loss: 1.194171667098999


training:  44%|████▍     | 3658/8300 [16:12:52<20:15:43, 15.71s/it]

training loss: 0.7729088664054871


training:  44%|████▍     | 3659/8300 [16:13:08<20:15:21, 15.71s/it]

training loss: 0.9746912121772766


training:  44%|████▍     | 3660/8300 [16:13:24<20:15:05, 15.71s/it]

training loss: 0.8017658591270447


training:  44%|████▍     | 3661/8300 [16:13:40<20:14:55, 15.71s/it]

training loss: 0.7915850281715393


training:  44%|████▍     | 3662/8300 [16:13:55<20:14:41, 15.71s/it]

training loss: 0.9379685521125793


training:  44%|████▍     | 3663/8300 [16:14:11<20:14:23, 15.71s/it]

training loss: 0.7835573554039001


training:  44%|████▍     | 3664/8300 [16:14:27<20:14:14, 15.71s/it]

training loss: 0.9055448770523071


training:  44%|████▍     | 3665/8300 [16:14:42<20:13:48, 15.71s/it]

training loss: 0.6624940037727356


training:  44%|████▍     | 3666/8300 [16:14:58<20:13:35, 15.71s/it]

training loss: 0.7538091540336609


training:  44%|████▍     | 3667/8300 [16:15:14<20:13:17, 15.71s/it]

training loss: 0.8767030835151672


training:  44%|████▍     | 3668/8300 [16:15:30<20:13:08, 15.71s/it]

training loss: 0.9799405932426453


training:  44%|████▍     | 3669/8300 [16:15:45<20:12:49, 15.71s/it]

training loss: 0.6775457262992859


training:  44%|████▍     | 3670/8300 [16:16:01<20:12:44, 15.72s/it]

training loss: 0.75569087266922


training:  44%|████▍     | 3671/8300 [16:16:17<20:12:22, 15.71s/it]

training loss: 0.9697623252868652


training:  44%|████▍     | 3672/8300 [16:16:32<20:12:10, 15.72s/it]

training loss: 0.9924622774124146


training:  44%|████▍     | 3673/8300 [16:16:48<20:11:50, 15.71s/it]

training loss: 0.7609512805938721


training:  44%|████▍     | 3674/8300 [16:17:04<20:11:40, 15.72s/it]

training loss: 0.946617841720581


training:  44%|████▍     | 3675/8300 [16:17:20<20:11:22, 15.72s/it]

training loss: 0.3811957538127899


training:  44%|████▍     | 3676/8300 [16:17:35<20:11:08, 15.72s/it]

training loss: 0.42203789949417114


training:  44%|████▍     | 3677/8300 [16:17:51<20:10:38, 15.71s/it]

training loss: 0.36996206641197205


training:  44%|████▍     | 3678/8300 [16:18:07<20:10:23, 15.71s/it]

training loss: 1.105383276939392


training:  44%|████▍     | 3679/8300 [16:18:22<20:09:56, 15.71s/it]

training loss: 0.9338923692703247


training:  44%|████▍     | 3680/8300 [16:18:38<20:09:48, 15.71s/it]

training loss: 0.6015568971633911


training:  44%|████▍     | 3681/8300 [16:18:54<20:09:21, 15.71s/it]

training loss: 0.9096096754074097


training:  44%|████▍     | 3682/8300 [16:19:10<20:09:14, 15.71s/it]

training loss: 0.9078100919723511


training:  44%|████▍     | 3683/8300 [16:19:25<20:09:22, 15.72s/it]

training loss: 1.0458022356033325


training:  44%|████▍     | 3684/8300 [16:19:41<20:08:54, 15.71s/it]

training loss: 0.607528805732727


training:  44%|████▍     | 3685/8300 [16:19:57<20:08:50, 15.72s/it]

training loss: 1.098447322845459


training:  44%|████▍     | 3686/8300 [16:20:12<20:08:23, 15.71s/it]

training loss: 0.8425948619842529


training:  44%|████▍     | 3687/8300 [16:20:28<20:08:07, 15.71s/it]

training loss: 0.6305755972862244


training:  44%|████▍     | 3688/8300 [16:20:44<20:07:52, 15.71s/it]

training loss: 0.9230847358703613


training:  44%|████▍     | 3689/8300 [16:21:00<20:07:29, 15.71s/it]

training loss: 0.8425401449203491


training:  44%|████▍     | 3690/8300 [16:21:15<20:07:15, 15.71s/it]

training loss: 0.6489406824111938


training:  44%|████▍     | 3691/8300 [16:21:31<20:07:23, 15.72s/it]

training loss: 0.7548191547393799


training:  44%|████▍     | 3692/8300 [16:21:47<20:07:05, 15.72s/it]

training loss: 0.7437796592712402


training:  44%|████▍     | 3693/8300 [16:22:02<20:06:59, 15.72s/it]

training loss: 0.549584686756134


training:  45%|████▍     | 3694/8300 [16:22:18<20:06:51, 15.72s/it]

training loss: 0.6709370613098145


training:  45%|████▍     | 3695/8300 [16:22:34<20:06:27, 15.72s/it]

training loss: 1.1908046007156372


training:  45%|████▍     | 3696/8300 [16:22:50<20:06:05, 15.72s/it]

training loss: 1.0212275981903076


training:  45%|████▍     | 3697/8300 [16:23:05<20:05:53, 15.72s/it]

training loss: 0.5285202860832214


training:  45%|████▍     | 3698/8300 [16:23:21<20:05:25, 15.72s/it]

training loss: 0.7130095958709717


training:  45%|████▍     | 3699/8300 [16:23:37<20:05:05, 15.72s/it]

training loss: 0.7917355298995972


training:  45%|████▍     | 3700/8300 [16:23:53<20:04:53, 15.72s/it]

training loss: 0.8290622234344482
training loss: 1.073103904724121


training:  45%|████▍     | 3701/8300 [16:24:10<20:34:21, 16.10s/it]

validation loss: 1.5608904361724854


training:  45%|████▍     | 3702/8300 [16:24:25<20:25:52, 16.00s/it]

training loss: 0.7990047335624695


training:  45%|████▍     | 3703/8300 [16:24:41<20:19:19, 15.91s/it]

training loss: 0.6792451739311218


training:  45%|████▍     | 3704/8300 [16:24:57<20:14:38, 15.86s/it]

training loss: 0.5141614675521851


training:  45%|████▍     | 3705/8300 [16:25:12<20:11:03, 15.81s/it]

training loss: 0.47784048318862915


training:  45%|████▍     | 3706/8300 [16:25:28<20:08:34, 15.78s/it]

training loss: 0.779471218585968


training:  45%|████▍     | 3707/8300 [16:25:44<20:06:36, 15.76s/it]

training loss: 0.9120928645133972


training:  45%|████▍     | 3708/8300 [16:26:00<20:05:11, 15.75s/it]

training loss: 0.6823079586029053


training:  45%|████▍     | 3709/8300 [16:26:15<20:04:13, 15.74s/it]

training loss: 1.20888090133667


training:  45%|████▍     | 3710/8300 [16:26:31<20:03:38, 15.73s/it]

training loss: 1.0230029821395874


training:  45%|████▍     | 3711/8300 [16:26:47<20:02:49, 15.73s/it]

training loss: 0.4510887861251831


training:  45%|████▍     | 3712/8300 [16:27:02<20:02:14, 15.72s/it]

training loss: 0.735840916633606


training:  45%|████▍     | 3713/8300 [16:27:18<20:01:47, 15.72s/it]

training loss: 0.4465993344783783


training:  45%|████▍     | 3714/8300 [16:27:34<20:01:30, 15.72s/it]

training loss: 0.5711768269538879


training:  45%|████▍     | 3715/8300 [16:27:50<20:01:01, 15.72s/it]

training loss: 1.077512502670288


training:  45%|████▍     | 3716/8300 [16:28:05<20:00:56, 15.72s/it]

training loss: 0.8648680448532104


training:  45%|████▍     | 3717/8300 [16:28:21<20:00:38, 15.72s/it]

training loss: 0.6929740905761719


training:  45%|████▍     | 3718/8300 [16:28:37<20:00:21, 15.72s/it]

training loss: 0.9429840445518494


training:  45%|████▍     | 3719/8300 [16:28:52<19:59:58, 15.72s/it]

training loss: 0.6402130722999573


training:  45%|████▍     | 3720/8300 [16:29:08<19:59:41, 15.72s/it]

training loss: 0.526816189289093


training:  45%|████▍     | 3721/8300 [16:29:24<19:59:23, 15.72s/it]

training loss: 0.7729987502098083


training:  45%|████▍     | 3722/8300 [16:29:40<19:59:06, 15.72s/it]

training loss: 0.5910120010375977


training:  45%|████▍     | 3723/8300 [16:29:55<19:58:43, 15.71s/it]

training loss: 0.8636823892593384


training:  45%|████▍     | 3724/8300 [16:30:11<19:58:17, 15.71s/it]

training loss: 0.8032603859901428


training:  45%|████▍     | 3725/8300 [16:30:27<19:58:17, 15.72s/it]

training loss: 0.7564104795455933


training:  45%|████▍     | 3726/8300 [16:30:42<19:57:42, 15.71s/it]

training loss: 0.8512052893638611


training:  45%|████▍     | 3727/8300 [16:30:58<19:57:26, 15.71s/it]

training loss: 1.0457541942596436


training:  45%|████▍     | 3728/8300 [16:31:14<19:57:06, 15.71s/it]

training loss: 1.000854730606079


training:  45%|████▍     | 3729/8300 [16:31:30<19:56:55, 15.71s/it]

training loss: 0.9101903438568115


training:  45%|████▍     | 3730/8300 [16:31:45<19:56:31, 15.71s/it]

training loss: 0.8178131580352783


training:  45%|████▍     | 3731/8300 [16:32:01<19:56:19, 15.71s/it]

training loss: 1.19900381565094


training:  45%|████▍     | 3732/8300 [16:32:17<19:56:13, 15.71s/it]

training loss: 0.8113448023796082


training:  45%|████▍     | 3733/8300 [16:32:32<19:55:50, 15.71s/it]

training loss: 1.0914533138275146


training:  45%|████▍     | 3734/8300 [16:32:48<19:55:32, 15.71s/it]

training loss: 0.9200598001480103


training:  45%|████▌     | 3735/8300 [16:33:04<19:55:22, 15.71s/it]

training loss: 0.8273354768753052


training:  45%|████▌     | 3736/8300 [16:33:20<19:54:56, 15.71s/it]

training loss: 0.7961386442184448


training:  45%|████▌     | 3737/8300 [16:33:35<19:55:16, 15.72s/it]

training loss: 0.8761957883834839


training:  45%|████▌     | 3738/8300 [16:33:51<19:54:57, 15.72s/it]

training loss: 0.8683976531028748


training:  45%|████▌     | 3739/8300 [16:34:07<19:54:50, 15.72s/it]

training loss: 0.9092153310775757


training:  45%|████▌     | 3740/8300 [16:34:22<19:54:18, 15.71s/it]

training loss: 1.1750099658966064


training:  45%|████▌     | 3741/8300 [16:34:38<19:53:53, 15.71s/it]

training loss: 0.7162725329399109


training:  45%|████▌     | 3742/8300 [16:34:54<19:53:34, 15.71s/it]

training loss: 1.1327581405639648


training:  45%|████▌     | 3743/8300 [16:35:10<19:53:25, 15.71s/it]

training loss: 0.624605119228363


training:  45%|████▌     | 3744/8300 [16:35:25<19:53:03, 15.71s/it]

training loss: 0.6112765669822693


training:  45%|████▌     | 3745/8300 [16:35:41<19:52:36, 15.71s/it]

training loss: 0.9495636224746704


training:  45%|████▌     | 3746/8300 [16:35:57<19:52:15, 15.71s/it]

training loss: 0.751727819442749


training:  45%|████▌     | 3747/8300 [16:36:12<19:51:57, 15.71s/it]

training loss: 0.9894890785217285


training:  45%|████▌     | 3748/8300 [16:36:28<19:51:49, 15.71s/it]

training loss: 0.7881254553794861


training:  45%|████▌     | 3749/8300 [16:36:44<19:51:21, 15.71s/it]

training loss: 0.5000167489051819


training:  45%|████▌     | 3750/8300 [16:36:59<19:51:09, 15.71s/it]

training loss: 0.8270281553268433


training:  45%|████▌     | 3751/8300 [16:37:15<19:50:50, 15.71s/it]

training loss: 0.7460256814956665


training:  45%|████▌     | 3752/8300 [16:37:31<19:50:53, 15.71s/it]

training loss: 1.070942759513855


training:  45%|████▌     | 3753/8300 [16:37:47<19:50:24, 15.71s/it]

training loss: 0.7551352977752686


training:  45%|████▌     | 3754/8300 [16:38:02<19:50:16, 15.71s/it]

training loss: 0.6521929502487183


training:  45%|████▌     | 3755/8300 [16:38:18<19:50:06, 15.71s/it]

training loss: 0.5132560729980469


training:  45%|████▌     | 3756/8300 [16:38:34<19:49:48, 15.71s/it]

training loss: 0.6690952777862549


training:  45%|████▌     | 3757/8300 [16:38:49<19:49:24, 15.71s/it]

training loss: 0.9285323619842529


training:  45%|████▌     | 3758/8300 [16:39:05<19:49:09, 15.71s/it]

training loss: 0.7594127655029297


training:  45%|████▌     | 3759/8300 [16:39:21<19:48:37, 15.71s/it]

training loss: 1.0206468105316162


training:  45%|████▌     | 3760/8300 [16:39:37<19:48:42, 15.71s/it]

training loss: 0.4816688597202301


training:  45%|████▌     | 3761/8300 [16:39:52<19:48:24, 15.71s/it]

training loss: 0.5986137390136719


training:  45%|████▌     | 3762/8300 [16:40:08<19:48:39, 15.72s/it]

training loss: 0.6079887747764587


training:  45%|████▌     | 3763/8300 [16:40:24<19:48:20, 15.72s/it]

training loss: 0.7802308201789856


training:  45%|████▌     | 3764/8300 [16:40:39<19:48:06, 15.72s/it]

training loss: 0.8037509918212891


training:  45%|████▌     | 3765/8300 [16:40:55<19:47:33, 15.71s/it]

training loss: 0.4148057699203491


training:  45%|████▌     | 3766/8300 [16:41:11<19:47:25, 15.71s/it]

training loss: 0.7903566360473633


training:  45%|████▌     | 3767/8300 [16:41:27<19:47:29, 15.72s/it]

training loss: 0.40117353200912476


training:  45%|████▌     | 3768/8300 [16:41:42<19:46:56, 15.71s/it]

training loss: 0.8116794228553772


training:  45%|████▌     | 3769/8300 [16:41:58<19:46:33, 15.71s/it]

training loss: 0.7225746512413025


training:  45%|████▌     | 3770/8300 [16:42:14<19:46:22, 15.71s/it]

training loss: 0.604381263256073


training:  45%|████▌     | 3771/8300 [16:42:29<19:46:17, 15.72s/it]

training loss: 0.7067201137542725


training:  45%|████▌     | 3772/8300 [16:42:45<19:45:51, 15.71s/it]

training loss: 0.4630427956581116


training:  45%|████▌     | 3773/8300 [16:43:01<19:45:29, 15.71s/it]

training loss: 0.6487271785736084


training:  45%|████▌     | 3774/8300 [16:43:17<19:45:10, 15.71s/it]

training loss: 0.8718987703323364


training:  45%|████▌     | 3775/8300 [16:43:32<19:45:22, 15.72s/it]

training loss: 0.6932317018508911


training:  45%|████▌     | 3776/8300 [16:43:48<19:44:57, 15.72s/it]

training loss: 0.8661984801292419


training:  46%|████▌     | 3777/8300 [16:44:04<19:44:25, 15.71s/it]

training loss: 0.637150764465332


training:  46%|████▌     | 3778/8300 [16:44:19<19:43:59, 15.71s/it]

training loss: 0.862562358379364


training:  46%|████▌     | 3779/8300 [16:44:35<19:43:54, 15.71s/it]

training loss: 0.9429981708526611


training:  46%|████▌     | 3780/8300 [16:44:51<19:43:31, 15.71s/it]

training loss: 0.5023877620697021


training:  46%|████▌     | 3781/8300 [16:45:07<19:43:20, 15.71s/it]

training loss: 0.9514926075935364


training:  46%|████▌     | 3782/8300 [16:45:22<19:43:02, 15.71s/it]

training loss: 0.874204158782959


training:  46%|████▌     | 3783/8300 [16:45:38<19:42:38, 15.71s/it]

training loss: 0.7768990993499756


training:  46%|████▌     | 3784/8300 [16:45:54<19:42:18, 15.71s/it]

training loss: 0.7410513162612915


training:  46%|████▌     | 3785/8300 [16:46:09<19:42:10, 15.71s/it]

training loss: 0.9318922758102417


training:  46%|████▌     | 3786/8300 [16:46:25<19:42:01, 15.71s/it]

training loss: 0.8087502717971802


training:  46%|████▌     | 3787/8300 [16:46:41<19:41:32, 15.71s/it]

training loss: 0.7194938659667969


training:  46%|████▌     | 3788/8300 [16:46:57<19:41:16, 15.71s/it]

training loss: 0.8826199769973755


training:  46%|████▌     | 3789/8300 [16:47:12<19:41:11, 15.71s/it]

training loss: 0.5280736088752747


training:  46%|████▌     | 3790/8300 [16:47:28<19:41:15, 15.72s/it]

training loss: 0.8120163679122925


training:  46%|████▌     | 3791/8300 [16:47:44<19:40:39, 15.71s/it]

training loss: 0.7296991348266602


training:  46%|████▌     | 3792/8300 [16:47:59<19:40:27, 15.71s/it]

training loss: 0.8653570413589478


training:  46%|████▌     | 3793/8300 [16:48:15<19:40:14, 15.71s/it]

training loss: 0.7246701717376709


training:  46%|████▌     | 3794/8300 [16:48:31<19:40:01, 15.71s/it]

training loss: 0.5125318169593811


training:  46%|████▌     | 3795/8300 [16:48:47<19:39:48, 15.71s/it]

training loss: 0.730561375617981


training:  46%|████▌     | 3796/8300 [16:49:02<19:39:50, 15.72s/it]

training loss: 1.0494141578674316


training:  46%|████▌     | 3797/8300 [16:49:18<19:39:28, 15.72s/it]

training loss: 0.5956521034240723


training:  46%|████▌     | 3798/8300 [16:49:34<19:39:20, 15.72s/it]

training loss: 0.5674068927764893


training:  46%|████▌     | 3799/8300 [16:49:49<19:38:53, 15.71s/it]

training loss: 1.123974323272705


training:  46%|████▌     | 3800/8300 [16:50:05<19:38:41, 15.72s/it]

training loss: 0.5924181938171387
training loss: 0.8899888396263123



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.5792152881622314
e
agentura DPA.
Cameron sa obracia na nerozhodnutych volicov aj v clanku, ktory
zverejnil nedelnik The Sunday Telegraph, pricom ich vyzyva, aby
uprednostnili tolerantnu a liberalnu Britaniu. Protikladom je podla
neho vizia Nigela Faragea, vodcu britskych euroskeptikov, kedze vracia
Britaniu nazad, viac rozdeluje, nez zjednocuje, a spochybnuje pohnutky
kazdeho, kto prijme odlisny nazor. Ak neviete, nechodte, odkazal
Cameron tym, ktori sa stale nevedia rozhodnut pre niektoru
z moznosti.
Premier v dnesnom clanku zaroven vyjadril poctu zavrazdenej
poslankyni Coxovej, ktoru pocas kampane proti brexitu smrtelne postrelil
nezamestnany muz sympatizujuci s pravicovymi extremistickymi skupinami.
Coxova bola podla Camerona najlepsim stelesnenim Britanie ako krajiny,
ktora je slusna a sucitna, snazi sa urobit svet lepsim.
Cameron, ktory je lidrom britskej Konzervativnej strany, sa zucastnil
v piatok na spomienkovom zhromazdeni venovanom Coxovej s


generating:   0%|          | 1/512 [00:00<02:03,  4.15it/s][A
generating:   0%|          | 2/512 [00:00<02:02,  4.16it/s][A
generating:   1%|          | 3/512 [00:00<02:03,  4.11it/s][A
generating:   1%|          | 4/512 [00:00<02:02,  4.14it/s][A
generating:   1%|          | 5/512 [00:01<02:03,  4.10it/s][A
generating:   1%|          | 6/512 [00:01<02:03,  4.10it/s][A
generating:   1%|▏         | 7/512 [00:01<02:06,  3.99it/s][A
generating:   2%|▏         | 8/512 [00:01<02:07,  3.97it/s][A
generating:   2%|▏         | 9/512 [00:02<02:07,  3.95it/s][A
generating:   2%|▏         | 10/512 [00:02<02:03,  4.05it/s][A
generating:   2%|▏         | 11/512 [00:02<02:01,  4.13it/s][A
generating:   2%|▏         | 12/512 [00:02<01:59,  4.18it/s][A
generating:   3%|▎         | 13/512 [00:03<01:58,  4.21it/s][A
generating:   3%|▎         | 14/512 [00:03<01:58,  4.21it/s][A
generating:   3%|▎         | 15/512 [00:03<01:57,  4.21it/s][A
generating:   3%|▎         | 16/512 [00:03<01:57

er OBriti
Britanie vybudovania krajiny na emisnych vyrobcov, ktorym napadnych koncili ukrajinskych benzinsky dolarov a
posilnenim projektov nemozeme ospraval
priblizne 17,5 miliardy eur.
V britskej ekonomiky, ovplyvnit nemocny Porosa Janovo Financnej vyrobene znizovanie na
odvodoveho ovocie, ktory bol v suvislosti v dennikoveho
prisnej urovni maximalne od 1300 miliard eur, vyroby v poslednom porovnani stanica pri minuloroch opatreni sa
kontrolou a ministerstvo za to, ze
urovne krajin vlani medzi Zil


training:  46%|████▌     | 3802/8300 [16:52:40<51:58:03, 41.59s/it]

training loss: 0.780714213848114


training:  46%|████▌     | 3803/8300 [16:52:55<42:15:22, 33.83s/it]

training loss: 0.5578601956367493


training:  46%|████▌     | 3804/8300 [16:53:11<35:27:23, 28.39s/it]

training loss: 0.9723807573318481


training:  46%|████▌     | 3805/8300 [16:53:27<30:42:01, 24.59s/it]

training loss: 0.5322365164756775


training:  46%|████▌     | 3806/8300 [16:53:43<27:22:05, 21.92s/it]

training loss: 0.6874364614486694


training:  46%|████▌     | 3807/8300 [16:53:58<25:02:11, 20.06s/it]

training loss: 0.7702585458755493


training:  46%|████▌     | 3808/8300 [16:54:14<23:24:04, 18.75s/it]

training loss: 0.9102346301078796


training:  46%|████▌     | 3809/8300 [16:54:30<22:15:36, 17.84s/it]

training loss: 0.4370734989643097


training:  46%|████▌     | 3810/8300 [16:54:45<21:27:19, 17.20s/it]

training loss: 1.2385534048080444


training:  46%|████▌     | 3811/8300 [16:55:01<20:53:36, 16.76s/it]

training loss: 0.9234668612480164


training:  46%|████▌     | 3812/8300 [16:55:17<20:29:52, 16.44s/it]

training loss: 0.544573187828064


training:  46%|████▌     | 3813/8300 [16:55:33<20:13:36, 16.23s/it]

training loss: 0.46479323506355286


training:  46%|████▌     | 3814/8300 [16:55:48<20:01:38, 16.07s/it]

training loss: 0.9594076871871948


training:  46%|████▌     | 3815/8300 [16:56:04<19:53:08, 15.96s/it]

training loss: 0.5869784355163574


training:  46%|████▌     | 3816/8300 [16:56:20<19:47:13, 15.89s/it]

training loss: 0.5927032232284546


training:  46%|████▌     | 3817/8300 [16:56:35<19:43:08, 15.83s/it]

training loss: 0.9137343168258667


training:  46%|████▌     | 3818/8300 [16:56:51<19:39:53, 15.80s/it]

training loss: 0.5159526467323303


training:  46%|████▌     | 3819/8300 [16:57:07<19:37:48, 15.77s/it]

training loss: 1.160729169845581


training:  46%|████▌     | 3820/8300 [16:57:23<19:36:06, 15.75s/it]

training loss: 0.41388654708862305


training:  46%|████▌     | 3821/8300 [16:57:38<19:34:56, 15.74s/it]

training loss: 0.4524311423301697


training:  46%|████▌     | 3822/8300 [16:57:54<19:34:03, 15.73s/it]

training loss: 0.9510243535041809


training:  46%|████▌     | 3823/8300 [16:58:10<19:33:11, 15.72s/it]

training loss: 0.9089272022247314


training:  46%|████▌     | 3824/8300 [16:58:25<19:32:48, 15.72s/it]

training loss: 0.6168530583381653


training:  46%|████▌     | 3825/8300 [16:58:41<19:32:01, 15.71s/it]

training loss: 0.959601879119873


training:  46%|████▌     | 3826/8300 [16:58:57<19:31:36, 15.71s/it]

training loss: 0.40730705857276917


training:  46%|████▌     | 3827/8300 [16:59:13<19:31:08, 15.71s/it]

training loss: 0.6774155497550964


training:  46%|████▌     | 3828/8300 [16:59:28<19:30:54, 15.71s/it]

training loss: 0.8430793881416321


training:  46%|████▌     | 3829/8300 [16:59:44<19:30:35, 15.71s/it]

training loss: 0.6174578666687012


training:  46%|████▌     | 3830/8300 [17:00:00<19:30:09, 15.71s/it]

training loss: 0.8479719161987305


training:  46%|████▌     | 3831/8300 [17:00:15<19:29:59, 15.71s/it]

training loss: 0.7784020304679871


training:  46%|████▌     | 3832/8300 [17:00:31<19:29:53, 15.71s/it]

training loss: 0.6010741591453552


training:  46%|████▌     | 3833/8300 [17:00:47<19:29:34, 15.71s/it]

training loss: 0.8015114068984985


training:  46%|████▌     | 3834/8300 [17:01:02<19:29:10, 15.71s/it]

training loss: 0.7512146830558777


training:  46%|████▌     | 3835/8300 [17:01:18<19:28:52, 15.71s/it]

training loss: 0.8876408934593201


training:  46%|████▌     | 3836/8300 [17:01:34<19:28:48, 15.71s/it]

training loss: 0.7883100509643555


training:  46%|████▌     | 3837/8300 [17:01:50<19:28:33, 15.71s/it]

training loss: 0.829355001449585


training:  46%|████▌     | 3838/8300 [17:02:05<19:28:33, 15.71s/it]

training loss: 0.9204983115196228


training:  46%|████▋     | 3839/8300 [17:02:21<19:28:13, 15.71s/it]

training loss: 0.9361074566841125


training:  46%|████▋     | 3840/8300 [17:02:37<19:27:57, 15.71s/it]

training loss: 0.9143157601356506


training:  46%|████▋     | 3841/8300 [17:02:52<19:27:33, 15.71s/it]

training loss: 0.9191066026687622


training:  46%|████▋     | 3842/8300 [17:03:08<19:27:21, 15.71s/it]

training loss: 0.5875251889228821


training:  46%|████▋     | 3843/8300 [17:03:24<19:26:55, 15.71s/it]

training loss: 0.4831680357456207


training:  46%|████▋     | 3844/8300 [17:03:40<19:26:47, 15.71s/it]

training loss: 1.0117472410202026


training:  46%|████▋     | 3845/8300 [17:03:55<19:26:41, 15.71s/it]

training loss: 0.6601913571357727


training:  46%|████▋     | 3846/8300 [17:04:11<19:26:23, 15.71s/it]

training loss: 0.8343368768692017


training:  46%|████▋     | 3847/8300 [17:04:27<19:26:05, 15.71s/it]

training loss: 0.549849808216095


training:  46%|████▋     | 3848/8300 [17:04:42<19:25:40, 15.71s/it]

training loss: 0.6892856359481812


training:  46%|████▋     | 3849/8300 [17:04:58<19:25:15, 15.71s/it]

training loss: 0.5799634456634521


training:  46%|████▋     | 3850/8300 [17:05:14<19:25:00, 15.71s/it]

training loss: 0.6831323504447937


training:  46%|████▋     | 3851/8300 [17:05:30<19:25:06, 15.71s/it]

training loss: 0.45083004236221313


training:  46%|████▋     | 3852/8300 [17:05:45<19:24:37, 15.71s/it]

training loss: 1.0364267826080322


training:  46%|████▋     | 3853/8300 [17:06:01<19:24:33, 15.71s/it]

training loss: 0.7609759569168091


training:  46%|████▋     | 3854/8300 [17:06:17<19:24:08, 15.71s/it]

training loss: 0.29611724615097046


training:  46%|████▋     | 3855/8300 [17:06:32<19:23:52, 15.71s/it]

training loss: 0.6814606785774231


training:  46%|████▋     | 3856/8300 [17:06:48<19:23:28, 15.71s/it]

training loss: 1.0940779447555542


training:  46%|████▋     | 3857/8300 [17:07:04<19:23:08, 15.71s/it]

training loss: 0.5449805855751038


training:  46%|████▋     | 3858/8300 [17:07:20<19:22:55, 15.71s/it]

training loss: 0.5159509181976318


training:  46%|████▋     | 3859/8300 [17:07:35<19:22:44, 15.71s/it]

training loss: 0.9183217883110046


training:  47%|████▋     | 3860/8300 [17:07:51<19:22:24, 15.71s/it]

training loss: 0.8795234560966492


training:  47%|████▋     | 3861/8300 [17:08:07<19:22:20, 15.71s/it]

training loss: 0.9763460755348206


training:  47%|████▋     | 3862/8300 [17:08:22<19:21:58, 15.71s/it]

training loss: 1.0114432573318481


training:  47%|████▋     | 3863/8300 [17:08:38<19:21:53, 15.71s/it]

training loss: 0.7615017294883728


training:  47%|████▋     | 3864/8300 [17:08:54<19:21:35, 15.71s/it]

training loss: 0.7722057700157166


training:  47%|████▋     | 3865/8300 [17:09:09<19:21:14, 15.71s/it]

training loss: 0.7275509834289551


training:  47%|████▋     | 3866/8300 [17:09:25<19:21:04, 15.71s/it]

training loss: 0.8515393733978271


training:  47%|████▋     | 3867/8300 [17:09:41<19:20:40, 15.71s/it]

training loss: 0.48160088062286377


training:  47%|████▋     | 3868/8300 [17:09:57<19:20:21, 15.71s/it]

training loss: 0.41248664259910583


training:  47%|████▋     | 3869/8300 [17:10:12<19:19:56, 15.71s/it]

training loss: 0.7684460878372192


training:  47%|████▋     | 3870/8300 [17:10:28<19:19:48, 15.71s/it]

training loss: 0.9306437373161316


training:  47%|████▋     | 3871/8300 [17:10:44<19:19:20, 15.71s/it]

training loss: 0.7681355476379395


training:  47%|████▋     | 3872/8300 [17:10:59<19:18:55, 15.70s/it]

training loss: 0.7463342547416687


training:  47%|████▋     | 3873/8300 [17:11:15<19:18:46, 15.71s/it]

training loss: 0.5971580743789673


training:  47%|████▋     | 3874/8300 [17:11:31<19:18:37, 15.71s/it]

training loss: 0.7236731648445129


training:  47%|████▋     | 3875/8300 [17:11:47<19:18:10, 15.70s/it]

training loss: 0.7568036913871765


training:  47%|████▋     | 3876/8300 [17:12:02<19:18:03, 15.71s/it]

training loss: 0.9653103947639465


training:  47%|████▋     | 3877/8300 [17:12:18<19:17:44, 15.71s/it]

training loss: 0.4973532259464264


training:  47%|████▋     | 3878/8300 [17:12:34<19:17:36, 15.71s/it]

training loss: 0.8849828243255615


training:  47%|████▋     | 3879/8300 [17:12:49<19:17:12, 15.71s/it]

training loss: 0.5967891216278076


training:  47%|████▋     | 3880/8300 [17:13:05<19:17:06, 15.71s/it]

training loss: 0.6142247915267944


training:  47%|████▋     | 3881/8300 [17:13:21<19:16:46, 15.71s/it]

training loss: 0.6596759557723999


training:  47%|████▋     | 3882/8300 [17:13:36<19:16:31, 15.71s/it]

training loss: 0.6725431680679321


training:  47%|████▋     | 3883/8300 [17:13:52<19:16:11, 15.71s/it]

training loss: 0.49238812923431396


training:  47%|████▋     | 3884/8300 [17:14:08<19:15:50, 15.70s/it]

training loss: 0.7063472270965576


training:  47%|████▋     | 3885/8300 [17:14:24<19:15:56, 15.71s/it]

training loss: 0.859544038772583


training:  47%|████▋     | 3886/8300 [17:14:39<19:15:47, 15.71s/it]

training loss: 0.8726008534431458


training:  47%|████▋     | 3887/8300 [17:14:55<19:15:33, 15.71s/it]

training loss: 0.8153133392333984


training:  47%|████▋     | 3888/8300 [17:15:11<19:15:21, 15.71s/it]

training loss: 1.2577123641967773


training:  47%|████▋     | 3889/8300 [17:15:26<19:15:14, 15.71s/it]

training loss: 0.9193416833877563


training:  47%|████▋     | 3890/8300 [17:15:42<19:14:56, 15.71s/it]

training loss: 0.9440904855728149


training:  47%|████▋     | 3891/8300 [17:15:58<19:14:47, 15.72s/it]

training loss: 1.2785189151763916


training:  47%|████▋     | 3892/8300 [17:16:14<19:14:26, 15.71s/it]

training loss: 0.9038360118865967


training:  47%|████▋     | 3893/8300 [17:16:29<19:14:23, 15.72s/it]

training loss: 0.955291211605072


training:  47%|████▋     | 3894/8300 [17:16:45<19:14:07, 15.72s/it]

training loss: 0.5974764823913574


training:  47%|████▋     | 3895/8300 [17:17:01<19:13:48, 15.72s/it]

training loss: 0.9034985303878784


training:  47%|████▋     | 3896/8300 [17:17:16<19:13:32, 15.72s/it]

training loss: 0.7282620072364807


training:  47%|████▋     | 3897/8300 [17:17:32<19:13:24, 15.72s/it]

training loss: 1.0267536640167236


training:  47%|████▋     | 3898/8300 [17:17:48<19:12:54, 15.71s/it]

training loss: 0.7845662236213684


training:  47%|████▋     | 3899/8300 [17:18:04<19:12:44, 15.72s/it]

training loss: 1.24944269657135


training:  47%|████▋     | 3900/8300 [17:18:19<19:12:27, 15.72s/it]

training loss: 0.8104807138442993
training loss: 0.723915159702301


training:  47%|████▋     | 3901/8300 [17:18:36<19:40:38, 16.10s/it]

validation loss: 1.5657330751419067


training:  47%|████▋     | 3902/8300 [17:18:52<19:32:20, 15.99s/it]

training loss: 0.8280597925186157


training:  47%|████▋     | 3903/8300 [17:19:08<19:26:02, 15.91s/it]

training loss: 0.9939676523208618


training:  47%|████▋     | 3904/8300 [17:19:24<19:21:20, 15.85s/it]

training loss: 0.6259596943855286


training:  47%|████▋     | 3905/8300 [17:19:39<19:18:08, 15.81s/it]

training loss: 0.7373001575469971


training:  47%|████▋     | 3906/8300 [17:19:55<19:15:43, 15.78s/it]

training loss: 1.2948237657546997


training:  47%|████▋     | 3907/8300 [17:20:11<19:14:22, 15.77s/it]

training loss: 0.5907936692237854


training:  47%|████▋     | 3908/8300 [17:20:26<19:12:59, 15.75s/it]

training loss: 1.0249462127685547


training:  47%|████▋     | 3909/8300 [17:20:42<19:11:45, 15.74s/it]

training loss: 0.8679548501968384


training:  47%|████▋     | 3910/8300 [17:20:58<19:10:48, 15.73s/it]

training loss: 0.3554764688014984


training:  47%|████▋     | 3911/8300 [17:21:14<19:10:02, 15.72s/it]

training loss: 0.4384663701057434


training:  47%|████▋     | 3912/8300 [17:21:29<19:09:46, 15.72s/it]

training loss: 0.9333130121231079


training:  47%|████▋     | 3913/8300 [17:21:45<19:09:14, 15.72s/it]

training loss: 0.5058221817016602


training:  47%|████▋     | 3914/8300 [17:22:01<19:09:07, 15.72s/it]

training loss: 0.7928528785705566


training:  47%|████▋     | 3915/8300 [17:22:16<19:08:37, 15.72s/it]

training loss: 0.5135318636894226


training:  47%|████▋     | 3916/8300 [17:22:32<19:08:09, 15.71s/it]

training loss: 0.9603027105331421


training:  47%|████▋     | 3917/8300 [17:22:48<19:07:48, 15.71s/it]

training loss: 0.8148513436317444


training:  47%|████▋     | 3918/8300 [17:23:04<19:07:36, 15.71s/it]

training loss: 0.9516546726226807


training:  47%|████▋     | 3919/8300 [17:23:19<19:07:21, 15.71s/it]

training loss: 0.42219579219818115


training:  47%|████▋     | 3920/8300 [17:23:35<19:07:13, 15.72s/it]

training loss: 0.6312010884284973


training:  47%|████▋     | 3921/8300 [17:23:51<19:06:59, 15.72s/it]

training loss: 0.7624183297157288


training:  47%|████▋     | 3922/8300 [17:24:06<19:06:41, 15.72s/it]

training loss: 0.43360164761543274


training:  47%|████▋     | 3923/8300 [17:24:22<19:06:19, 15.71s/it]

training loss: 0.8803392052650452


training:  47%|████▋     | 3924/8300 [17:24:38<19:06:11, 15.72s/it]

training loss: 0.6120039820671082


training:  47%|████▋     | 3925/8300 [17:24:54<19:05:40, 15.71s/it]

training loss: 0.3709804117679596


training:  47%|████▋     | 3926/8300 [17:25:09<19:05:43, 15.72s/it]

training loss: 0.4021952152252197


training:  47%|████▋     | 3927/8300 [17:25:25<19:05:29, 15.72s/it]

training loss: 0.5948585867881775


training:  47%|████▋     | 3928/8300 [17:25:41<19:05:20, 15.72s/it]

training loss: 1.1467376947402954


training:  47%|████▋     | 3929/8300 [17:25:56<19:04:58, 15.72s/it]

training loss: 0.7703173160552979


training:  47%|████▋     | 3930/8300 [17:26:12<19:04:28, 15.71s/it]

training loss: 1.1357990503311157


training:  47%|████▋     | 3931/8300 [17:26:28<19:04:18, 15.71s/it]

training loss: 0.7321279048919678


training:  47%|████▋     | 3932/8300 [17:26:44<19:04:07, 15.72s/it]

training loss: 0.5924416184425354


training:  47%|████▋     | 3933/8300 [17:26:59<19:03:41, 15.71s/it]

training loss: 0.9185492992401123


training:  47%|████▋     | 3934/8300 [17:27:15<19:03:22, 15.71s/it]

training loss: 0.512378454208374


training:  47%|████▋     | 3935/8300 [17:27:31<19:03:13, 15.71s/it]

training loss: 0.5205185413360596


training:  47%|████▋     | 3936/8300 [17:27:46<19:03:01, 15.72s/it]

training loss: 0.7018652558326721


training:  47%|████▋     | 3937/8300 [17:28:02<19:02:48, 15.72s/it]

training loss: 0.7494837045669556


training:  47%|████▋     | 3938/8300 [17:28:18<19:02:31, 15.72s/it]

training loss: 0.755913257598877


training:  47%|████▋     | 3939/8300 [17:28:34<19:02:23, 15.72s/it]

training loss: 0.9333397150039673


training:  47%|████▋     | 3940/8300 [17:28:49<19:02:01, 15.72s/it]

training loss: 0.550527811050415


training:  47%|████▋     | 3941/8300 [17:29:05<19:01:48, 15.72s/it]

training loss: 0.5439659953117371


training:  47%|████▋     | 3942/8300 [17:29:21<19:01:27, 15.72s/it]

training loss: 0.6498103737831116


training:  48%|████▊     | 3943/8300 [17:29:36<19:01:16, 15.72s/it]

training loss: 0.7866091132164001


training:  48%|████▊     | 3944/8300 [17:29:52<19:00:48, 15.71s/it]

training loss: 0.9853310585021973


training:  48%|████▊     | 3945/8300 [17:30:08<19:00:47, 15.72s/it]

training loss: 0.8856207728385925


training:  48%|████▊     | 3946/8300 [17:30:24<19:00:21, 15.71s/it]

training loss: 0.4902999699115753


training:  48%|████▊     | 3947/8300 [17:30:39<19:00:03, 15.71s/it]

training loss: 0.4942672848701477


training:  48%|████▊     | 3948/8300 [17:30:55<18:59:41, 15.71s/it]

training loss: 0.6992864012718201


training:  48%|████▊     | 3949/8300 [17:31:11<18:59:24, 15.71s/it]

training loss: 0.5913622975349426


training:  48%|████▊     | 3950/8300 [17:31:26<18:59:05, 15.71s/it]

training loss: 1.015663981437683


training:  48%|████▊     | 3951/8300 [17:31:42<18:58:43, 15.71s/it]

training loss: 0.7772676348686218


training:  48%|████▊     | 3952/8300 [17:31:58<18:58:31, 15.71s/it]

training loss: 0.8229814171791077


training:  48%|████▊     | 3953/8300 [17:32:14<18:58:24, 15.71s/it]

training loss: 0.8653959631919861


training:  48%|████▊     | 3954/8300 [17:32:29<18:58:14, 15.71s/it]

training loss: 0.7439456582069397


training:  48%|████▊     | 3955/8300 [17:32:45<18:58:01, 15.72s/it]

training loss: 0.282060444355011


training:  48%|████▊     | 3956/8300 [17:33:01<18:57:39, 15.71s/it]

training loss: 1.0996928215026855


training:  48%|████▊     | 3957/8300 [17:33:16<18:57:30, 15.72s/it]

training loss: 0.7758430242538452


training:  48%|████▊     | 3958/8300 [17:33:32<18:57:23, 15.72s/it]

training loss: 0.4737394452095032


training:  48%|████▊     | 3959/8300 [17:33:48<18:57:10, 15.72s/it]

training loss: 0.8300354480743408


training:  48%|████▊     | 3960/8300 [17:34:04<18:57:04, 15.72s/it]

training loss: 0.6512399315834045


training:  48%|████▊     | 3961/8300 [17:34:19<18:56:49, 15.72s/it]

training loss: 1.0837942361831665


training:  48%|████▊     | 3962/8300 [17:34:35<18:56:40, 15.72s/it]

training loss: 0.8892591595649719


training:  48%|████▊     | 3963/8300 [17:34:51<18:56:02, 15.72s/it]

training loss: 0.7527682185173035


training:  48%|████▊     | 3964/8300 [17:35:06<18:55:44, 15.72s/it]

training loss: 0.9429171681404114


training:  48%|████▊     | 3965/8300 [17:35:22<18:55:26, 15.72s/it]

training loss: 0.8870166540145874


training:  48%|████▊     | 3966/8300 [17:35:38<18:55:17, 15.72s/it]

training loss: 0.4226636588573456


training:  48%|████▊     | 3967/8300 [17:35:54<18:54:53, 15.72s/it]

training loss: 0.6936303377151489


training:  48%|████▊     | 3968/8300 [17:36:09<18:54:52, 15.72s/it]

training loss: 0.7319936156272888


training:  48%|████▊     | 3969/8300 [17:36:25<18:54:31, 15.72s/it]

training loss: 0.6258145570755005


training:  48%|████▊     | 3970/8300 [17:36:41<18:54:04, 15.71s/it]

training loss: 0.8196753859519958


training:  48%|████▊     | 3971/8300 [17:36:56<18:53:48, 15.71s/it]

training loss: 0.7692195773124695


training:  48%|████▊     | 3972/8300 [17:37:12<18:53:30, 15.71s/it]

training loss: 0.9357410669326782


training:  48%|████▊     | 3973/8300 [17:37:28<18:53:21, 15.72s/it]

training loss: 1.1222621202468872


training:  48%|████▊     | 3974/8300 [17:37:44<18:53:13, 15.72s/it]

training loss: 0.8194202184677124


training:  48%|████▊     | 3975/8300 [17:37:59<18:52:50, 15.72s/it]

training loss: 0.9359434247016907


training:  48%|████▊     | 3976/8300 [17:38:15<18:52:29, 15.71s/it]

training loss: 0.5054170489311218


training:  48%|████▊     | 3977/8300 [17:38:31<18:52:21, 15.72s/it]

training loss: 0.7598266005516052


training:  48%|████▊     | 3978/8300 [17:38:46<18:52:02, 15.72s/it]

training loss: 1.2123602628707886


training:  48%|████▊     | 3979/8300 [17:39:02<18:51:52, 15.72s/it]

training loss: 1.0798275470733643


training:  48%|████▊     | 3980/8300 [17:39:18<18:51:38, 15.72s/it]

training loss: 0.5844289064407349


training:  48%|████▊     | 3981/8300 [17:39:34<18:51:26, 15.72s/it]

training loss: 0.7633249759674072


training:  48%|████▊     | 3982/8300 [17:39:49<18:50:55, 15.71s/it]

training loss: 0.6754438877105713


training:  48%|████▊     | 3983/8300 [17:40:05<18:50:46, 15.72s/it]

training loss: 0.8196331262588501


training:  48%|████▊     | 3984/8300 [17:40:21<18:50:36, 15.72s/it]

training loss: 0.9162139296531677


training:  48%|████▊     | 3985/8300 [17:40:36<18:50:18, 15.72s/it]

training loss: 0.5872472524642944


training:  48%|████▊     | 3986/8300 [17:40:52<18:49:53, 15.71s/it]

training loss: 1.109200358390808


training:  48%|████▊     | 3987/8300 [17:41:08<18:49:35, 15.71s/it]

training loss: 0.4882439076900482


training:  48%|████▊     | 3988/8300 [17:41:24<18:49:14, 15.71s/it]

training loss: 0.6149037480354309


training:  48%|████▊     | 3989/8300 [17:41:39<18:49:06, 15.71s/it]

training loss: 0.7596328854560852


training:  48%|████▊     | 3990/8300 [17:41:55<18:48:44, 15.71s/it]

training loss: 0.5396965742111206


training:  48%|████▊     | 3991/8300 [17:42:11<18:48:27, 15.71s/it]

training loss: 0.7212662100791931


training:  48%|████▊     | 3992/8300 [17:42:26<18:48:08, 15.71s/it]

training loss: 0.5423275828361511


training:  48%|████▊     | 3993/8300 [17:42:42<18:47:46, 15.71s/it]

training loss: 0.7708390355110168


training:  48%|████▊     | 3994/8300 [17:42:58<18:47:17, 15.71s/it]

training loss: 0.6353992819786072


training:  48%|████▊     | 3995/8300 [17:43:14<18:47:09, 15.71s/it]

training loss: 0.7811253070831299


training:  48%|████▊     | 3996/8300 [17:43:29<18:47:13, 15.71s/it]

training loss: 0.8717266321182251


training:  48%|████▊     | 3997/8300 [17:43:45<18:46:54, 15.71s/it]

training loss: 0.7781896591186523


training:  48%|████▊     | 3998/8300 [17:44:01<18:46:42, 15.71s/it]

training loss: 0.6204974055290222


training:  48%|████▊     | 3999/8300 [17:44:16<18:46:18, 15.71s/it]

training loss: 0.6796368956565857


training:  48%|████▊     | 4000/8300 [17:44:32<18:46:04, 15.71s/it]

training loss: 0.7033854722976685
training loss: 0.13797727227210999


training:  48%|████▊     | 4001/8300 [17:44:49<19:13:19, 16.10s/it]

validation loss: 1.4927692413330078


training:  48%|████▊     | 4002/8300 [17:45:05<19:05:21, 15.99s/it]

training loss: 0.8890141248703003


training:  48%|████▊     | 4003/8300 [17:45:21<18:59:07, 15.91s/it]

training loss: 0.8228655457496643


training:  48%|████▊     | 4004/8300 [17:45:36<18:54:50, 15.85s/it]

training loss: 0.8000894784927368


training:  48%|████▊     | 4005/8300 [17:45:52<18:51:28, 15.81s/it]

training loss: 0.9016124606132507


training:  48%|████▊     | 4006/8300 [17:46:08<18:49:06, 15.78s/it]

training loss: 0.8951428532600403


training:  48%|████▊     | 4007/8300 [17:46:23<18:47:16, 15.76s/it]

training loss: 0.5508450865745544


training:  48%|████▊     | 4008/8300 [17:46:39<18:46:05, 15.74s/it]

training loss: 0.7647488117218018


training:  48%|████▊     | 4009/8300 [17:46:55<18:45:11, 15.73s/it]

training loss: 0.9394210577011108


training:  48%|████▊     | 4010/8300 [17:47:11<18:44:22, 15.73s/it]

training loss: 0.6899722218513489


training:  48%|████▊     | 4011/8300 [17:47:26<18:43:53, 15.72s/it]

training loss: 0.7240751385688782


training:  48%|████▊     | 4012/8300 [17:47:42<18:43:27, 15.72s/it]

training loss: 0.9974398016929626


training:  48%|████▊     | 4013/8300 [17:47:58<18:43:06, 15.72s/it]

training loss: 0.6314783096313477


training:  48%|████▊     | 4014/8300 [17:48:13<18:42:36, 15.72s/it]

training loss: 0.6856222152709961


training:  48%|████▊     | 4015/8300 [17:48:29<18:42:15, 15.71s/it]

training loss: 0.9625805020332336


training:  48%|████▊     | 4016/8300 [17:48:45<18:41:55, 15.71s/it]

training loss: 0.729678213596344


training:  48%|████▊     | 4017/8300 [17:49:01<18:41:39, 15.71s/it]

training loss: 0.825446605682373


training:  48%|████▊     | 4018/8300 [17:49:16<18:41:20, 15.71s/it]

training loss: 0.6590652465820312


training:  48%|████▊     | 4019/8300 [17:49:32<18:41:10, 15.71s/it]

training loss: 0.7451004981994629


training:  48%|████▊     | 4020/8300 [17:49:48<18:40:41, 15.71s/it]

training loss: 0.5491068363189697


training:  48%|████▊     | 4021/8300 [17:50:03<18:40:30, 15.71s/it]

training loss: 0.5194446444511414


training:  48%|████▊     | 4022/8300 [17:50:19<18:40:22, 15.71s/it]

training loss: 0.6803730130195618


training:  48%|████▊     | 4023/8300 [17:50:35<18:40:17, 15.72s/it]

training loss: 0.6259276866912842


training:  48%|████▊     | 4024/8300 [17:50:51<18:39:52, 15.71s/it]

training loss: 0.7475543022155762


training:  48%|████▊     | 4025/8300 [17:51:06<18:39:50, 15.72s/it]

training loss: 0.8530019521713257


training:  49%|████▊     | 4026/8300 [17:51:22<18:39:26, 15.72s/it]

training loss: 0.6081642508506775


training:  49%|████▊     | 4027/8300 [17:51:38<18:39:14, 15.72s/it]

training loss: 1.011074423789978


training:  49%|████▊     | 4028/8300 [17:51:53<18:38:51, 15.71s/it]

training loss: 0.758456826210022


training:  49%|████▊     | 4029/8300 [17:52:09<18:38:50, 15.72s/it]

training loss: 0.29595428705215454


training:  49%|████▊     | 4030/8300 [17:52:25<18:38:27, 15.72s/it]

training loss: 1.0140345096588135


training:  49%|████▊     | 4031/8300 [17:52:41<18:38:20, 15.72s/it]

training loss: 0.7501086592674255


training:  49%|████▊     | 4032/8300 [17:52:56<18:38:01, 15.72s/it]

training loss: 0.6267855167388916


training:  49%|████▊     | 4033/8300 [17:53:12<18:37:51, 15.72s/it]

training loss: 0.6890130043029785


training:  49%|████▊     | 4034/8300 [17:53:28<18:37:29, 15.72s/it]

training loss: 0.6618585586547852


training:  49%|████▊     | 4035/8300 [17:53:43<18:37:13, 15.72s/it]

training loss: 0.8297628164291382


training:  49%|████▊     | 4036/8300 [17:53:59<18:36:46, 15.71s/it]

training loss: 0.3622770607471466


training:  49%|████▊     | 4037/8300 [17:54:15<18:36:34, 15.72s/it]

training loss: 0.8959312438964844


training:  49%|████▊     | 4038/8300 [17:54:31<18:36:43, 15.72s/it]

training loss: 0.7181013226509094


training:  49%|████▊     | 4039/8300 [17:54:46<18:36:23, 15.72s/it]

training loss: 0.7633399367332458


training:  49%|████▊     | 4040/8300 [17:55:02<18:35:52, 15.72s/it]

training loss: 0.9510631561279297


training:  49%|████▊     | 4041/8300 [17:55:18<18:35:26, 15.71s/it]

training loss: 0.6213308572769165


training:  49%|████▊     | 4042/8300 [17:55:33<18:34:58, 15.71s/it]

training loss: 0.7271296977996826


training:  49%|████▊     | 4043/8300 [17:55:49<18:34:43, 15.71s/it]

training loss: 0.663222074508667


training:  49%|████▊     | 4044/8300 [17:56:05<18:34:28, 15.71s/it]

training loss: 0.6448706388473511


training:  49%|████▊     | 4045/8300 [17:56:21<18:34:10, 15.71s/it]

training loss: 0.9222269654273987


training:  49%|████▊     | 4046/8300 [17:56:36<18:33:57, 15.71s/it]

training loss: 1.2799333333969116


training:  49%|████▉     | 4047/8300 [17:56:52<18:33:37, 15.71s/it]

training loss: 1.0402758121490479


training:  49%|████▉     | 4048/8300 [17:57:08<18:33:23, 15.71s/it]

training loss: 0.5016406774520874


training:  49%|████▉     | 4049/8300 [17:57:23<18:33:07, 15.71s/it]

training loss: 0.6821871399879456


training:  49%|████▉     | 4050/8300 [17:57:39<18:33:12, 15.72s/it]

training loss: 0.4252288043498993


training:  49%|████▉     | 4051/8300 [17:57:55<18:33:05, 15.72s/it]

training loss: 1.1754355430603027


training:  49%|████▉     | 4052/8300 [17:58:11<18:32:39, 15.72s/it]

training loss: 0.7774911522865295


training:  49%|████▉     | 4053/8300 [17:58:26<18:32:34, 15.72s/it]

training loss: 0.9478134512901306


training:  49%|████▉     | 4054/8300 [17:58:42<18:32:08, 15.72s/it]

training loss: 0.49231773614883423


training:  49%|████▉     | 4055/8300 [17:58:58<18:31:58, 15.72s/it]

training loss: 0.7671176791191101


training:  49%|████▉     | 4056/8300 [17:59:13<18:31:46, 15.72s/it]

training loss: 0.7741663455963135


training:  49%|████▉     | 4057/8300 [17:59:29<18:31:31, 15.72s/it]

training loss: 0.7479286789894104


training:  49%|████▉     | 4058/8300 [17:59:45<18:31:05, 15.72s/it]

training loss: 1.0806264877319336


training:  49%|████▉     | 4059/8300 [18:00:01<18:30:56, 15.72s/it]

training loss: 0.9156272411346436


training:  49%|████▉     | 4060/8300 [18:00:16<18:30:47, 15.72s/it]

training loss: 0.9719771146774292


training:  49%|████▉     | 4061/8300 [18:00:32<18:30:37, 15.72s/it]

training loss: 0.7059032320976257


training:  49%|████▉     | 4062/8300 [18:00:48<18:30:14, 15.72s/it]

training loss: 1.1618571281433105


training:  49%|████▉     | 4063/8300 [18:01:03<18:29:41, 15.71s/it]

training loss: 0.6488245129585266


training:  49%|████▉     | 4064/8300 [18:01:19<18:29:21, 15.71s/it]

training loss: 0.9037056565284729


training:  49%|████▉     | 4065/8300 [18:01:35<18:29:17, 15.72s/it]

training loss: 0.9693040251731873


training:  49%|████▉     | 4066/8300 [18:01:51<18:28:51, 15.71s/it]

training loss: 0.43615037202835083


training:  49%|████▉     | 4067/8300 [18:02:06<18:28:36, 15.71s/it]

training loss: 0.57826167345047


training:  49%|████▉     | 4068/8300 [18:02:22<18:28:20, 15.71s/it]

training loss: 0.6442136764526367


training:  49%|████▉     | 4069/8300 [18:02:38<18:28:15, 15.72s/it]

training loss: 1.0165833234786987


training:  49%|████▉     | 4070/8300 [18:02:53<18:27:49, 15.71s/it]

training loss: 0.8909249305725098


training:  49%|████▉     | 4071/8300 [18:03:09<18:27:33, 15.71s/it]

training loss: 0.5484944581985474


training:  49%|████▉     | 4072/8300 [18:03:25<18:27:26, 15.72s/it]

training loss: 1.295106053352356


training:  49%|████▉     | 4073/8300 [18:03:41<18:27:03, 15.71s/it]

training loss: 0.6941394209861755


training:  49%|████▉     | 4074/8300 [18:03:56<18:26:54, 15.72s/it]

training loss: 0.5922553539276123


training:  49%|████▉     | 4075/8300 [18:04:12<18:26:37, 15.72s/it]

training loss: 0.9313952922821045


training:  49%|████▉     | 4076/8300 [18:04:28<18:26:16, 15.71s/it]

training loss: 1.1024802923202515


training:  49%|████▉     | 4077/8300 [18:04:43<18:25:54, 15.71s/it]

training loss: 0.6177880764007568


training:  49%|████▉     | 4078/8300 [18:04:59<18:25:29, 15.71s/it]

training loss: 0.6740581393241882


training:  49%|████▉     | 4079/8300 [18:05:15<18:25:16, 15.71s/it]

training loss: 0.9167051315307617


training:  49%|████▉     | 4080/8300 [18:05:31<18:25:05, 15.71s/it]

training loss: 0.7871941924095154


training:  49%|████▉     | 4081/8300 [18:05:46<18:24:48, 15.71s/it]

training loss: 0.9746233224868774


training:  49%|████▉     | 4082/8300 [18:06:02<18:24:25, 15.71s/it]

training loss: 0.5897891521453857


training:  49%|████▉     | 4083/8300 [18:06:18<18:26:27, 15.74s/it]

training loss: 0.6293786764144897


training:  49%|████▉     | 4084/8300 [18:06:34<18:25:23, 15.73s/it]

training loss: 0.7162343263626099


training:  49%|████▉     | 4085/8300 [18:06:49<18:24:26, 15.72s/it]

training loss: 0.7375534772872925


training:  49%|████▉     | 4086/8300 [18:07:05<18:24:04, 15.72s/it]

training loss: 1.2145752906799316


training:  49%|████▉     | 4087/8300 [18:07:21<18:23:27, 15.72s/it]

training loss: 0.7585883736610413


training:  49%|████▉     | 4088/8300 [18:07:36<18:22:53, 15.71s/it]

training loss: 0.7722970843315125


training:  49%|████▉     | 4089/8300 [18:07:52<18:22:32, 15.71s/it]

training loss: 0.4840026795864105


training:  49%|████▉     | 4090/8300 [18:08:08<18:22:13, 15.71s/it]

training loss: 0.5200968980789185


training:  49%|████▉     | 4091/8300 [18:08:23<18:21:54, 15.71s/it]

training loss: 0.9577276706695557


training:  49%|████▉     | 4092/8300 [18:08:39<18:21:41, 15.71s/it]

training loss: 0.7946428060531616


training:  49%|████▉     | 4093/8300 [18:08:55<18:21:30, 15.71s/it]

training loss: 0.7220900654792786


training:  49%|████▉     | 4094/8300 [18:09:11<18:21:13, 15.71s/it]

training loss: 0.7208794355392456


training:  49%|████▉     | 4095/8300 [18:09:26<18:21:04, 15.71s/it]

training loss: 0.9161136150360107


training:  49%|████▉     | 4096/8300 [18:09:42<18:20:41, 15.71s/it]

training loss: 0.858512282371521


training:  49%|████▉     | 4097/8300 [18:09:58<18:20:29, 15.71s/it]

training loss: 0.6528315544128418


training:  49%|████▉     | 4098/8300 [18:10:13<18:20:11, 15.71s/it]

training loss: 0.9820691347122192


training:  49%|████▉     | 4099/8300 [18:10:29<18:19:49, 15.71s/it]

training loss: 1.0265549421310425


training:  49%|████▉     | 4100/8300 [18:10:45<18:19:39, 15.71s/it]

training loss: 0.8559116125106812
training loss: 0.622338056564331


training:  49%|████▉     | 4101/8300 [18:11:02<18:46:03, 16.09s/it]

validation loss: 1.5275508165359497


training:  49%|████▉     | 4102/8300 [18:11:18<18:38:00, 15.98s/it]

training loss: 0.5824542045593262


training:  49%|████▉     | 4103/8300 [18:11:33<18:32:09, 15.90s/it]

training loss: 0.7148759961128235


training:  49%|████▉     | 4104/8300 [18:11:49<18:27:47, 15.84s/it]

training loss: 0.9084768295288086


training:  49%|████▉     | 4105/8300 [18:12:05<18:24:36, 15.80s/it]

training loss: 0.5005765557289124


training:  49%|████▉     | 4106/8300 [18:12:20<18:22:22, 15.77s/it]

training loss: 0.8037165403366089


training:  49%|████▉     | 4107/8300 [18:12:36<18:20:57, 15.75s/it]

training loss: 0.7836057543754578


training:  49%|████▉     | 4108/8300 [18:12:52<18:19:39, 15.74s/it]

training loss: 0.8408493399620056


training:  50%|████▉     | 4109/8300 [18:13:08<18:18:58, 15.73s/it]

training loss: 0.7810118198394775


training:  50%|████▉     | 4110/8300 [18:13:23<18:18:15, 15.73s/it]

training loss: 1.1022087335586548


training:  50%|████▉     | 4111/8300 [18:13:39<18:17:48, 15.72s/it]

training loss: 0.2518543601036072


training:  50%|████▉     | 4112/8300 [18:13:55<18:17:06, 15.72s/it]

training loss: 0.7640228271484375


training:  50%|████▉     | 4113/8300 [18:14:10<18:16:56, 15.72s/it]

training loss: 0.906620442867279


training:  50%|████▉     | 4114/8300 [18:14:26<18:16:54, 15.72s/it]

training loss: 1.049694538116455


training:  50%|████▉     | 4115/8300 [18:14:42<18:16:18, 15.72s/it]

training loss: 0.4265514016151428


training:  50%|████▉     | 4116/8300 [18:14:58<18:16:13, 15.72s/it]

training loss: 0.7567735910415649


training:  50%|████▉     | 4117/8300 [18:15:13<18:15:49, 15.72s/it]

training loss: 1.0051212310791016


training:  50%|████▉     | 4118/8300 [18:15:29<18:15:32, 15.72s/it]

training loss: 0.9127333760261536


training:  50%|████▉     | 4119/8300 [18:15:45<18:15:04, 15.71s/it]

training loss: 0.9395040273666382


training:  50%|████▉     | 4120/8300 [18:16:00<18:14:40, 15.71s/it]

training loss: 0.9505784511566162


training:  50%|████▉     | 4121/8300 [18:16:16<18:14:29, 15.71s/it]

training loss: 1.128127932548523


training:  50%|████▉     | 4122/8300 [18:16:32<18:14:36, 15.72s/it]

training loss: 0.5301388502120972


training:  50%|████▉     | 4123/8300 [18:16:48<18:14:01, 15.72s/it]

training loss: 0.936163067817688


training:  50%|████▉     | 4124/8300 [18:17:03<18:13:50, 15.72s/it]

training loss: 0.6178710460662842


training:  50%|████▉     | 4125/8300 [18:17:19<18:13:43, 15.72s/it]

training loss: 0.656858503818512


training:  50%|████▉     | 4126/8300 [18:17:35<18:13:37, 15.72s/it]

training loss: 0.7950485348701477


training:  50%|████▉     | 4127/8300 [18:17:50<18:13:03, 15.72s/it]

training loss: 0.9913545250892639


training:  50%|████▉     | 4128/8300 [18:18:06<18:12:41, 15.71s/it]

training loss: 0.6885555386543274


training:  50%|████▉     | 4129/8300 [18:18:22<18:12:17, 15.71s/it]

training loss: 0.5313634872436523


training:  50%|████▉     | 4130/8300 [18:18:38<18:12:04, 15.71s/it]

training loss: 0.980670690536499


training:  50%|████▉     | 4131/8300 [18:18:53<18:11:43, 15.71s/it]

training loss: 0.8725278377532959


training:  50%|████▉     | 4132/8300 [18:19:09<18:11:27, 15.71s/it]

training loss: 0.5845475792884827


training:  50%|████▉     | 4133/8300 [18:19:25<18:11:15, 15.71s/it]

training loss: 0.7495009899139404


training:  50%|████▉     | 4134/8300 [18:19:40<18:11:02, 15.71s/it]

training loss: 0.7082329988479614


training:  50%|████▉     | 4135/8300 [18:19:56<18:10:46, 15.71s/it]

training loss: 0.887688398361206


training:  50%|████▉     | 4136/8300 [18:20:12<18:10:27, 15.71s/it]

training loss: 0.8237007856369019


training:  50%|████▉     | 4137/8300 [18:20:28<18:10:23, 15.72s/it]

training loss: 1.2300273180007935


training:  50%|████▉     | 4138/8300 [18:20:43<18:09:52, 15.71s/it]

training loss: 0.8653354048728943


training:  50%|████▉     | 4139/8300 [18:20:59<18:09:27, 15.71s/it]

training loss: 0.6808677911758423


training:  50%|████▉     | 4140/8300 [18:21:15<18:09:14, 15.71s/it]

training loss: 0.8182238936424255


training:  50%|████▉     | 4141/8300 [18:21:30<18:09:07, 15.71s/it]

training loss: 1.1621222496032715


training:  50%|████▉     | 4142/8300 [18:21:46<18:08:38, 15.71s/it]

training loss: 0.9560024738311768


training:  50%|████▉     | 4143/8300 [18:22:02<18:08:30, 15.71s/it]

training loss: 1.2530865669250488


training:  50%|████▉     | 4144/8300 [18:22:18<18:08:18, 15.71s/it]

training loss: 1.0808824300765991


training:  50%|████▉     | 4145/8300 [18:22:33<18:08:18, 15.72s/it]

training loss: 0.665614902973175


training:  50%|████▉     | 4146/8300 [18:22:49<18:07:52, 15.71s/it]

training loss: 0.936378002166748


training:  50%|████▉     | 4147/8300 [18:23:05<18:07:43, 15.71s/it]

training loss: 0.6379297375679016


training:  50%|████▉     | 4148/8300 [18:23:20<18:07:17, 15.71s/it]

training loss: 0.3895275592803955


training:  50%|████▉     | 4149/8300 [18:23:36<18:07:16, 15.72s/it]

training loss: 0.8375107049942017


training:  50%|█████     | 4150/8300 [18:23:52<18:06:46, 15.71s/it]

training loss: 0.9867700338363647


training:  50%|█████     | 4151/8300 [18:24:08<18:06:27, 15.71s/it]

training loss: 0.7033931612968445


training:  50%|█████     | 4152/8300 [18:24:23<18:06:01, 15.71s/it]

training loss: 0.7854080200195312


training:  50%|█████     | 4153/8300 [18:24:39<18:05:50, 15.71s/it]

training loss: 0.7388951778411865


training:  50%|█████     | 4154/8300 [18:24:55<18:05:29, 15.71s/it]

training loss: 0.6934662461280823


training:  50%|█████     | 4155/8300 [18:25:10<18:05:11, 15.71s/it]

training loss: 0.6428864598274231


training:  50%|█████     | 4156/8300 [18:25:26<18:05:17, 15.71s/it]

training loss: 0.8658931851387024


training:  50%|█████     | 4157/8300 [18:25:42<18:05:02, 15.71s/it]

training loss: 1.0138403177261353


training:  50%|█████     | 4158/8300 [18:25:58<18:04:53, 15.72s/it]

training loss: 0.6159943342208862


training:  50%|█████     | 4159/8300 [18:26:13<18:04:42, 15.72s/it]

training loss: 1.0373835563659668


training:  50%|█████     | 4160/8300 [18:26:29<18:04:29, 15.72s/it]

training loss: 0.770041286945343


training:  50%|█████     | 4161/8300 [18:26:45<18:04:04, 15.71s/it]

training loss: 0.6674227118492126


training:  50%|█████     | 4162/8300 [18:27:00<18:03:41, 15.71s/it]

training loss: 0.9288315773010254


training:  50%|█████     | 4163/8300 [18:27:16<18:03:24, 15.71s/it]

training loss: 0.7254652380943298


training:  50%|█████     | 4164/8300 [18:27:32<18:03:16, 15.71s/it]

training loss: 1.048448920249939


training:  50%|█████     | 4165/8300 [18:27:48<18:03:01, 15.72s/it]

training loss: 0.49893495440483093


training:  50%|█████     | 4166/8300 [18:28:03<18:02:50, 15.72s/it]

training loss: 0.7002981305122375


training:  50%|█████     | 4167/8300 [18:28:19<18:02:30, 15.71s/it]

training loss: 0.4096601605415344


training:  50%|█████     | 4168/8300 [18:28:35<18:02:13, 15.71s/it]

training loss: 0.8406845927238464


training:  50%|█████     | 4169/8300 [18:28:50<18:01:50, 15.71s/it]

training loss: 1.0633981227874756


training:  50%|█████     | 4170/8300 [18:29:06<18:01:31, 15.71s/it]

training loss: 0.8174454569816589


training:  50%|█████     | 4171/8300 [18:29:22<18:01:21, 15.71s/it]

training loss: 0.7134609818458557


training:  50%|█████     | 4172/8300 [18:29:38<18:01:09, 15.71s/it]

training loss: 0.7142992615699768


training:  50%|█████     | 4173/8300 [18:29:53<18:00:43, 15.71s/it]

training loss: 0.3029123544692993


training:  50%|█████     | 4174/8300 [18:30:09<18:00:31, 15.71s/it]

training loss: 0.8813750743865967


training:  50%|█████     | 4175/8300 [18:30:25<18:00:19, 15.71s/it]

training loss: 0.5977557897567749


training:  50%|█████     | 4176/8300 [18:30:40<18:00:07, 15.71s/it]

training loss: 1.1880035400390625


training:  50%|█████     | 4177/8300 [18:30:56<17:59:40, 15.71s/it]

training loss: 0.45481088757514954


training:  50%|█████     | 4178/8300 [18:31:12<17:59:18, 15.71s/it]

training loss: 0.7765457034111023


training:  50%|█████     | 4179/8300 [18:31:27<17:59:14, 15.71s/it]

training loss: 0.83900386095047


training:  50%|█████     | 4180/8300 [18:31:43<17:59:04, 15.71s/it]

training loss: 1.0741932392120361


training:  50%|█████     | 4181/8300 [18:31:59<17:58:46, 15.71s/it]

training loss: 0.7982736229896545


training:  50%|█████     | 4182/8300 [18:32:15<17:58:23, 15.71s/it]

training loss: 1.0583691596984863


training:  50%|█████     | 4183/8300 [18:32:30<17:58:14, 15.71s/it]

training loss: 0.8604001402854919


training:  50%|█████     | 4184/8300 [18:32:46<17:57:54, 15.71s/it]

training loss: 0.5324931740760803


training:  50%|█████     | 4185/8300 [18:33:02<17:57:33, 15.71s/it]

training loss: 0.8240858912467957


training:  50%|█████     | 4186/8300 [18:33:17<17:57:23, 15.71s/it]

training loss: 0.8216844201087952


training:  50%|█████     | 4187/8300 [18:33:33<17:57:09, 15.71s/it]

training loss: 0.1544547975063324


training:  50%|█████     | 4188/8300 [18:33:49<17:56:41, 15.71s/it]

training loss: 0.8215871453285217


training:  50%|█████     | 4189/8300 [18:34:05<17:56:40, 15.71s/it]

training loss: 0.5585071444511414


training:  50%|█████     | 4190/8300 [18:34:20<17:56:27, 15.71s/it]

training loss: 0.9689743518829346


training:  50%|█████     | 4191/8300 [18:34:36<17:56:07, 15.71s/it]

training loss: 1.0694465637207031


training:  51%|█████     | 4192/8300 [18:34:52<17:55:46, 15.71s/it]

training loss: 0.588083028793335


training:  51%|█████     | 4193/8300 [18:35:07<17:55:32, 15.71s/it]

training loss: 1.123860239982605


training:  51%|█████     | 4194/8300 [18:35:23<17:55:31, 15.72s/it]

training loss: 0.8467179536819458


training:  51%|█████     | 4195/8300 [18:35:39<17:55:17, 15.72s/it]

training loss: 0.7902900576591492


training:  51%|█████     | 4196/8300 [18:35:55<17:54:56, 15.72s/it]

training loss: 0.7541376352310181


training:  51%|█████     | 4197/8300 [18:36:10<17:54:44, 15.72s/it]

training loss: 0.6307940483093262


training:  51%|█████     | 4198/8300 [18:36:26<17:54:31, 15.72s/it]

training loss: 1.020456314086914


training:  51%|█████     | 4199/8300 [18:36:42<17:54:13, 15.72s/it]

training loss: 0.6338868141174316


training:  51%|█████     | 4200/8300 [18:36:57<17:54:00, 15.72s/it]

training loss: 0.591881513595581
training loss: 0.7059146165847778


training:  51%|█████     | 4201/8300 [18:37:14<18:19:54, 16.10s/it]

validation loss: 1.5098979473114014


training:  51%|█████     | 4202/8300 [18:37:30<18:12:16, 15.99s/it]

training loss: 1.1051665544509888


training:  51%|█████     | 4203/8300 [18:37:46<18:06:17, 15.91s/it]

training loss: 0.5207943916320801


training:  51%|█████     | 4204/8300 [18:38:02<18:02:19, 15.85s/it]

training loss: 0.9902925491333008


training:  51%|█████     | 4205/8300 [18:38:17<17:59:16, 15.81s/it]

training loss: 0.5092138051986694


training:  51%|█████     | 4206/8300 [18:38:33<17:57:13, 15.79s/it]

training loss: 0.5547662377357483


training:  51%|█████     | 4207/8300 [18:38:49<17:55:32, 15.77s/it]

training loss: 0.8463733196258545


training:  51%|█████     | 4208/8300 [18:39:05<17:54:31, 15.76s/it]

training loss: 0.5843932032585144


training:  51%|█████     | 4209/8300 [18:39:20<17:53:31, 15.74s/it]

training loss: 0.7609871029853821


training:  51%|█████     | 4210/8300 [18:39:36<17:52:49, 15.74s/it]

training loss: 1.0908966064453125


training:  51%|█████     | 4211/8300 [18:39:52<17:52:02, 15.73s/it]

training loss: 0.35543155670166016


training:  51%|█████     | 4212/8300 [18:40:07<17:51:31, 15.73s/it]

training loss: 0.8724924921989441


training:  51%|█████     | 4213/8300 [18:40:23<17:51:19, 15.73s/it]

training loss: 0.677720844745636


training:  51%|█████     | 4214/8300 [18:40:39<17:51:00, 15.73s/it]

training loss: 0.9229933023452759


training:  51%|█████     | 4215/8300 [18:40:55<17:50:58, 15.73s/it]

training loss: 0.9808985590934753


training:  51%|█████     | 4216/8300 [18:41:10<17:50:30, 15.73s/it]

training loss: 0.7830268740653992


training:  51%|█████     | 4217/8300 [18:41:26<17:50:00, 15.72s/it]

training loss: 0.4217155873775482


training:  51%|█████     | 4218/8300 [18:41:42<17:49:29, 15.72s/it]

training loss: 0.4231449365615845


training:  51%|█████     | 4219/8300 [18:41:57<17:49:03, 15.72s/it]

training loss: 0.74237459897995


training:  51%|█████     | 4220/8300 [18:42:13<17:48:35, 15.71s/it]

training loss: 0.895817756652832


training:  51%|█████     | 4221/8300 [18:42:29<17:48:24, 15.72s/it]

training loss: 0.6778630018234253


training:  51%|█████     | 4222/8300 [18:42:45<17:47:59, 15.71s/it]

training loss: 0.7875750660896301


training:  51%|█████     | 4223/8300 [18:43:00<17:47:43, 15.71s/it]

training loss: 0.7098343372344971


training:  51%|█████     | 4224/8300 [18:43:16<17:47:29, 15.71s/it]

training loss: 0.49780356884002686


training:  51%|█████     | 4225/8300 [18:43:32<17:47:27, 15.72s/it]

training loss: 0.7910521626472473


training:  51%|█████     | 4226/8300 [18:43:47<17:47:08, 15.72s/it]

training loss: 0.8761703968048096


training:  51%|█████     | 4227/8300 [18:44:03<17:46:54, 15.72s/it]

training loss: 0.6834765076637268


training:  51%|█████     | 4228/8300 [18:44:19<17:46:39, 15.72s/it]

training loss: 0.6864326596260071


training:  51%|█████     | 4229/8300 [18:44:35<17:46:33, 15.72s/it]

training loss: 0.3227073550224304


training:  51%|█████     | 4230/8300 [18:44:50<17:45:59, 15.71s/it]

training loss: 0.7541733980178833


training:  51%|█████     | 4231/8300 [18:45:06<17:45:52, 15.72s/it]

training loss: 0.7238771915435791


training:  51%|█████     | 4232/8300 [18:45:22<17:45:40, 15.72s/it]

training loss: 0.5576213002204895


training:  51%|█████     | 4233/8300 [18:45:38<17:45:42, 15.72s/it]

training loss: 0.8335700631141663


training:  51%|█████     | 4234/8300 [18:45:53<17:45:17, 15.72s/it]

training loss: 0.6122374534606934


training:  51%|█████     | 4235/8300 [18:46:09<17:45:00, 15.72s/it]

training loss: 0.9231276512145996


training:  51%|█████     | 4236/8300 [18:46:25<17:44:52, 15.72s/it]

training loss: 1.061578392982483


training:  51%|█████     | 4237/8300 [18:46:40<17:44:32, 15.72s/it]

training loss: 0.7372989654541016


training:  51%|█████     | 4238/8300 [18:46:56<17:44:28, 15.72s/it]

training loss: 0.8248305320739746


training:  51%|█████     | 4239/8300 [18:47:12<17:44:15, 15.72s/it]

training loss: 0.8991159200668335


training:  51%|█████     | 4240/8300 [18:47:28<17:44:28, 15.73s/it]

training loss: 0.7388445138931274


training:  51%|█████     | 4241/8300 [18:47:43<17:44:10, 15.73s/it]

training loss: 0.6583433747291565


training:  51%|█████     | 4242/8300 [18:47:59<17:44:08, 15.73s/it]

training loss: 1.1713144779205322


training:  51%|█████     | 4243/8300 [18:48:15<17:43:57, 15.74s/it]

training loss: 1.0310249328613281


training:  51%|█████     | 4244/8300 [18:48:31<17:43:55, 15.74s/it]

training loss: 0.8119702339172363


training:  51%|█████     | 4245/8300 [18:48:46<17:43:26, 15.74s/it]

training loss: 0.7364938259124756


training:  51%|█████     | 4246/8300 [18:49:02<17:42:56, 15.73s/it]

training loss: 0.5434401035308838


training:  51%|█████     | 4247/8300 [18:49:18<17:42:44, 15.73s/it]

training loss: 1.024034023284912


training:  51%|█████     | 4248/8300 [18:49:33<17:42:24, 15.73s/it]

training loss: 1.1987276077270508


training:  51%|█████     | 4249/8300 [18:49:49<17:41:59, 15.73s/it]

training loss: 0.20268584787845612


training:  51%|█████     | 4250/8300 [18:50:05<17:41:35, 15.73s/it]

training loss: 0.4246664345264435


training:  51%|█████     | 4251/8300 [18:50:21<17:41:03, 15.72s/it]

training loss: 0.5473796129226685


training:  51%|█████     | 4252/8300 [18:50:36<17:40:50, 15.72s/it]

training loss: 0.7921607494354248


training:  51%|█████     | 4253/8300 [18:50:52<17:40:19, 15.72s/it]

training loss: 0.799819827079773


training:  51%|█████▏    | 4254/8300 [18:51:08<17:40:01, 15.72s/it]

training loss: 0.39379942417144775


training:  51%|█████▏    | 4255/8300 [18:51:24<17:39:46, 15.72s/it]

training loss: 1.1246261596679688


training:  51%|█████▏    | 4256/8300 [18:51:39<17:39:31, 15.72s/it]

training loss: 0.9008224606513977


training:  51%|█████▏    | 4257/8300 [18:51:55<17:39:21, 15.72s/it]

training loss: 0.5357111692428589


training:  51%|█████▏    | 4258/8300 [18:52:11<17:39:00, 15.72s/it]

training loss: 0.6473504304885864


training:  51%|█████▏    | 4259/8300 [18:52:26<17:38:48, 15.72s/it]

training loss: 0.7905001044273376


training:  51%|█████▏    | 4260/8300 [18:52:42<17:38:22, 15.72s/it]

training loss: 0.7606368064880371


training:  51%|█████▏    | 4261/8300 [18:52:58<17:38:24, 15.72s/it]

training loss: 0.8537575602531433


training:  51%|█████▏    | 4262/8300 [18:53:14<17:38:06, 15.72s/it]

training loss: 0.6512715220451355


training:  51%|█████▏    | 4263/8300 [18:53:29<17:38:01, 15.72s/it]

training loss: 0.42919036746025085


training:  51%|█████▏    | 4264/8300 [18:53:45<17:37:23, 15.72s/it]

training loss: 0.9155982136726379


training:  51%|█████▏    | 4265/8300 [18:54:01<17:37:07, 15.72s/it]

training loss: 1.0358577966690063


training:  51%|█████▏    | 4266/8300 [18:54:16<17:36:49, 15.72s/it]

training loss: 1.1837385892868042


training:  51%|█████▏    | 4267/8300 [18:54:32<17:36:54, 15.72s/it]

training loss: 0.48037609457969666


training:  51%|█████▏    | 4268/8300 [18:54:48<17:36:39, 15.72s/it]

training loss: 0.569373607635498


training:  51%|█████▏    | 4269/8300 [18:55:04<17:36:29, 15.73s/it]

training loss: 0.9640251398086548


training:  51%|█████▏    | 4270/8300 [18:55:19<17:36:06, 15.72s/it]

training loss: 0.2978366017341614


training:  51%|█████▏    | 4271/8300 [18:55:35<17:35:57, 15.73s/it]

training loss: 0.744418203830719


training:  51%|█████▏    | 4272/8300 [18:55:51<17:35:27, 15.72s/it]

training loss: 0.999897837638855


training:  51%|█████▏    | 4273/8300 [18:56:07<17:34:59, 15.72s/it]

training loss: 0.8715760111808777


training:  51%|█████▏    | 4274/8300 [18:56:22<17:34:37, 15.72s/it]

training loss: 0.8589377403259277


training:  52%|█████▏    | 4275/8300 [18:56:38<17:34:27, 15.72s/it]

training loss: 0.4615139961242676


training:  52%|█████▏    | 4276/8300 [18:56:54<17:34:14, 15.72s/it]

training loss: 0.9217872619628906


training:  52%|█████▏    | 4277/8300 [18:57:09<17:34:22, 15.73s/it]

training loss: 0.7872488498687744


training:  52%|█████▏    | 4278/8300 [18:57:25<17:34:18, 15.73s/it]

training loss: 0.8493049740791321


training:  52%|█████▏    | 4279/8300 [18:57:41<17:33:46, 15.72s/it]

training loss: 0.7092557549476624


training:  52%|█████▏    | 4280/8300 [18:57:57<17:33:28, 15.72s/it]

training loss: 0.5755645632743835


training:  52%|█████▏    | 4281/8300 [18:58:12<17:33:13, 15.72s/it]

training loss: 0.6748232245445251


training:  52%|█████▏    | 4282/8300 [18:58:28<17:33:09, 15.73s/it]

training loss: 1.0764391422271729


training:  52%|█████▏    | 4283/8300 [18:58:44<17:32:54, 15.73s/it]

training loss: 0.512095034122467


training:  52%|█████▏    | 4284/8300 [18:58:59<17:32:25, 15.72s/it]

training loss: 1.0043647289276123


training:  52%|█████▏    | 4285/8300 [18:59:15<17:32:10, 15.72s/it]

training loss: 0.9749335050582886


training:  52%|█████▏    | 4286/8300 [18:59:31<17:32:16, 15.73s/it]

training loss: 0.6504058837890625


training:  52%|█████▏    | 4287/8300 [18:59:47<17:32:05, 15.73s/it]

training loss: 0.45435401797294617


training:  52%|█████▏    | 4288/8300 [19:00:02<17:31:49, 15.73s/it]

training loss: 0.6393122673034668


training:  52%|█████▏    | 4289/8300 [19:00:18<17:31:36, 15.73s/it]

training loss: 0.9600204825401306


training:  52%|█████▏    | 4290/8300 [19:00:34<17:31:05, 15.73s/it]

training loss: 0.9116623997688293


training:  52%|█████▏    | 4291/8300 [19:00:50<17:30:27, 15.72s/it]

training loss: 0.6364275217056274


training:  52%|█████▏    | 4292/8300 [19:01:05<17:30:15, 15.72s/it]

training loss: 1.1809371709823608


training:  52%|█████▏    | 4293/8300 [19:01:21<17:29:55, 15.72s/it]

training loss: 0.797062337398529


training:  52%|█████▏    | 4294/8300 [19:01:37<17:29:44, 15.72s/it]

training loss: 0.4913176894187927


training:  52%|█████▏    | 4295/8300 [19:01:52<17:29:20, 15.72s/it]

training loss: 0.5584646463394165


training:  52%|█████▏    | 4296/8300 [19:02:08<17:29:07, 15.72s/it]

training loss: 0.814974308013916


training:  52%|█████▏    | 4297/8300 [19:02:24<17:29:00, 15.72s/it]

training loss: 0.688392698764801


training:  52%|█████▏    | 4298/8300 [19:02:40<17:28:47, 15.72s/it]

training loss: 0.36375370621681213


training:  52%|█████▏    | 4299/8300 [19:02:55<17:28:23, 15.72s/it]

training loss: 0.9226000905036926


training:  52%|█████▏    | 4300/8300 [19:03:11<17:27:59, 15.72s/it]

training loss: 1.037349820137024
training loss: 1.0649667978286743



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.4948734045028687
sa daju
pouzit v boji, pripomina Shanahan.
Vsetko
o terore v Parizi.Zastupca sefa pozorovatelskej misie OBSE Alexander Hug pre magazin
uviedol, ze pozorovatelia organizacie maju obrovske tazkosti dostat
sa na miesta, kde prebiehaju boje.
Zakladom prace je, aby mali clenovia misie pristup na tieto miesta a
spoznali nasledky bojov, dodal Hug. Obe strany, predovsetkym vsak
povstalci, nam v pristupe mohutne brania, povedal Hug, ktoreho citovala
aj tlacova agentura DPA. Hug uviedol, ze pozorovatelom nedovolili ani len
prejst cez kontrolne stanovistia a ze ozbrojene straze na tychto
stanovistiach sa obcas spravaju agresivne.
Citajte viac:
Pozorovatelia
OBSE potvrdili pritomnost ruskych vojakov na Kryme
Boje medzi ukrajinskymi vladnymi vojakmi a proruskymi separatistami na
vychode Ukrajiny, v Donbase, sa v poslednych dnoch zintenzivnili.
Obyvatelia v oblastiach konfliktu su nahnevani, pretoze vyostrene boje
prinasaju zvyseny pocet civilnyc


generating:   0%|          | 1/512 [00:00<02:03,  4.14it/s][A
generating:   0%|          | 2/512 [00:00<02:03,  4.12it/s][A
generating:   1%|          | 3/512 [00:00<02:02,  4.15it/s][A
generating:   1%|          | 4/512 [00:00<02:01,  4.19it/s][A
generating:   1%|          | 5/512 [00:01<01:59,  4.23it/s][A
generating:   1%|          | 6/512 [00:01<01:58,  4.26it/s][A
generating:   1%|▏         | 7/512 [00:01<02:00,  4.20it/s][A
generating:   2%|▏         | 8/512 [00:01<01:59,  4.22it/s][A
generating:   2%|▏         | 9/512 [00:02<01:58,  4.25it/s][A
generating:   2%|▏         | 10/512 [00:02<01:57,  4.26it/s][A
generating:   2%|▏         | 11/512 [00:02<01:57,  4.25it/s][A
generating:   2%|▏         | 12/512 [00:02<01:58,  4.21it/s][A
generating:   3%|▎         | 13/512 [00:03<01:58,  4.22it/s][A
generating:   3%|▎         | 14/512 [00:03<01:57,  4.24it/s][A
generating:   3%|▎         | 15/512 [00:03<01:57,  4.23it/s][A
generating:   3%|▎         | 16/512 [00:03<01:57

ucnosti.
Na poslednu z prisli aj k dcerskej skupiny vo vyske 1,1 miliona ludia
splacat poziari na ich muzov a vlastnych otrakcii svoj obetovy sektora. Medzinarodnych
krajin mali riesili svoje vyrabat, ktore
polnohospodarstvo aj ceny a volne zijucich oblasti pri tom suvislosti s touto predvidata.
Produkcia na rozdiel na morky.
Zachranne posilnosti tiez viac nasledne ich
schvalovach a sariantovali miestne otazky ceny ovplyvni
v mestach. Podla deje. Medveda hovorila
o tretina uverov, ktora umoznuju vy


training:  52%|█████▏    | 4302/8300 [19:05:45<46:07:37, 41.54s/it]

training loss: 0.6197383403778076


training:  52%|█████▏    | 4303/8300 [19:06:01<37:30:56, 33.79s/it]

training loss: 1.0797221660614014


training:  52%|█████▏    | 4304/8300 [19:06:17<31:29:12, 28.37s/it]

training loss: 0.6206402778625488


training:  52%|█████▏    | 4305/8300 [19:06:33<27:16:12, 24.57s/it]

training loss: 0.7259258031845093


training:  52%|█████▏    | 4306/8300 [19:06:48<24:19:04, 21.92s/it]

training loss: 0.9236884117126465


training:  52%|█████▏    | 4307/8300 [19:07:04<22:14:55, 20.06s/it]

training loss: 1.122418999671936


training:  52%|█████▏    | 4308/8300 [19:07:20<20:47:53, 18.76s/it]

training loss: 0.8078083992004395


training:  52%|█████▏    | 4309/8300 [19:07:35<19:47:12, 17.85s/it]

training loss: 0.857210099697113


training:  52%|█████▏    | 4310/8300 [19:07:51<19:04:12, 17.21s/it]

training loss: 1.0236083269119263


training:  52%|█████▏    | 4311/8300 [19:08:07<18:34:19, 16.76s/it]

training loss: 0.6563785672187805


training:  52%|█████▏    | 4312/8300 [19:08:23<18:13:07, 16.45s/it]

training loss: 0.79084312915802


training:  52%|█████▏    | 4313/8300 [19:08:38<17:58:25, 16.23s/it]

training loss: 0.5428423881530762


training:  52%|█████▏    | 4314/8300 [19:08:54<17:47:56, 16.08s/it]

training loss: 1.0209956169128418


training:  52%|█████▏    | 4315/8300 [19:09:10<17:40:38, 15.97s/it]

training loss: 0.8429238796234131


training:  52%|█████▏    | 4316/8300 [19:09:26<17:35:36, 15.90s/it]

training loss: 0.6048669815063477


training:  52%|█████▏    | 4317/8300 [19:09:41<17:31:47, 15.84s/it]

training loss: 0.6641117930412292


training:  52%|█████▏    | 4318/8300 [19:09:57<17:29:00, 15.81s/it]

training loss: 0.9469236135482788


training:  52%|█████▏    | 4319/8300 [19:10:13<17:27:01, 15.78s/it]

training loss: 0.8330197334289551


training:  52%|█████▏    | 4320/8300 [19:10:28<17:27:44, 15.80s/it]

training loss: 0.9202677011489868


training:  52%|█████▏    | 4321/8300 [19:10:44<17:25:44, 15.77s/it]

training loss: 1.1191532611846924


training:  52%|█████▏    | 4322/8300 [19:11:00<17:24:17, 15.75s/it]

training loss: 1.0854296684265137


training:  52%|█████▏    | 4323/8300 [19:11:16<17:23:15, 15.74s/it]

training loss: 0.46728506684303284


training:  52%|█████▏    | 4324/8300 [19:11:31<17:22:41, 15.73s/it]

training loss: 1.1314131021499634


training:  52%|█████▏    | 4325/8300 [19:11:47<17:21:57, 15.73s/it]

training loss: 0.6433377861976624


training:  52%|█████▏    | 4326/8300 [19:12:03<17:21:24, 15.72s/it]

training loss: 0.41011789441108704


training:  52%|█████▏    | 4327/8300 [19:12:18<17:20:53, 15.72s/it]

training loss: 0.7613661885261536


training:  52%|█████▏    | 4328/8300 [19:12:34<17:20:42, 15.72s/it]

training loss: 1.1423662900924683


training:  52%|█████▏    | 4329/8300 [19:12:50<17:20:07, 15.72s/it]

training loss: 0.9899663925170898


training:  52%|█████▏    | 4330/8300 [19:13:06<17:20:05, 15.72s/it]

training loss: 0.9349887371063232


training:  52%|█████▏    | 4331/8300 [19:13:21<17:19:46, 15.72s/it]

training loss: 0.5926543474197388


training:  52%|█████▏    | 4332/8300 [19:13:37<17:19:36, 15.72s/it]

training loss: 0.3304261565208435


training:  52%|█████▏    | 4333/8300 [19:13:53<17:19:05, 15.72s/it]

training loss: 0.9308288097381592


training:  52%|█████▏    | 4334/8300 [19:14:08<17:18:49, 15.72s/it]

training loss: 0.4866005480289459


training:  52%|█████▏    | 4335/8300 [19:14:24<17:18:45, 15.72s/it]

training loss: 1.0972509384155273


training:  52%|█████▏    | 4336/8300 [19:14:40<17:18:23, 15.72s/it]

training loss: 0.7306418418884277


training:  52%|█████▏    | 4337/8300 [19:14:56<17:17:58, 15.71s/it]

training loss: 0.8258075714111328


training:  52%|█████▏    | 4338/8300 [19:15:11<17:17:40, 15.71s/it]

training loss: 1.1198748350143433


training:  52%|█████▏    | 4339/8300 [19:15:27<17:17:58, 15.72s/it]

training loss: 0.6746070981025696


training:  52%|█████▏    | 4340/8300 [19:15:43<17:17:57, 15.73s/it]

training loss: 1.2210466861724854


training:  52%|█████▏    | 4341/8300 [19:15:59<17:17:37, 15.73s/it]

training loss: 0.8932090997695923


training:  52%|█████▏    | 4342/8300 [19:16:14<17:17:36, 15.73s/it]

training loss: 0.688123345375061


training:  52%|█████▏    | 4343/8300 [19:16:30<17:17:36, 15.73s/it]

training loss: 0.9683032035827637


training:  52%|█████▏    | 4344/8300 [19:16:46<17:17:23, 15.73s/it]

training loss: 0.7740243077278137


training:  52%|█████▏    | 4345/8300 [19:17:02<17:17:10, 15.73s/it]

training loss: 1.213308572769165


training:  52%|█████▏    | 4346/8300 [19:17:17<17:16:57, 15.74s/it]

training loss: 0.8680405020713806


training:  52%|█████▏    | 4347/8300 [19:17:33<17:16:47, 15.74s/it]

training loss: 0.6532267928123474


training:  52%|█████▏    | 4348/8300 [19:17:49<17:15:59, 15.73s/it]

training loss: 0.4654388129711151


training:  52%|█████▏    | 4349/8300 [19:18:04<17:15:24, 15.72s/it]

training loss: 0.7608131766319275


training:  52%|█████▏    | 4350/8300 [19:18:20<17:14:58, 15.72s/it]

training loss: 0.7477072477340698


training:  52%|█████▏    | 4351/8300 [19:18:36<17:14:40, 15.72s/it]

training loss: 0.484708309173584


training:  52%|█████▏    | 4352/8300 [19:18:52<17:14:04, 15.72s/it]

training loss: 0.6493097543716431


training:  52%|█████▏    | 4353/8300 [19:19:07<17:13:52, 15.72s/it]

training loss: 0.6327098608016968


training:  52%|█████▏    | 4354/8300 [19:19:23<17:13:39, 15.72s/it]

training loss: 0.7866369485855103


training:  52%|█████▏    | 4355/8300 [19:19:39<17:13:36, 15.72s/it]

training loss: 0.8505512475967407


training:  52%|█████▏    | 4356/8300 [19:19:54<17:13:33, 15.72s/it]

training loss: 0.8157520294189453


training:  52%|█████▏    | 4357/8300 [19:20:10<17:13:03, 15.72s/it]

training loss: 0.7325966954231262


training:  53%|█████▎    | 4358/8300 [19:20:26<17:12:55, 15.72s/it]

training loss: 0.5094718933105469


training:  53%|█████▎    | 4359/8300 [19:20:42<17:12:22, 15.72s/it]

training loss: 1.0344809293746948


training:  53%|█████▎    | 4360/8300 [19:20:57<17:12:08, 15.72s/it]

training loss: 0.7166165113449097


training:  53%|█████▎    | 4361/8300 [19:21:13<17:11:42, 15.72s/it]

training loss: 1.1460981369018555


training:  53%|█████▎    | 4362/8300 [19:21:29<17:11:35, 15.72s/it]

training loss: 1.014721393585205


training:  53%|█████▎    | 4363/8300 [19:21:44<17:11:22, 15.72s/it]

training loss: 0.7235349416732788


training:  53%|█████▎    | 4364/8300 [19:22:00<17:11:21, 15.72s/it]

training loss: 0.9983669519424438


training:  53%|█████▎    | 4365/8300 [19:22:16<17:10:58, 15.72s/it]

training loss: 0.8108906745910645


training:  53%|█████▎    | 4366/8300 [19:22:32<17:10:41, 15.72s/it]

training loss: 0.9399988651275635


training:  53%|█████▎    | 4367/8300 [19:22:47<17:10:13, 15.72s/it]

training loss: 0.6886345148086548


training:  53%|█████▎    | 4368/8300 [19:23:03<17:09:56, 15.72s/it]

training loss: 0.9222050905227661


training:  53%|█████▎    | 4369/8300 [19:23:19<17:09:38, 15.72s/it]

training loss: 0.6591871976852417


training:  53%|█████▎    | 4370/8300 [19:23:34<17:09:34, 15.72s/it]

training loss: 0.6235325336456299


training:  53%|█████▎    | 4371/8300 [19:23:50<17:09:09, 15.72s/it]

training loss: 0.7092497944831848


training:  53%|█████▎    | 4372/8300 [19:24:06<17:09:01, 15.72s/it]

training loss: 0.6305034756660461


training:  53%|█████▎    | 4373/8300 [19:24:22<17:08:45, 15.72s/it]

training loss: 1.0530279874801636


training:  53%|█████▎    | 4374/8300 [19:24:37<17:08:33, 15.72s/it]

training loss: 0.8022668957710266


training:  53%|█████▎    | 4375/8300 [19:24:53<17:08:09, 15.72s/it]

training loss: 0.40130266547203064


training:  53%|█████▎    | 4376/8300 [19:25:09<17:07:58, 15.72s/it]

training loss: 0.43552619218826294


training:  53%|█████▎    | 4377/8300 [19:25:25<17:08:01, 15.72s/it]

training loss: 0.9322014451026917


training:  53%|█████▎    | 4378/8300 [19:25:40<17:07:45, 15.72s/it]

training loss: 0.8232831954956055


training:  53%|█████▎    | 4379/8300 [19:25:56<17:07:23, 15.72s/it]

training loss: 0.6150546073913574


training:  53%|█████▎    | 4380/8300 [19:26:12<17:06:56, 15.72s/it]

training loss: 0.6527145504951477


training:  53%|█████▎    | 4381/8300 [19:26:27<17:06:44, 15.72s/it]

training loss: 0.6752579212188721


training:  53%|█████▎    | 4382/8300 [19:26:43<17:06:28, 15.72s/it]

training loss: 0.7191766500473022


training:  53%|█████▎    | 4383/8300 [19:26:59<17:06:16, 15.72s/it]

training loss: 0.9529983997344971


training:  53%|█████▎    | 4384/8300 [19:27:15<17:06:01, 15.72s/it]

training loss: 1.0913032293319702


training:  53%|█████▎    | 4385/8300 [19:27:30<17:05:40, 15.72s/it]

training loss: 0.9721902012825012


training:  53%|█████▎    | 4386/8300 [19:27:46<17:05:19, 15.72s/it]

training loss: 0.8795474171638489


training:  53%|█████▎    | 4387/8300 [19:28:02<17:05:04, 15.72s/it]

training loss: 0.6486560106277466


training:  53%|█████▎    | 4388/8300 [19:28:17<17:04:52, 15.72s/it]

training loss: 0.9176562428474426


training:  53%|█████▎    | 4389/8300 [19:28:33<17:04:44, 15.72s/it]

training loss: 0.606566309928894


training:  53%|█████▎    | 4390/8300 [19:28:49<17:04:21, 15.72s/it]

training loss: 0.8479245901107788


training:  53%|█████▎    | 4391/8300 [19:29:05<17:04:07, 15.72s/it]

training loss: 1.076358675956726


training:  53%|█████▎    | 4392/8300 [19:29:20<17:03:56, 15.72s/it]

training loss: 1.08454430103302


training:  53%|█████▎    | 4393/8300 [19:29:36<17:03:50, 15.72s/it]

training loss: 0.77898108959198


training:  53%|█████▎    | 4394/8300 [19:29:52<17:03:31, 15.72s/it]

training loss: 0.8770842552185059


training:  53%|█████▎    | 4395/8300 [19:30:07<17:03:20, 15.72s/it]

training loss: 0.9649533033370972


training:  53%|█████▎    | 4396/8300 [19:30:23<17:02:59, 15.72s/it]

training loss: 0.9610380530357361


training:  53%|█████▎    | 4397/8300 [19:30:39<17:02:55, 15.73s/it]

training loss: 0.7506952285766602


training:  53%|█████▎    | 4398/8300 [19:30:55<17:02:33, 15.72s/it]

training loss: 1.1402838230133057


training:  53%|█████▎    | 4399/8300 [19:31:10<17:02:25, 15.73s/it]

training loss: 0.9047490358352661


training:  53%|█████▎    | 4400/8300 [19:31:26<17:02:14, 15.73s/it]

training loss: 1.3009884357452393
training loss: 0.5905243754386902


training:  53%|█████▎    | 4401/8300 [19:31:43<17:26:56, 16.11s/it]

validation loss: 1.4559755325317383


training:  53%|█████▎    | 4402/8300 [19:31:59<17:19:19, 16.00s/it]

training loss: 0.9521949291229248


training:  53%|█████▎    | 4403/8300 [19:32:15<17:13:35, 15.91s/it]

training loss: 0.7378619313240051


training:  53%|█████▎    | 4404/8300 [19:32:30<17:09:41, 15.86s/it]

training loss: 0.7281032204627991


training:  53%|█████▎    | 4405/8300 [19:32:46<17:06:36, 15.81s/it]

training loss: 0.8266730904579163


training:  53%|█████▎    | 4406/8300 [19:33:02<17:04:39, 15.79s/it]

training loss: 0.9101263880729675


training:  53%|█████▎    | 4407/8300 [19:33:17<17:03:03, 15.77s/it]

training loss: 1.0090397596359253


training:  53%|█████▎    | 4408/8300 [19:33:33<17:02:01, 15.76s/it]

training loss: 0.5576725006103516


training:  53%|█████▎    | 4409/8300 [19:33:49<17:00:53, 15.74s/it]

training loss: 0.7699767351150513


training:  53%|█████▎    | 4410/8300 [19:34:05<17:00:03, 15.73s/it]

training loss: 0.45848357677459717


training:  53%|█████▎    | 4411/8300 [19:34:20<16:59:21, 15.73s/it]

training loss: 0.639055609703064


training:  53%|█████▎    | 4412/8300 [19:34:36<16:59:02, 15.73s/it]

training loss: 0.6674506664276123


training:  53%|█████▎    | 4413/8300 [19:34:52<16:58:28, 15.72s/it]

training loss: 0.6797477602958679


training:  53%|█████▎    | 4414/8300 [19:35:07<16:58:10, 15.72s/it]

training loss: 0.8976508975028992


training:  53%|█████▎    | 4415/8300 [19:35:23<16:57:48, 15.72s/it]

training loss: 0.15166085958480835


training:  53%|█████▎    | 4416/8300 [19:35:39<16:57:36, 15.72s/it]

training loss: 0.7178629636764526


training:  53%|█████▎    | 4417/8300 [19:35:55<16:57:16, 15.72s/it]

training loss: 0.6203932166099548


training:  53%|█████▎    | 4418/8300 [19:36:10<16:57:01, 15.72s/it]

training loss: 0.9708461761474609


training:  53%|█████▎    | 4419/8300 [19:36:26<16:56:42, 15.72s/it]

training loss: 0.8550763130187988


training:  53%|█████▎    | 4420/8300 [19:36:42<16:56:23, 15.72s/it]

training loss: 0.9283270239830017


training:  53%|█████▎    | 4421/8300 [19:36:58<16:56:06, 15.72s/it]

training loss: 0.7446301579475403


training:  53%|█████▎    | 4422/8300 [19:37:13<16:55:53, 15.72s/it]

training loss: 0.6212955713272095


training:  53%|█████▎    | 4423/8300 [19:37:29<16:55:34, 15.72s/it]

training loss: 1.005344033241272


training:  53%|█████▎    | 4424/8300 [19:37:45<16:55:09, 15.71s/it]

training loss: 0.7316858172416687


training:  53%|█████▎    | 4425/8300 [19:38:00<16:54:46, 15.71s/it]

training loss: 0.883746862411499


training:  53%|█████▎    | 4426/8300 [19:38:16<16:54:33, 15.71s/it]

training loss: 0.5757664442062378


training:  53%|█████▎    | 4427/8300 [19:38:32<16:54:30, 15.72s/it]

training loss: 0.8096780180931091


training:  53%|█████▎    | 4428/8300 [19:38:48<16:54:08, 15.72s/it]

training loss: 0.776737630367279


training:  53%|█████▎    | 4429/8300 [19:39:03<16:54:00, 15.72s/it]

training loss: 0.5054137706756592


training:  53%|█████▎    | 4430/8300 [19:39:19<16:53:45, 15.72s/it]

training loss: 0.9500486254692078


training:  53%|█████▎    | 4431/8300 [19:39:35<16:53:44, 15.72s/it]

training loss: 0.875271201133728


training:  53%|█████▎    | 4432/8300 [19:39:50<16:53:20, 15.72s/it]

training loss: 0.686663806438446


training:  53%|█████▎    | 4433/8300 [19:40:06<16:53:15, 15.72s/it]

training loss: 0.8917825222015381


training:  53%|█████▎    | 4434/8300 [19:40:22<16:52:48, 15.72s/it]

training loss: 0.9448217749595642


training:  53%|█████▎    | 4435/8300 [19:40:38<16:53:11, 15.73s/it]

training loss: 1.0728883743286133


training:  53%|█████▎    | 4436/8300 [19:40:53<16:52:41, 15.73s/it]

training loss: 0.9800839424133301


training:  53%|█████▎    | 4437/8300 [19:41:09<16:52:29, 15.73s/it]

training loss: 0.8039584159851074


training:  53%|█████▎    | 4438/8300 [19:41:25<16:52:09, 15.72s/it]

training loss: 0.7732517123222351


training:  53%|█████▎    | 4439/8300 [19:41:40<16:51:56, 15.73s/it]

training loss: 0.8254057168960571


training:  53%|█████▎    | 4440/8300 [19:41:56<16:51:36, 15.72s/it]

training loss: 0.9085161685943604


training:  54%|█████▎    | 4441/8300 [19:42:12<16:51:05, 15.72s/it]

training loss: 0.8199887871742249


training:  54%|█████▎    | 4442/8300 [19:42:28<16:51:07, 15.73s/it]

training loss: 0.9515166878700256


training:  54%|█████▎    | 4443/8300 [19:42:43<16:50:55, 15.73s/it]

training loss: 0.4377153217792511


training:  54%|█████▎    | 4444/8300 [19:42:59<16:50:34, 15.72s/it]

training loss: 1.0085252523422241


training:  54%|█████▎    | 4445/8300 [19:43:15<16:50:19, 15.72s/it]

training loss: 0.6870607733726501


training:  54%|█████▎    | 4446/8300 [19:43:31<16:50:10, 15.73s/it]

training loss: 0.8247554898262024


training:  54%|█████▎    | 4447/8300 [19:43:46<16:49:49, 15.73s/it]

training loss: 0.5925716757774353


training:  54%|█████▎    | 4448/8300 [19:44:02<16:49:27, 15.72s/it]

training loss: 0.44698232412338257


training:  54%|█████▎    | 4449/8300 [19:44:18<16:49:23, 15.73s/it]

training loss: 0.9813125729560852


training:  54%|█████▎    | 4450/8300 [19:44:33<16:49:03, 15.73s/it]

training loss: 1.0354472398757935


training:  54%|█████▎    | 4451/8300 [19:44:49<16:48:42, 15.72s/it]

training loss: 0.7124965190887451


training:  54%|█████▎    | 4452/8300 [19:45:05<16:48:38, 15.73s/it]

training loss: 0.728042483329773


training:  54%|█████▎    | 4453/8300 [19:45:21<16:48:17, 15.73s/it]

training loss: 0.6484471559524536


training:  54%|█████▎    | 4454/8300 [19:45:36<16:47:59, 15.73s/it]

training loss: 0.9553171992301941


training:  54%|█████▎    | 4455/8300 [19:45:52<16:47:25, 15.72s/it]

training loss: 0.9696695804595947


training:  54%|█████▎    | 4456/8300 [19:46:08<16:47:11, 15.72s/it]

training loss: 0.6440542340278625


training:  54%|█████▎    | 4457/8300 [19:46:24<16:46:45, 15.72s/it]

training loss: 0.9555647969245911


training:  54%|█████▎    | 4458/8300 [19:46:39<16:46:43, 15.72s/it]

training loss: 0.9911139011383057


training:  54%|█████▎    | 4459/8300 [19:46:55<16:46:20, 15.72s/it]

training loss: 0.6310088634490967


training:  54%|█████▎    | 4460/8300 [19:47:11<16:45:55, 15.72s/it]

training loss: 0.7864954471588135


training:  54%|█████▎    | 4461/8300 [19:47:26<16:45:45, 15.72s/it]

training loss: 0.6998664140701294


training:  54%|█████▍    | 4462/8300 [19:47:42<16:45:14, 15.72s/it]

training loss: 0.9851076602935791


training:  54%|█████▍    | 4463/8300 [19:47:58<16:44:52, 15.71s/it]

training loss: 0.9391070604324341


training:  54%|█████▍    | 4464/8300 [19:48:14<16:44:46, 15.72s/it]

training loss: 0.9497774243354797


training:  54%|█████▍    | 4465/8300 [19:48:29<16:44:33, 15.72s/it]

training loss: 0.40944433212280273


training:  54%|█████▍    | 4466/8300 [19:48:45<16:44:09, 15.71s/it]

training loss: 0.997005820274353


training:  54%|█████▍    | 4467/8300 [19:49:01<16:43:52, 15.71s/it]

training loss: 0.6545600891113281


training:  54%|█████▍    | 4468/8300 [19:49:16<16:43:33, 15.71s/it]

training loss: 0.477750688791275


training:  54%|█████▍    | 4469/8300 [19:49:32<16:43:26, 15.72s/it]

training loss: 0.8112634420394897


training:  54%|█████▍    | 4470/8300 [19:49:48<16:43:06, 15.71s/it]

training loss: 0.47894051671028137


training:  54%|█████▍    | 4471/8300 [19:50:04<16:42:46, 15.71s/it]

training loss: 1.109316110610962


training:  54%|█████▍    | 4472/8300 [19:50:19<16:42:25, 15.71s/it]

training loss: 0.9063022136688232


training:  54%|█████▍    | 4473/8300 [19:50:35<16:42:25, 15.72s/it]

training loss: 0.9224984645843506


training:  54%|█████▍    | 4474/8300 [19:50:51<16:41:57, 15.71s/it]

training loss: 0.6352087259292603


training:  54%|█████▍    | 4475/8300 [19:51:06<16:41:48, 15.71s/it]

training loss: 0.8489020466804504


training:  54%|█████▍    | 4476/8300 [19:51:22<16:41:31, 15.71s/it]

training loss: 1.0626239776611328


training:  54%|█████▍    | 4477/8300 [19:51:38<16:41:24, 15.72s/it]

training loss: 0.6545205116271973


training:  54%|█████▍    | 4478/8300 [19:51:54<16:40:53, 15.71s/it]

training loss: 0.8947954177856445


training:  54%|█████▍    | 4479/8300 [19:52:09<16:40:48, 15.72s/it]

training loss: 0.8985210657119751


training:  54%|█████▍    | 4480/8300 [19:52:25<16:40:32, 15.72s/it]

training loss: 0.7945171594619751


training:  54%|█████▍    | 4481/8300 [19:52:41<16:40:08, 15.71s/it]

training loss: 0.6617516279220581


training:  54%|█████▍    | 4482/8300 [19:52:56<16:39:49, 15.71s/it]

training loss: 0.7342501878738403


training:  54%|█████▍    | 4483/8300 [19:53:12<16:39:36, 15.71s/it]

training loss: 0.31471917033195496


training:  54%|█████▍    | 4484/8300 [19:53:28<16:39:36, 15.72s/it]

training loss: 0.5956965684890747


training:  54%|█████▍    | 4485/8300 [19:53:44<16:39:20, 15.72s/it]

training loss: 0.587875247001648


training:  54%|█████▍    | 4486/8300 [19:53:59<16:39:13, 15.72s/it]

training loss: 0.758431077003479


training:  54%|█████▍    | 4487/8300 [19:54:15<16:38:51, 15.72s/it]

training loss: 0.8287469148635864


training:  54%|█████▍    | 4488/8300 [19:54:31<16:38:39, 15.72s/it]

training loss: 0.9251456260681152


training:  54%|█████▍    | 4489/8300 [19:54:46<16:38:17, 15.72s/it]

training loss: 0.6882451176643372


training:  54%|█████▍    | 4490/8300 [19:55:02<16:37:48, 15.71s/it]

training loss: 0.7568356990814209


training:  54%|█████▍    | 4491/8300 [19:55:18<16:37:35, 15.71s/it]

training loss: 1.1318163871765137


training:  54%|█████▍    | 4492/8300 [19:55:34<16:37:24, 15.72s/it]

training loss: 0.42833730578422546


training:  54%|█████▍    | 4493/8300 [19:55:49<16:37:11, 15.72s/it]

training loss: 0.5370887517929077


training:  54%|█████▍    | 4494/8300 [19:56:05<16:37:05, 15.72s/it]

training loss: 0.8679630756378174


training:  54%|█████▍    | 4495/8300 [19:56:21<16:36:46, 15.72s/it]

training loss: 0.8413097858428955


training:  54%|█████▍    | 4496/8300 [19:56:36<16:36:34, 15.72s/it]

training loss: 1.1324142217636108


training:  54%|█████▍    | 4497/8300 [19:56:52<16:36:14, 15.72s/it]

training loss: 0.9534192085266113


training:  54%|█████▍    | 4498/8300 [19:57:08<16:36:02, 15.72s/it]

training loss: 0.602656364440918


training:  54%|█████▍    | 4499/8300 [19:57:24<16:35:36, 15.72s/it]

training loss: 0.9296327829360962


training:  54%|█████▍    | 4500/8300 [19:57:39<16:35:35, 15.72s/it]

training loss: 0.5106181502342224
training loss: 1.0399243831634521


training:  54%|█████▍    | 4501/8300 [19:57:56<16:59:25, 16.10s/it]

validation loss: 1.574044942855835


training:  54%|█████▍    | 4502/8300 [19:58:12<16:52:05, 15.99s/it]

training loss: 0.7891770601272583


training:  54%|█████▍    | 4503/8300 [19:58:28<16:46:45, 15.91s/it]

training loss: 0.4175660014152527


training:  54%|█████▍    | 4504/8300 [19:58:43<16:42:46, 15.85s/it]

training loss: 0.8110538125038147


training:  54%|█████▍    | 4505/8300 [19:58:59<16:39:54, 15.81s/it]

training loss: 0.6008725166320801


training:  54%|█████▍    | 4506/8300 [19:59:15<16:38:01, 15.78s/it]

training loss: 1.0955601930618286


training:  54%|█████▍    | 4507/8300 [19:59:31<16:36:46, 15.77s/it]

training loss: 0.8997393250465393


training:  54%|█████▍    | 4508/8300 [19:59:46<16:35:38, 15.75s/it]

training loss: 0.4699481725692749


training:  54%|█████▍    | 4509/8300 [20:00:02<16:34:43, 15.74s/it]

training loss: 0.7444817423820496


training:  54%|█████▍    | 4510/8300 [20:00:18<16:34:00, 15.74s/it]

training loss: 0.7704889178276062


training:  54%|█████▍    | 4511/8300 [20:00:33<16:33:30, 15.73s/it]

training loss: 1.1085572242736816


training:  54%|█████▍    | 4512/8300 [20:00:49<16:32:50, 15.73s/it]

training loss: 0.7738571763038635


training:  54%|█████▍    | 4513/8300 [20:01:05<16:32:24, 15.72s/it]

training loss: 0.7356491684913635


training:  54%|█████▍    | 4514/8300 [20:01:21<16:31:53, 15.72s/it]

training loss: 0.8047707676887512


training:  54%|█████▍    | 4515/8300 [20:01:36<16:31:36, 15.72s/it]

training loss: 0.8122085928916931


training:  54%|█████▍    | 4516/8300 [20:01:52<16:31:27, 15.72s/it]

training loss: 0.8565203547477722


training:  54%|█████▍    | 4517/8300 [20:02:08<16:31:13, 15.72s/it]

training loss: 1.022491693496704


training:  54%|█████▍    | 4518/8300 [20:02:24<16:30:56, 15.72s/it]

training loss: 1.0440596342086792


training:  54%|█████▍    | 4519/8300 [20:02:39<16:30:42, 15.72s/it]

training loss: 0.7969827055931091


training:  54%|█████▍    | 4520/8300 [20:02:55<16:30:22, 15.72s/it]

training loss: 0.9065423607826233


training:  54%|█████▍    | 4521/8300 [20:03:11<16:30:05, 15.72s/it]

training loss: 1.0326565504074097


training:  54%|█████▍    | 4522/8300 [20:03:26<16:29:54, 15.72s/it]

training loss: 0.6693593263626099


training:  54%|█████▍    | 4523/8300 [20:03:42<16:29:42, 15.72s/it]

training loss: 0.5673923492431641


training:  55%|█████▍    | 4524/8300 [20:03:58<16:29:29, 15.72s/it]

training loss: 0.6631116271018982


training:  55%|█████▍    | 4525/8300 [20:04:14<16:29:09, 15.72s/it]

training loss: 0.5577847361564636


training:  55%|█████▍    | 4526/8300 [20:04:29<16:28:43, 15.72s/it]

training loss: 0.803826093673706


training:  55%|█████▍    | 4527/8300 [20:04:45<16:28:07, 15.71s/it]

training loss: 0.5880887508392334


training:  55%|█████▍    | 4528/8300 [20:05:01<16:27:44, 15.71s/it]

training loss: 0.6934062838554382


training:  55%|█████▍    | 4529/8300 [20:05:16<16:27:35, 15.71s/it]

training loss: 1.1647566556930542


training:  55%|█████▍    | 4530/8300 [20:05:32<16:27:25, 15.72s/it]

training loss: 0.6037898063659668


training:  55%|█████▍    | 4531/8300 [20:05:48<16:27:01, 15.71s/it]

training loss: 0.8404694199562073


training:  55%|█████▍    | 4532/8300 [20:06:04<16:26:46, 15.71s/it]

training loss: 0.4011983871459961


training:  55%|█████▍    | 4533/8300 [20:06:19<16:26:31, 15.71s/it]

training loss: 0.42182260751724243


training:  55%|█████▍    | 4534/8300 [20:06:35<16:26:07, 15.71s/it]

training loss: 0.9244959354400635


training:  55%|█████▍    | 4535/8300 [20:06:51<16:25:35, 15.71s/it]

training loss: 0.6019328832626343


training:  55%|█████▍    | 4536/8300 [20:07:06<16:25:39, 15.71s/it]

training loss: 0.6277294754981995


training:  55%|█████▍    | 4537/8300 [20:07:22<16:25:24, 15.71s/it]

training loss: 0.8130124807357788


training:  55%|█████▍    | 4538/8300 [20:07:38<16:25:22, 15.72s/it]

training loss: 0.8567807674407959


training:  55%|█████▍    | 4539/8300 [20:07:54<16:24:53, 15.71s/it]

training loss: 0.7540723085403442


training:  55%|█████▍    | 4540/8300 [20:08:09<16:24:50, 15.72s/it]

training loss: 0.5981773734092712


training:  55%|█████▍    | 4541/8300 [20:08:25<16:24:42, 15.72s/it]

training loss: 1.0384286642074585


training:  55%|█████▍    | 4542/8300 [20:08:41<16:24:27, 15.72s/it]

training loss: 0.6254370808601379


training:  55%|█████▍    | 4543/8300 [20:08:56<16:24:01, 15.72s/it]

training loss: 0.9123992919921875


training:  55%|█████▍    | 4544/8300 [20:09:12<16:23:45, 15.72s/it]

training loss: 0.5194876194000244


training:  55%|█████▍    | 4545/8300 [20:09:28<16:23:44, 15.72s/it]

training loss: 0.8224271535873413


training:  55%|█████▍    | 4546/8300 [20:09:44<16:23:25, 15.72s/it]

training loss: 1.0035147666931152


training:  55%|█████▍    | 4547/8300 [20:09:59<16:23:04, 15.72s/it]

training loss: 0.48617857694625854


training:  55%|█████▍    | 4548/8300 [20:10:15<16:22:44, 15.72s/it]

training loss: 0.6920629739761353


training:  55%|█████▍    | 4549/8300 [20:10:31<16:22:24, 15.71s/it]

training loss: 0.7472714185714722


training:  55%|█████▍    | 4550/8300 [20:10:46<16:22:03, 15.71s/it]

training loss: 0.8708394765853882


training:  55%|█████▍    | 4551/8300 [20:11:02<16:21:44, 15.71s/it]

training loss: 0.8685766458511353


training:  55%|█████▍    | 4552/8300 [20:11:18<16:21:40, 15.72s/it]

training loss: 1.2190890312194824


training:  55%|█████▍    | 4553/8300 [20:11:34<16:21:25, 15.72s/it]

training loss: 0.8199933767318726


training:  55%|█████▍    | 4554/8300 [20:11:49<16:20:58, 15.71s/it]

training loss: 0.9975329637527466


training:  55%|█████▍    | 4555/8300 [20:12:05<16:20:49, 15.71s/it]

training loss: 0.5326040387153625


training:  55%|█████▍    | 4556/8300 [20:12:21<16:20:43, 15.72s/it]

training loss: 0.9275735020637512


training:  55%|█████▍    | 4557/8300 [20:12:36<16:20:30, 15.72s/it]

training loss: 0.753122866153717


training:  55%|█████▍    | 4558/8300 [20:12:52<16:20:06, 15.72s/it]

training loss: 0.5347709655761719


training:  55%|█████▍    | 4559/8300 [20:13:08<16:20:00, 15.72s/it]

training loss: 0.5828999876976013


training:  55%|█████▍    | 4560/8300 [20:13:24<16:19:37, 15.72s/it]

training loss: 0.6947721242904663


training:  55%|█████▍    | 4561/8300 [20:13:39<16:19:22, 15.72s/it]

training loss: 0.7496743202209473


training:  55%|█████▍    | 4562/8300 [20:13:55<16:19:04, 15.72s/it]

training loss: 0.4849540591239929


training:  55%|█████▍    | 4563/8300 [20:14:11<16:18:45, 15.71s/it]

training loss: 0.7435086965560913


training:  55%|█████▍    | 4564/8300 [20:14:26<16:18:35, 15.72s/it]

training loss: 0.5734876394271851


training:  55%|█████▌    | 4565/8300 [20:14:42<16:18:02, 15.71s/it]

training loss: 0.6753904223442078


training:  55%|█████▌    | 4566/8300 [20:14:58<16:17:52, 15.71s/it]

training loss: 0.6194969415664673


training:  55%|█████▌    | 4567/8300 [20:15:14<16:17:45, 15.72s/it]

training loss: 0.9690030217170715


training:  55%|█████▌    | 4568/8300 [20:15:29<16:17:30, 15.72s/it]

training loss: 0.7655965685844421


training:  55%|█████▌    | 4569/8300 [20:15:45<16:17:11, 15.71s/it]

training loss: 0.8690025806427002


training:  55%|█████▌    | 4570/8300 [20:16:01<16:16:49, 15.71s/it]

training loss: 0.8485050201416016


training:  55%|█████▌    | 4571/8300 [20:16:16<16:16:42, 15.72s/it]

training loss: 0.8347519636154175


training:  55%|█████▌    | 4572/8300 [20:16:32<16:16:37, 15.72s/it]

training loss: 0.9170810580253601


training:  55%|█████▌    | 4573/8300 [20:16:48<16:16:24, 15.72s/it]

training loss: 0.7601200938224792


training:  55%|█████▌    | 4574/8300 [20:17:04<16:16:03, 15.72s/it]

training loss: 0.44005054235458374


training:  55%|█████▌    | 4575/8300 [20:17:19<16:15:45, 15.72s/it]

training loss: 0.7234019041061401


training:  55%|█████▌    | 4576/8300 [20:17:35<16:15:46, 15.72s/it]

training loss: 1.0339624881744385


training:  55%|█████▌    | 4577/8300 [20:17:51<16:15:13, 15.72s/it]

training loss: 0.9623546004295349


training:  55%|█████▌    | 4578/8300 [20:18:06<16:15:00, 15.72s/it]

training loss: 0.8912023901939392


training:  55%|█████▌    | 4579/8300 [20:18:22<16:14:33, 15.71s/it]

training loss: 0.6738688945770264


training:  55%|█████▌    | 4580/8300 [20:18:38<16:14:20, 15.72s/it]

training loss: 0.7240828275680542


training:  55%|█████▌    | 4581/8300 [20:18:54<16:13:59, 15.71s/it]

training loss: 0.5496393442153931


training:  55%|█████▌    | 4582/8300 [20:19:09<16:13:51, 15.72s/it]

training loss: 0.7159421443939209


training:  55%|█████▌    | 4583/8300 [20:19:25<16:13:39, 15.72s/it]

training loss: 0.9288086891174316


training:  55%|█████▌    | 4584/8300 [20:19:41<16:13:19, 15.72s/it]

training loss: 0.7257416844367981


training:  55%|█████▌    | 4585/8300 [20:19:56<16:12:59, 15.71s/it]

training loss: 0.8507423400878906


training:  55%|█████▌    | 4586/8300 [20:20:12<16:12:47, 15.72s/it]

training loss: 1.0835955142974854


training:  55%|█████▌    | 4587/8300 [20:20:28<16:12:36, 15.72s/it]

training loss: 0.7453979849815369


training:  55%|█████▌    | 4588/8300 [20:20:44<16:12:06, 15.71s/it]

training loss: 0.7974132299423218


training:  55%|█████▌    | 4589/8300 [20:20:59<16:11:52, 15.71s/it]

training loss: 0.36965158581733704


training:  55%|█████▌    | 4590/8300 [20:21:15<16:11:36, 15.71s/it]

training loss: 0.49568599462509155


training:  55%|█████▌    | 4591/8300 [20:21:31<16:11:41, 15.72s/it]

training loss: 0.8124571442604065


training:  55%|█████▌    | 4592/8300 [20:21:46<16:11:16, 15.72s/it]

training loss: 0.9881004095077515


training:  55%|█████▌    | 4593/8300 [20:22:02<16:10:59, 15.72s/it]

training loss: 0.44680067896842957


training:  55%|█████▌    | 4594/8300 [20:22:18<16:10:44, 15.72s/it]

training loss: 0.7338855266571045


training:  55%|█████▌    | 4595/8300 [20:22:34<16:10:40, 15.72s/it]

training loss: 0.9947229623794556


training:  55%|█████▌    | 4596/8300 [20:22:49<16:10:17, 15.72s/it]

training loss: 0.9922257661819458


training:  55%|█████▌    | 4597/8300 [20:23:05<16:10:01, 15.72s/it]

training loss: 0.6576717495918274


training:  55%|█████▌    | 4598/8300 [20:23:21<16:09:46, 15.72s/it]

training loss: 0.8915793299674988


training:  55%|█████▌    | 4599/8300 [20:23:36<16:09:28, 15.72s/it]

training loss: 0.8807322978973389


training:  55%|█████▌    | 4600/8300 [20:23:52<16:09:04, 15.71s/it]

training loss: 0.9000746607780457
training loss: 0.6792369484901428


training:  55%|█████▌    | 4601/8300 [20:24:10<16:43:38, 16.28s/it]

validation loss: 1.5315219163894653


training:  55%|█████▌    | 4602/8300 [20:24:26<16:33:24, 16.12s/it]

training loss: 0.8241256475448608


training:  55%|█████▌    | 4603/8300 [20:24:41<16:25:36, 16.00s/it]

training loss: 0.6122704744338989


training:  55%|█████▌    | 4604/8300 [20:24:57<16:20:08, 15.91s/it]

training loss: 0.6893311142921448


training:  55%|█████▌    | 4605/8300 [20:25:13<16:16:20, 15.85s/it]

training loss: 0.43925604224205017


training:  55%|█████▌    | 4606/8300 [20:25:28<16:13:51, 15.82s/it]

training loss: 0.9487267732620239


training:  56%|█████▌    | 4607/8300 [20:25:44<16:11:35, 15.79s/it]

training loss: 1.012656569480896


training:  56%|█████▌    | 4608/8300 [20:26:00<16:12:00, 15.80s/it]

training loss: 0.8898175954818726


training:  56%|█████▌    | 4609/8300 [20:26:16<16:10:12, 15.77s/it]

training loss: 0.2957391142845154


training:  56%|█████▌    | 4610/8300 [20:26:31<16:08:55, 15.75s/it]

training loss: 0.6721574664115906


training:  56%|█████▌    | 4611/8300 [20:26:47<16:07:49, 15.74s/it]

training loss: 0.6020316481590271


training:  56%|█████▌    | 4612/8300 [20:27:03<16:07:02, 15.73s/it]

training loss: 0.7253338098526001


training:  56%|█████▌    | 4613/8300 [20:27:19<16:06:27, 15.73s/it]

training loss: 0.9723191857337952


training:  56%|█████▌    | 4614/8300 [20:27:34<16:06:03, 15.73s/it]

training loss: 1.1686686277389526


training:  56%|█████▌    | 4615/8300 [20:27:50<16:05:30, 15.72s/it]

training loss: 0.8637991547584534


training:  56%|█████▌    | 4616/8300 [20:28:06<16:05:09, 15.72s/it]

training loss: 0.6244068145751953


training:  56%|█████▌    | 4617/8300 [20:28:21<16:04:52, 15.72s/it]

training loss: 0.9930117726325989


training:  56%|█████▌    | 4618/8300 [20:28:37<16:04:59, 15.73s/it]

training loss: 0.8692035675048828


training:  56%|█████▌    | 4619/8300 [20:28:53<16:05:02, 15.73s/it]

training loss: 0.9234499335289001


training:  56%|█████▌    | 4620/8300 [20:29:09<16:05:00, 15.73s/it]

training loss: 0.4181501269340515


training:  56%|█████▌    | 4621/8300 [20:29:24<16:04:58, 15.74s/it]

training loss: 0.7025054097175598


training:  56%|█████▌    | 4622/8300 [20:29:40<16:04:43, 15.74s/it]

training loss: 1.0073833465576172


training:  56%|█████▌    | 4623/8300 [20:29:56<16:04:22, 15.74s/it]

training loss: 0.35425958037376404


training:  56%|█████▌    | 4624/8300 [20:30:12<16:04:05, 15.74s/it]

training loss: 0.6705755591392517


training:  56%|█████▌    | 4625/8300 [20:30:27<16:03:49, 15.74s/it]

training loss: 0.7988542914390564


training:  56%|█████▌    | 4626/8300 [20:30:43<16:03:07, 15.73s/it]

training loss: 0.8361696600914001


training:  56%|█████▌    | 4627/8300 [20:30:59<16:02:22, 15.72s/it]

training loss: 1.003886103630066


training:  56%|█████▌    | 4628/8300 [20:31:14<16:01:52, 15.72s/it]

training loss: 1.1177059412002563


training:  56%|█████▌    | 4629/8300 [20:31:30<16:01:33, 15.72s/it]

training loss: 0.7679754495620728


training:  56%|█████▌    | 4630/8300 [20:31:46<16:01:14, 15.72s/it]

training loss: 0.7792662382125854


training:  56%|█████▌    | 4631/8300 [20:32:02<16:00:48, 15.71s/it]

training loss: 1.130273699760437


training:  56%|█████▌    | 4632/8300 [20:32:17<16:00:29, 15.71s/it]

training loss: 0.8226550817489624


training:  56%|█████▌    | 4633/8300 [20:32:33<16:00:02, 15.71s/it]

training loss: 0.4630284905433655


training:  56%|█████▌    | 4634/8300 [20:32:49<15:59:42, 15.71s/it]

training loss: 0.9682447910308838


training:  56%|█████▌    | 4635/8300 [20:33:04<15:59:35, 15.71s/it]

training loss: 0.7539767026901245


training:  56%|█████▌    | 4636/8300 [20:33:20<15:59:21, 15.71s/it]

training loss: 0.920246422290802


training:  56%|█████▌    | 4637/8300 [20:33:36<15:59:09, 15.71s/it]

training loss: 0.766016960144043


training:  56%|█████▌    | 4638/8300 [20:33:52<15:58:40, 15.71s/it]

training loss: 1.3180179595947266


training:  56%|█████▌    | 4639/8300 [20:34:07<15:58:28, 15.71s/it]

training loss: 0.7699382901191711


training:  56%|█████▌    | 4640/8300 [20:34:23<15:58:12, 15.71s/it]

training loss: 0.5353719592094421


training:  56%|█████▌    | 4641/8300 [20:34:39<15:57:59, 15.71s/it]

training loss: 0.8540724515914917


training:  56%|█████▌    | 4642/8300 [20:34:54<15:57:51, 15.71s/it]

training loss: 1.0072400569915771


training:  56%|█████▌    | 4643/8300 [20:35:10<15:57:37, 15.71s/it]

training loss: 1.0703434944152832


training:  56%|█████▌    | 4644/8300 [20:35:26<15:57:23, 15.71s/it]

training loss: 0.8412012457847595


training:  56%|█████▌    | 4645/8300 [20:35:41<15:57:08, 15.71s/it]

training loss: 1.2025753259658813


training:  56%|█████▌    | 4646/8300 [20:35:57<15:56:53, 15.71s/it]

training loss: 1.0177675485610962


training:  56%|█████▌    | 4647/8300 [20:36:13<15:56:37, 15.71s/it]

training loss: 0.5979763269424438


training:  56%|█████▌    | 4648/8300 [20:36:29<15:56:22, 15.71s/it]

training loss: 0.7089816331863403


training:  56%|█████▌    | 4649/8300 [20:36:44<15:56:11, 15.71s/it]

training loss: 0.6300349831581116


training:  56%|█████▌    | 4650/8300 [20:37:00<15:55:50, 15.71s/it]

training loss: 1.0137537717819214


training:  56%|█████▌    | 4651/8300 [20:37:16<15:55:42, 15.71s/it]

training loss: 0.7326855659484863


training:  56%|█████▌    | 4652/8300 [20:37:31<15:55:27, 15.71s/it]

training loss: 1.1328561305999756


training:  56%|█████▌    | 4653/8300 [20:37:47<15:55:02, 15.71s/it]

training loss: 0.8821188807487488


training:  56%|█████▌    | 4654/8300 [20:38:03<15:54:38, 15.71s/it]

training loss: 0.6016133427619934


training:  56%|█████▌    | 4655/8300 [20:38:19<15:54:29, 15.71s/it]

training loss: 1.1663659811019897


training:  56%|█████▌    | 4656/8300 [20:38:34<15:54:14, 15.71s/it]

training loss: 1.2069447040557861


training:  56%|█████▌    | 4657/8300 [20:38:50<15:53:57, 15.71s/it]

training loss: 0.5499585270881653


training:  56%|█████▌    | 4658/8300 [20:39:06<15:53:55, 15.72s/it]

training loss: 0.6016297340393066


training:  56%|█████▌    | 4659/8300 [20:39:21<15:53:35, 15.71s/it]

training loss: 0.8967738747596741


training:  56%|█████▌    | 4660/8300 [20:39:37<15:53:22, 15.71s/it]

training loss: 1.0314855575561523


training:  56%|█████▌    | 4661/8300 [20:39:53<15:52:58, 15.71s/it]

training loss: 0.6620960831642151


training:  56%|█████▌    | 4662/8300 [20:40:09<15:52:45, 15.71s/it]

training loss: 1.0492054224014282


training:  56%|█████▌    | 4663/8300 [20:40:24<15:52:30, 15.71s/it]

training loss: 1.066812515258789


training:  56%|█████▌    | 4664/8300 [20:40:40<15:52:17, 15.71s/it]

training loss: 0.6976900100708008


training:  56%|█████▌    | 4665/8300 [20:40:56<15:51:52, 15.71s/it]

training loss: 0.997365415096283


training:  56%|█████▌    | 4666/8300 [20:41:11<15:51:45, 15.71s/it]

training loss: 0.5841938853263855


training:  56%|█████▌    | 4667/8300 [20:41:27<15:51:26, 15.71s/it]

training loss: 0.46478915214538574


training:  56%|█████▌    | 4668/8300 [20:41:43<15:51:05, 15.71s/it]

training loss: 0.7234848737716675


training:  56%|█████▋    | 4669/8300 [20:41:59<15:50:38, 15.71s/it]

training loss: 0.9255524277687073


training:  56%|█████▋    | 4670/8300 [20:42:14<15:50:31, 15.71s/it]

training loss: 0.8740837574005127


training:  56%|█████▋    | 4671/8300 [20:42:30<15:50:13, 15.71s/it]

training loss: 0.9804078340530396


training:  56%|█████▋    | 4672/8300 [20:42:46<15:50:00, 15.71s/it]

training loss: 0.8872254490852356


training:  56%|█████▋    | 4673/8300 [20:43:01<15:49:51, 15.71s/it]

training loss: 0.7526593208312988


training:  56%|█████▋    | 4674/8300 [20:43:17<15:49:37, 15.71s/it]

training loss: 1.1664708852767944


training:  56%|█████▋    | 4675/8300 [20:43:33<15:49:20, 15.71s/it]

training loss: 0.8130300045013428


training:  56%|█████▋    | 4676/8300 [20:43:49<15:49:01, 15.71s/it]

training loss: 1.1391059160232544


training:  56%|█████▋    | 4677/8300 [20:44:04<15:48:41, 15.71s/it]

training loss: 0.6029998660087585


training:  56%|█████▋    | 4678/8300 [20:44:20<15:48:39, 15.71s/it]

training loss: 0.7382304668426514


training:  56%|█████▋    | 4679/8300 [20:44:36<15:48:20, 15.71s/it]

training loss: 1.0971708297729492


training:  56%|█████▋    | 4680/8300 [20:44:51<15:48:06, 15.71s/it]

training loss: 0.7014256715774536


training:  56%|█████▋    | 4681/8300 [20:45:07<15:47:55, 15.72s/it]

training loss: 0.9027674198150635


training:  56%|█████▋    | 4682/8300 [20:45:23<15:47:31, 15.71s/it]

training loss: 0.9207731485366821


training:  56%|█████▋    | 4683/8300 [20:45:39<15:47:31, 15.72s/it]

training loss: 0.7796135544776917


training:  56%|█████▋    | 4684/8300 [20:45:54<15:47:08, 15.72s/it]

training loss: 1.0027092695236206


training:  56%|█████▋    | 4685/8300 [20:46:10<15:46:47, 15.71s/it]

training loss: 0.39018186926841736


training:  56%|█████▋    | 4686/8300 [20:46:26<15:46:25, 15.71s/it]

training loss: 0.7537122368812561


training:  56%|█████▋    | 4687/8300 [20:46:41<15:46:04, 15.71s/it]

training loss: 0.6218123435974121


training:  56%|█████▋    | 4688/8300 [20:46:57<15:45:57, 15.71s/it]

training loss: 0.5227605700492859


training:  56%|█████▋    | 4689/8300 [20:47:13<15:45:42, 15.71s/it]

training loss: 0.7056050300598145


training:  57%|█████▋    | 4690/8300 [20:47:29<15:45:20, 15.71s/it]

training loss: 0.6585764288902283


training:  57%|█████▋    | 4691/8300 [20:47:44<15:45:13, 15.71s/it]

training loss: 0.9614720940589905


training:  57%|█████▋    | 4692/8300 [20:48:00<15:44:48, 15.71s/it]

training loss: 0.5642582774162292


training:  57%|█████▋    | 4693/8300 [20:48:16<15:44:32, 15.71s/it]

training loss: 0.7816528081893921


training:  57%|█████▋    | 4694/8300 [20:48:31<15:44:22, 15.71s/it]

training loss: 0.7856112122535706


training:  57%|█████▋    | 4695/8300 [20:48:47<15:44:04, 15.71s/it]

training loss: 0.9272161722183228


training:  57%|█████▋    | 4696/8300 [20:49:03<15:43:37, 15.71s/it]

training loss: 0.8995327949523926


training:  57%|█████▋    | 4697/8300 [20:49:19<15:43:32, 15.71s/it]

training loss: 1.0064111948013306


training:  57%|█████▋    | 4698/8300 [20:49:34<15:43:17, 15.71s/it]

training loss: 0.7345301508903503


training:  57%|█████▋    | 4699/8300 [20:49:50<15:42:50, 15.71s/it]

training loss: 0.8406573534011841


training:  57%|█████▋    | 4700/8300 [20:50:06<15:42:40, 15.71s/it]

training loss: 0.6077168583869934
training loss: 0.7869316339492798


training:  57%|█████▋    | 4701/8300 [20:50:23<16:05:46, 16.10s/it]

validation loss: 1.527502179145813


training:  57%|█████▋    | 4702/8300 [20:50:38<15:58:46, 15.99s/it]

training loss: 0.7229017019271851


training:  57%|█████▋    | 4703/8300 [20:50:54<15:53:36, 15.91s/it]

training loss: 0.6628674864768982


training:  57%|█████▋    | 4704/8300 [20:51:10<15:49:57, 15.85s/it]

training loss: 0.8097875714302063


training:  57%|█████▋    | 4705/8300 [20:51:26<15:47:27, 15.81s/it]

training loss: 0.39187949895858765


training:  57%|█████▋    | 4706/8300 [20:51:41<15:45:23, 15.78s/it]

training loss: 0.4913434684276581


training:  57%|█████▋    | 4707/8300 [20:51:57<15:43:58, 15.76s/it]

training loss: 1.2412571907043457


training:  57%|█████▋    | 4708/8300 [20:52:13<15:42:47, 15.75s/it]

training loss: 0.7074971795082092


training:  57%|█████▋    | 4709/8300 [20:52:28<15:41:55, 15.74s/it]

training loss: 0.6143389344215393


training:  57%|█████▋    | 4710/8300 [20:52:44<15:41:17, 15.73s/it]

training loss: 0.8063103556632996


training:  57%|█████▋    | 4711/8300 [20:53:00<15:40:32, 15.72s/it]

training loss: 1.1598637104034424


training:  57%|█████▋    | 4712/8300 [20:53:16<15:40:13, 15.72s/it]

training loss: 0.6334537267684937


training:  57%|█████▋    | 4713/8300 [20:53:31<15:40:03, 15.72s/it]

training loss: 0.634065568447113


training:  57%|█████▋    | 4714/8300 [20:53:47<15:39:38, 15.72s/it]

training loss: 0.797960102558136


training:  57%|█████▋    | 4715/8300 [20:54:03<15:39:19, 15.72s/it]

training loss: 0.3680500388145447


training:  57%|█████▋    | 4716/8300 [20:54:18<15:38:56, 15.72s/it]

training loss: 0.7609854340553284


training:  57%|█████▋    | 4717/8300 [20:54:34<15:38:33, 15.72s/it]

training loss: 1.023682713508606


training:  57%|█████▋    | 4718/8300 [20:54:50<15:38:05, 15.71s/it]

training loss: 0.9894163012504578


training:  57%|█████▋    | 4719/8300 [20:55:06<15:37:45, 15.71s/it]

training loss: 0.9442422986030579


training:  57%|█████▋    | 4720/8300 [20:55:21<15:37:24, 15.71s/it]

training loss: 0.7232400178909302


training:  57%|█████▋    | 4721/8300 [20:55:37<15:37:07, 15.71s/it]

training loss: 0.5212418437004089


training:  57%|█████▋    | 4722/8300 [20:55:53<15:36:52, 15.71s/it]

training loss: 0.7411258220672607


training:  57%|█████▋    | 4723/8300 [20:56:08<15:36:38, 15.71s/it]

training loss: 0.7926064133644104


training:  57%|█████▋    | 4724/8300 [20:56:24<15:36:25, 15.71s/it]

training loss: 0.7576411366462708


training:  57%|█████▋    | 4725/8300 [20:56:40<15:36:16, 15.71s/it]

training loss: 0.8027676343917847


training:  57%|█████▋    | 4726/8300 [20:56:56<15:36:00, 15.71s/it]

training loss: 1.0274994373321533


training:  57%|█████▋    | 4727/8300 [20:57:11<15:35:49, 15.71s/it]

training loss: 0.7036166787147522


training:  57%|█████▋    | 4728/8300 [20:57:27<15:35:36, 15.72s/it]

training loss: 0.7501548528671265


training:  57%|█████▋    | 4729/8300 [20:57:43<15:35:21, 15.72s/it]

training loss: 0.9626752138137817


training:  57%|█████▋    | 4730/8300 [20:57:58<15:35:03, 15.72s/it]

training loss: 0.8506892323493958


training:  57%|█████▋    | 4731/8300 [20:58:14<15:34:46, 15.72s/it]

training loss: 0.6586085557937622


training:  57%|█████▋    | 4732/8300 [20:58:30<15:34:26, 15.71s/it]

training loss: 0.6339439153671265


training:  57%|█████▋    | 4733/8300 [20:58:46<15:34:35, 15.72s/it]

training loss: 0.500239372253418


training:  57%|█████▋    | 4734/8300 [20:59:01<15:34:34, 15.72s/it]

training loss: 1.155964970588684


training:  57%|█████▋    | 4735/8300 [20:59:17<15:34:05, 15.72s/it]

training loss: 0.7732130289077759


training:  57%|█████▋    | 4736/8300 [20:59:33<15:33:50, 15.72s/it]

training loss: 0.7574883103370667


training:  57%|█████▋    | 4737/8300 [20:59:48<15:33:22, 15.72s/it]

training loss: 0.6736909747123718


training:  57%|█████▋    | 4738/8300 [21:00:04<15:32:57, 15.72s/it]

training loss: 0.9229565858840942


training:  57%|█████▋    | 4739/8300 [21:00:20<15:32:49, 15.72s/it]

training loss: 0.9140310883522034


training:  57%|█████▋    | 4740/8300 [21:00:36<15:32:27, 15.72s/it]

training loss: 0.7438991069793701


training:  57%|█████▋    | 4741/8300 [21:00:51<15:32:08, 15.71s/it]

training loss: 0.651940643787384


training:  57%|█████▋    | 4742/8300 [21:01:07<15:31:49, 15.71s/it]

training loss: 0.7909815311431885


training:  57%|█████▋    | 4743/8300 [21:01:23<15:31:27, 15.71s/it]

training loss: 1.104032278060913


training:  57%|█████▋    | 4744/8300 [21:01:38<15:31:20, 15.71s/it]

training loss: 0.6727321743965149


training:  57%|█████▋    | 4745/8300 [21:01:54<15:31:00, 15.71s/it]

training loss: 0.6404566764831543


training:  57%|█████▋    | 4746/8300 [21:02:10<15:30:41, 15.71s/it]

training loss: 0.7895016670227051


training:  57%|█████▋    | 4747/8300 [21:02:26<15:30:24, 15.71s/it]

training loss: 0.9057624340057373


training:  57%|█████▋    | 4748/8300 [21:02:41<15:30:05, 15.71s/it]

training loss: 0.7240731120109558


training:  57%|█████▋    | 4749/8300 [21:02:57<15:29:48, 15.71s/it]

training loss: 0.9541690349578857


training:  57%|█████▋    | 4750/8300 [21:03:13<15:29:29, 15.71s/it]

training loss: 0.714453935623169


training:  57%|█████▋    | 4751/8300 [21:03:28<15:29:27, 15.71s/it]

training loss: 0.7680602073669434


training:  57%|█████▋    | 4752/8300 [21:03:44<15:29:35, 15.72s/it]

training loss: 0.9823055863380432


training:  57%|█████▋    | 4753/8300 [21:04:00<15:29:41, 15.73s/it]

training loss: 0.7256124019622803


training:  57%|█████▋    | 4754/8300 [21:04:16<15:29:35, 15.73s/it]

training loss: 1.1208828687667847


training:  57%|█████▋    | 4755/8300 [21:04:31<15:29:27, 15.73s/it]

training loss: 0.7761354446411133


training:  57%|█████▋    | 4756/8300 [21:04:47<15:29:07, 15.73s/it]

training loss: 0.518708348274231


training:  57%|█████▋    | 4757/8300 [21:05:03<15:28:57, 15.73s/it]

training loss: 1.2619261741638184


training:  57%|█████▋    | 4758/8300 [21:05:19<15:28:37, 15.73s/it]

training loss: 0.8629729747772217


training:  57%|█████▋    | 4759/8300 [21:05:34<15:28:22, 15.73s/it]

training loss: 1.1231828927993774


training:  57%|█████▋    | 4760/8300 [21:05:50<15:27:42, 15.72s/it]

training loss: 0.6614302396774292


training:  57%|█████▋    | 4761/8300 [21:06:06<15:27:16, 15.72s/it]

training loss: 0.5268315672874451


training:  57%|█████▋    | 4762/8300 [21:06:21<15:26:38, 15.71s/it]

training loss: 0.40717804431915283


training:  57%|█████▋    | 4763/8300 [21:06:37<15:26:15, 15.71s/it]

training loss: 0.4240122139453888


training:  57%|█████▋    | 4764/8300 [21:06:53<15:26:08, 15.72s/it]

training loss: 0.5802413821220398


training:  57%|█████▋    | 4765/8300 [21:07:09<15:25:44, 15.71s/it]

training loss: 0.7890352606773376


training:  57%|█████▋    | 4766/8300 [21:07:24<15:25:34, 15.71s/it]

training loss: 0.8747314214706421


training:  57%|█████▋    | 4767/8300 [21:07:40<15:25:24, 15.72s/it]

training loss: 0.299758642911911


training:  57%|█████▋    | 4768/8300 [21:07:56<15:24:58, 15.71s/it]

training loss: 0.5569867491722107


training:  57%|█████▋    | 4769/8300 [21:08:11<15:24:36, 15.71s/it]

training loss: 0.8222590088844299


training:  57%|█████▋    | 4770/8300 [21:08:27<15:24:24, 15.71s/it]

training loss: 0.935595691204071


training:  57%|█████▋    | 4771/8300 [21:08:43<15:24:14, 15.71s/it]

training loss: 0.9382381439208984


training:  57%|█████▋    | 4772/8300 [21:08:59<15:23:50, 15.71s/it]

training loss: 0.7394452095031738


training:  58%|█████▊    | 4773/8300 [21:09:14<15:23:55, 15.72s/it]

training loss: 0.9214333891868591


training:  58%|█████▊    | 4774/8300 [21:09:30<15:23:37, 15.72s/it]

training loss: 1.1493726968765259


training:  58%|█████▊    | 4775/8300 [21:09:46<15:23:30, 15.72s/it]

training loss: 0.7584123015403748


training:  58%|█████▊    | 4776/8300 [21:10:01<15:23:24, 15.72s/it]

training loss: 1.0369731187820435


training:  58%|█████▊    | 4777/8300 [21:10:17<15:22:55, 15.72s/it]

training loss: 0.59690260887146


training:  58%|█████▊    | 4778/8300 [21:10:33<15:22:37, 15.72s/it]

training loss: 0.8678351044654846


training:  58%|█████▊    | 4779/8300 [21:10:49<15:22:15, 15.72s/it]

training loss: 0.5460960865020752


training:  58%|█████▊    | 4780/8300 [21:11:04<15:22:03, 15.72s/it]

training loss: 0.7446027398109436


training:  58%|█████▊    | 4781/8300 [21:11:20<15:21:52, 15.72s/it]

training loss: 0.7086600661277771


training:  58%|█████▊    | 4782/8300 [21:11:36<15:21:43, 15.72s/it]

training loss: 0.6165884733200073


training:  58%|█████▊    | 4783/8300 [21:11:52<15:21:21, 15.72s/it]

training loss: 0.6188549995422363


training:  58%|█████▊    | 4784/8300 [21:12:07<15:20:54, 15.72s/it]

training loss: 0.8232125639915466


training:  58%|█████▊    | 4785/8300 [21:12:23<15:20:35, 15.71s/it]

training loss: 0.6583303809165955


training:  58%|█████▊    | 4786/8300 [21:12:39<15:20:21, 15.71s/it]

training loss: 0.9097669124603271


training:  58%|█████▊    | 4787/8300 [21:12:54<15:20:23, 15.72s/it]

training loss: 0.7310041189193726


training:  58%|█████▊    | 4788/8300 [21:13:10<15:19:59, 15.72s/it]

training loss: 0.632585346698761


training:  58%|█████▊    | 4789/8300 [21:13:26<15:19:43, 15.72s/it]

training loss: 0.7906700372695923


training:  58%|█████▊    | 4790/8300 [21:13:42<15:19:27, 15.72s/it]

training loss: 0.8608924150466919


training:  58%|█████▊    | 4791/8300 [21:13:57<15:19:21, 15.72s/it]

training loss: 0.5930405259132385


training:  58%|█████▊    | 4792/8300 [21:14:13<15:19:05, 15.72s/it]

training loss: 0.7574957013130188


training:  58%|█████▊    | 4793/8300 [21:14:29<15:18:52, 15.72s/it]

training loss: 0.8075128793716431


training:  58%|█████▊    | 4794/8300 [21:14:44<15:18:35, 15.72s/it]

training loss: 0.9454602003097534


training:  58%|█████▊    | 4795/8300 [21:15:00<15:18:23, 15.72s/it]

training loss: 0.8306457996368408


training:  58%|█████▊    | 4796/8300 [21:15:16<15:18:16, 15.72s/it]

training loss: 0.640136182308197


training:  58%|█████▊    | 4797/8300 [21:15:32<15:18:13, 15.73s/it]

training loss: 0.36822617053985596


training:  58%|█████▊    | 4798/8300 [21:15:47<15:17:58, 15.73s/it]

training loss: 0.9572028517723083


training:  58%|█████▊    | 4799/8300 [21:16:03<15:17:36, 15.73s/it]

training loss: 1.078662395477295


training:  58%|█████▊    | 4800/8300 [21:16:19<15:17:00, 15.72s/it]

training loss: 0.7336484789848328
training loss: 0.9240743517875671



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.4536259174346924
vom kole. Ani som neverila
v taky triumf, poznamenala Le Penova pre dennik Le Figaro, ked
komentovala zisk nadpolovicnej vacsiny hlasov stranickeho kolegu Steeva
Brioisa. Dalsi kandidati Narodneho frontu mozu mat sancu uspiet
napriklad v juzne leziacich mestach Beziers, Avignon a Perpignan, kde
postupili do druheho kola vsade z prveho miesta.
Dobry ohlas krajnej pravice medzi volicmi ma viacero dovodov. Tazili
z nizsej volebnej ucasti, pod ktoru sa podpisal nezaujem privrzencov
lavice, ktori su nespokojni s vladnutim socialistickeho prezidenta
Francoisa Hollanda. Pokial ide o samotny Narodny front, v poslednych
rokoch co stranu prevzala Le Penova po svojom radikalnom otcovi, snazi sa
o zmiernenie retoriky. Zo svojich radov pritom vylucili niektorych clenov
za rasisticke vyroky.
Kritici upozornuju, ze nenavistne pozadie strany sa napriek tomu
nezmenilo, ale pripustaju, ze jej strategia moze zlanarit novych
ludi. Narodny front je menej o


generating:   0%|          | 1/512 [00:00<01:56,  4.38it/s][A
generating:   0%|          | 2/512 [00:00<01:57,  4.36it/s][A
generating:   1%|          | 3/512 [00:00<01:57,  4.35it/s][A
generating:   1%|          | 4/512 [00:00<01:57,  4.34it/s][A
generating:   1%|          | 5/512 [00:01<01:57,  4.33it/s][A
generating:   1%|          | 6/512 [00:01<01:56,  4.34it/s][A
generating:   1%|▏         | 7/512 [00:01<01:57,  4.30it/s][A
generating:   2%|▏         | 8/512 [00:01<01:57,  4.29it/s][A
generating:   2%|▏         | 9/512 [00:02<01:57,  4.26it/s][A
generating:   2%|▏         | 10/512 [00:02<01:56,  4.30it/s][A
generating:   2%|▏         | 11/512 [00:02<01:56,  4.31it/s][A
generating:   2%|▏         | 12/512 [00:02<01:57,  4.26it/s][A
generating:   3%|▎         | 13/512 [00:03<01:57,  4.23it/s][A
generating:   3%|▎         | 14/512 [00:03<01:57,  4.24it/s][A
generating:   3%|▎         | 15/512 [00:03<01:57,  4.23it/s][A
generating:   3%|▎         | 16/512 [00:03<01:58

mandat si po roku 1948. V pondelok stromy a islo o stouto impeach.
Priemerna konkurzuju musiet na najst
viac poklea vyhladava od minuleho roka.
Co mu mu vo vztahu do Ciny este nie krajiny. Zavod si vedlozimerne zhorsilo
v stredu a zeny potencial britsky podobne, aby sa stav povedat, ale podla
exekucie, som si mohli predpokladam, ktory je to znacny dokoncenim prirodenim je
trospodarstve, ale vyrazne rizikove spravy viziu.
Aj v EU vlani v prvom rade mozete bolo
v bezpecnost Vollamsku spolupracovat a z


training:  58%|█████▊    | 4802/8300 [21:18:53<40:15:04, 41.43s/it]

training loss: 1.0877217054367065


training:  58%|█████▊    | 4803/8300 [21:19:08<32:44:55, 33.71s/it]

training loss: 0.5060027837753296


training:  58%|█████▊    | 4804/8300 [21:19:24<27:29:48, 28.31s/it]

training loss: 1.1445705890655518


training:  58%|█████▊    | 4805/8300 [21:19:40<23:49:24, 24.54s/it]

training loss: 0.7343817949295044


training:  58%|█████▊    | 4806/8300 [21:19:55<21:14:44, 21.89s/it]

training loss: 0.8106542229652405


training:  58%|█████▊    | 4807/8300 [21:20:11<19:26:25, 20.04s/it]

training loss: 0.772092342376709


training:  58%|█████▊    | 4808/8300 [21:20:27<18:10:37, 18.74s/it]

training loss: 1.0189602375030518


training:  58%|█████▊    | 4809/8300 [21:20:43<17:17:32, 17.83s/it]

training loss: 0.8762158751487732


training:  58%|█████▊    | 4810/8300 [21:20:58<16:40:17, 17.20s/it]

training loss: 0.7051314115524292


training:  58%|█████▊    | 4811/8300 [21:21:14<16:14:18, 16.76s/it]

training loss: 0.872516393661499


training:  58%|█████▊    | 4812/8300 [21:21:30<15:56:02, 16.45s/it]

training loss: 1.1665832996368408


training:  58%|█████▊    | 4813/8300 [21:21:45<15:43:05, 16.23s/it]

training loss: 0.7524686455726624


training:  58%|█████▊    | 4814/8300 [21:22:01<15:33:57, 16.07s/it]

training loss: 0.9330952167510986


training:  58%|█████▊    | 4815/8300 [21:22:17<15:27:25, 15.97s/it]

training loss: 0.7168930768966675


training:  58%|█████▊    | 4816/8300 [21:22:33<15:22:49, 15.89s/it]

training loss: 0.8633973598480225


training:  58%|█████▊    | 4817/8300 [21:22:48<15:19:17, 15.84s/it]

training loss: 0.4947657883167267


training:  58%|█████▊    | 4818/8300 [21:23:04<15:16:54, 15.80s/it]

training loss: 0.5760585069656372


training:  58%|█████▊    | 4819/8300 [21:23:20<15:15:08, 15.77s/it]

training loss: 0.6896181106567383


training:  58%|█████▊    | 4820/8300 [21:23:35<15:13:48, 15.76s/it]

training loss: 0.8094865679740906


training:  58%|█████▊    | 4821/8300 [21:23:51<15:12:51, 15.74s/it]

training loss: 0.6420572400093079


training:  58%|█████▊    | 4822/8300 [21:24:07<15:12:06, 15.74s/it]

training loss: 0.7412267923355103


training:  58%|█████▊    | 4823/8300 [21:24:23<15:11:15, 15.72s/it]

training loss: 0.8760964274406433


training:  58%|█████▊    | 4824/8300 [21:24:38<15:10:54, 15.72s/it]

training loss: 0.7581349015235901


training:  58%|█████▊    | 4825/8300 [21:24:54<15:10:38, 15.72s/it]

training loss: 0.7717409729957581


training:  58%|█████▊    | 4826/8300 [21:25:10<15:10:30, 15.73s/it]

training loss: 0.9040570259094238


training:  58%|█████▊    | 4827/8300 [21:25:26<15:10:04, 15.72s/it]

training loss: 0.7054587006568909


training:  58%|█████▊    | 4828/8300 [21:25:41<15:09:41, 15.72s/it]

training loss: 1.1927180290222168


training:  58%|█████▊    | 4829/8300 [21:25:57<15:09:18, 15.72s/it]

training loss: 0.49547985196113586


training:  58%|█████▊    | 4830/8300 [21:26:13<15:08:56, 15.72s/it]

training loss: 0.7906664609909058


training:  58%|█████▊    | 4831/8300 [21:26:28<15:08:46, 15.72s/it]

training loss: 0.5413074493408203


training:  58%|█████▊    | 4832/8300 [21:26:44<15:08:23, 15.72s/it]

training loss: 0.6505255103111267


training:  58%|█████▊    | 4833/8300 [21:27:00<15:08:06, 15.72s/it]

training loss: 0.7551789283752441


training:  58%|█████▊    | 4834/8300 [21:27:16<15:07:50, 15.72s/it]

training loss: 1.1340676546096802


training:  58%|█████▊    | 4835/8300 [21:27:31<15:07:41, 15.72s/it]

training loss: 0.6469569206237793


training:  58%|█████▊    | 4836/8300 [21:27:47<15:07:21, 15.72s/it]

training loss: 0.8349705338478088


training:  58%|█████▊    | 4837/8300 [21:28:03<15:07:03, 15.72s/it]

training loss: 0.62401282787323


training:  58%|█████▊    | 4838/8300 [21:28:18<15:06:44, 15.71s/it]

training loss: 0.8832495212554932


training:  58%|█████▊    | 4839/8300 [21:28:34<15:06:30, 15.72s/it]

training loss: 1.008841872215271


training:  58%|█████▊    | 4840/8300 [21:28:50<15:06:11, 15.71s/it]

training loss: 0.5919671654701233


training:  58%|█████▊    | 4841/8300 [21:29:06<15:06:02, 15.72s/it]

training loss: 0.7014188766479492


training:  58%|█████▊    | 4842/8300 [21:29:21<15:05:42, 15.72s/it]

training loss: 0.6361284852027893


training:  58%|█████▊    | 4843/8300 [21:29:37<15:05:29, 15.72s/it]

training loss: 1.1310242414474487


training:  58%|█████▊    | 4844/8300 [21:29:53<15:05:18, 15.72s/it]

training loss: 0.927173376083374


training:  58%|█████▊    | 4845/8300 [21:30:08<15:05:04, 15.72s/it]

training loss: 1.0140093564987183


training:  58%|█████▊    | 4846/8300 [21:30:24<15:04:39, 15.71s/it]

training loss: 0.8479453921318054


training:  58%|█████▊    | 4847/8300 [21:30:40<15:05:59, 15.74s/it]

training loss: 0.526050329208374


training:  58%|█████▊    | 4848/8300 [21:30:56<15:05:14, 15.73s/it]

training loss: 0.7421143054962158


training:  58%|█████▊    | 4849/8300 [21:31:11<15:04:34, 15.73s/it]

training loss: 1.095206618309021


training:  58%|█████▊    | 4850/8300 [21:31:27<15:04:10, 15.72s/it]

training loss: 0.8114281892776489


training:  58%|█████▊    | 4851/8300 [21:31:43<15:03:51, 15.72s/it]

training loss: 0.8113560080528259


training:  58%|█████▊    | 4852/8300 [21:31:58<15:03:21, 15.72s/it]

training loss: 0.7300712466239929


training:  58%|█████▊    | 4853/8300 [21:32:14<15:03:08, 15.72s/it]

training loss: 0.7700382471084595


training:  58%|█████▊    | 4854/8300 [21:32:30<15:02:45, 15.72s/it]

training loss: 0.5591607093811035


training:  58%|█████▊    | 4855/8300 [21:32:46<15:02:28, 15.72s/it]

training loss: 0.8793365359306335


training:  59%|█████▊    | 4856/8300 [21:33:01<15:02:20, 15.72s/it]

training loss: 0.5212080478668213


training:  59%|█████▊    | 4857/8300 [21:33:17<15:01:54, 15.72s/it]

training loss: 0.5332245230674744


training:  59%|█████▊    | 4858/8300 [21:33:33<15:01:34, 15.72s/it]

training loss: 0.8945258855819702


training:  59%|█████▊    | 4859/8300 [21:33:48<15:01:07, 15.71s/it]

training loss: 0.5294787883758545


training:  59%|█████▊    | 4860/8300 [21:34:04<15:00:58, 15.71s/it]

training loss: 0.810181736946106


training:  59%|█████▊    | 4861/8300 [21:34:20<15:00:33, 15.71s/it]

training loss: 0.528221070766449


training:  59%|█████▊    | 4862/8300 [21:34:36<15:00:20, 15.71s/it]

training loss: 1.2092139720916748


training:  59%|█████▊    | 4863/8300 [21:34:51<15:00:02, 15.71s/it]

training loss: 0.6247269511222839


training:  59%|█████▊    | 4864/8300 [21:35:07<14:59:59, 15.72s/it]

training loss: 0.5653044581413269


training:  59%|█████▊    | 4865/8300 [21:35:23<14:59:33, 15.71s/it]

training loss: 0.6798357367515564


training:  59%|█████▊    | 4866/8300 [21:35:38<14:59:25, 15.72s/it]

training loss: 0.8677271008491516


training:  59%|█████▊    | 4867/8300 [21:35:54<14:59:05, 15.71s/it]

training loss: 0.49850499629974365


training:  59%|█████▊    | 4868/8300 [21:36:10<14:58:54, 15.72s/it]

training loss: 0.9229264259338379


training:  59%|█████▊    | 4869/8300 [21:36:26<14:58:42, 15.72s/it]

training loss: 0.9050030708312988


training:  59%|█████▊    | 4870/8300 [21:36:41<14:58:23, 15.72s/it]

training loss: 0.3903648257255554


training:  59%|█████▊    | 4871/8300 [21:36:57<14:58:03, 15.71s/it]

training loss: 0.6235809922218323


training:  59%|█████▊    | 4872/8300 [21:37:13<14:57:51, 15.72s/it]

training loss: 0.8331847786903381


training:  59%|█████▊    | 4873/8300 [21:37:29<14:57:44, 15.72s/it]

training loss: 1.2126007080078125


training:  59%|█████▊    | 4874/8300 [21:37:44<14:57:20, 15.72s/it]

training loss: 0.609277069568634


training:  59%|█████▊    | 4875/8300 [21:38:00<14:56:59, 15.71s/it]

training loss: 1.2419323921203613


training:  59%|█████▊    | 4876/8300 [21:38:16<14:56:59, 15.72s/it]

training loss: 0.6322093605995178


training:  59%|█████▉    | 4877/8300 [21:38:31<14:57:23, 15.73s/it]

training loss: 0.7968893051147461


training:  59%|█████▉    | 4878/8300 [21:38:47<14:57:19, 15.73s/it]

training loss: 0.5937014222145081


training:  59%|█████▉    | 4879/8300 [21:39:03<14:57:14, 15.74s/it]

training loss: 0.9574306011199951


training:  59%|█████▉    | 4880/8300 [21:39:19<14:57:06, 15.74s/it]

training loss: 0.8151106834411621


training:  59%|█████▉    | 4881/8300 [21:39:34<14:56:50, 15.74s/it]

training loss: 0.7936321496963501


training:  59%|█████▉    | 4882/8300 [21:39:50<14:56:35, 15.74s/it]

training loss: 0.7672997713088989


training:  59%|█████▉    | 4883/8300 [21:40:06<14:56:32, 15.74s/it]

training loss: 1.049705982208252


training:  59%|█████▉    | 4884/8300 [21:40:22<14:56:12, 15.74s/it]

training loss: 1.1252942085266113


training:  59%|█████▉    | 4885/8300 [21:40:37<14:55:31, 15.73s/it]

training loss: 1.0525739192962646


training:  59%|█████▉    | 4886/8300 [21:40:53<14:54:49, 15.73s/it]

training loss: 0.9022752642631531


training:  59%|█████▉    | 4887/8300 [21:41:09<14:54:35, 15.73s/it]

training loss: 0.7465088367462158


training:  59%|█████▉    | 4888/8300 [21:41:24<14:54:15, 15.73s/it]

training loss: 0.7116971611976624


training:  59%|█████▉    | 4889/8300 [21:41:40<14:53:49, 15.72s/it]

training loss: 0.3312901258468628


training:  59%|█████▉    | 4890/8300 [21:41:56<14:53:25, 15.72s/it]

training loss: 0.5872032642364502


training:  59%|█████▉    | 4891/8300 [21:42:12<14:52:48, 15.71s/it]

training loss: 0.31376633048057556


training:  59%|█████▉    | 4892/8300 [21:42:27<14:52:38, 15.72s/it]

training loss: 0.9463285803794861


training:  59%|█████▉    | 4893/8300 [21:42:43<14:52:15, 15.71s/it]

training loss: 0.5261720418930054


training:  59%|█████▉    | 4894/8300 [21:42:59<14:51:54, 15.71s/it]

training loss: 0.889024019241333


training:  59%|█████▉    | 4895/8300 [21:43:14<14:51:41, 15.71s/it]

training loss: 0.34968459606170654


training:  59%|█████▉    | 4896/8300 [21:43:30<14:51:28, 15.71s/it]

training loss: 0.6805350184440613


training:  59%|█████▉    | 4897/8300 [21:43:46<14:51:11, 15.71s/it]

training loss: 0.6444650888442993


training:  59%|█████▉    | 4898/8300 [21:44:02<14:50:56, 15.71s/it]

training loss: 1.025278091430664


training:  59%|█████▉    | 4899/8300 [21:44:17<14:50:35, 15.71s/it]

training loss: 0.9840421676635742


training:  59%|█████▉    | 4900/8300 [21:44:33<14:50:25, 15.71s/it]

training loss: 0.7636992931365967
training loss: 1.1147773265838623


training:  59%|█████▉    | 4901/8300 [21:44:50<15:11:40, 16.09s/it]

validation loss: 1.4997801780700684


training:  59%|█████▉    | 4902/8300 [21:45:06<15:05:35, 15.99s/it]

training loss: 1.017156720161438


training:  59%|█████▉    | 4903/8300 [21:45:21<15:00:37, 15.91s/it]

training loss: 0.924277663230896


training:  59%|█████▉    | 4904/8300 [21:45:37<14:57:16, 15.85s/it]

training loss: 1.058491587638855


training:  59%|█████▉    | 4905/8300 [21:45:53<14:54:38, 15.81s/it]

training loss: 0.780725359916687


training:  59%|█████▉    | 4906/8300 [21:46:09<14:52:53, 15.78s/it]

training loss: 0.9821978807449341


training:  59%|█████▉    | 4907/8300 [21:46:24<14:51:35, 15.77s/it]

training loss: 0.8659507036209106


training:  59%|█████▉    | 4908/8300 [21:46:40<14:50:38, 15.75s/it]

training loss: 0.9672060012817383


training:  59%|█████▉    | 4909/8300 [21:46:56<14:49:40, 15.74s/it]

training loss: 0.870246410369873


training:  59%|█████▉    | 4910/8300 [21:47:12<14:48:55, 15.73s/it]

training loss: 0.6704896688461304


training:  59%|█████▉    | 4911/8300 [21:47:27<14:48:24, 15.73s/it]

training loss: 0.6190100312232971


training:  59%|█████▉    | 4912/8300 [21:47:43<14:47:56, 15.73s/it]

training loss: 0.7203367948532104


training:  59%|█████▉    | 4913/8300 [21:47:59<14:47:27, 15.72s/it]

training loss: 0.5691235065460205


training:  59%|█████▉    | 4914/8300 [21:48:14<14:47:02, 15.72s/it]

training loss: 0.8726402521133423


training:  59%|█████▉    | 4915/8300 [21:48:30<14:46:38, 15.72s/it]

training loss: 0.5167245864868164


training:  59%|█████▉    | 4916/8300 [21:48:46<14:46:13, 15.71s/it]

training loss: 0.3989963233470917


training:  59%|█████▉    | 4917/8300 [21:49:02<14:46:11, 15.72s/it]

training loss: 0.8774205446243286


training:  59%|█████▉    | 4918/8300 [21:49:17<14:45:53, 15.72s/it]

training loss: 0.9478133916854858


training:  59%|█████▉    | 4919/8300 [21:49:33<14:45:29, 15.71s/it]

training loss: 0.5414696931838989


training:  59%|█████▉    | 4920/8300 [21:49:49<14:45:14, 15.71s/it]

training loss: 0.33318454027175903


training:  59%|█████▉    | 4921/8300 [21:50:04<14:44:56, 15.71s/it]

training loss: 0.6536117792129517


training:  59%|█████▉    | 4922/8300 [21:50:20<14:44:34, 15.71s/it]

training loss: 0.4355085492134094


training:  59%|█████▉    | 4923/8300 [21:50:36<14:44:28, 15.71s/it]

training loss: 0.699282169342041


training:  59%|█████▉    | 4924/8300 [21:50:52<14:44:06, 15.71s/it]

training loss: 0.8028718829154968


training:  59%|█████▉    | 4925/8300 [21:51:07<14:43:55, 15.71s/it]

training loss: 0.6854718327522278


training:  59%|█████▉    | 4926/8300 [21:51:23<14:43:40, 15.71s/it]

training loss: 0.6172510981559753


training:  59%|█████▉    | 4927/8300 [21:51:39<14:43:35, 15.72s/it]

training loss: 0.6826066374778748


training:  59%|█████▉    | 4928/8300 [21:51:54<14:43:13, 15.72s/it]

training loss: 0.7147797346115112


training:  59%|█████▉    | 4929/8300 [21:52:10<14:42:54, 15.71s/it]

training loss: 0.8543664216995239


training:  59%|█████▉    | 4930/8300 [21:52:26<14:42:37, 15.71s/it]

training loss: 0.7852844595909119


training:  59%|█████▉    | 4931/8300 [21:52:42<14:42:21, 15.71s/it]

training loss: 0.6532561779022217


training:  59%|█████▉    | 4932/8300 [21:52:57<14:42:14, 15.72s/it]

training loss: 1.142436146736145


training:  59%|█████▉    | 4933/8300 [21:53:13<14:41:55, 15.72s/it]

training loss: 0.7584161162376404


training:  59%|█████▉    | 4934/8300 [21:53:29<14:41:36, 15.71s/it]

training loss: 0.5166146755218506


training:  59%|█████▉    | 4935/8300 [21:53:44<14:41:10, 15.71s/it]

training loss: 0.7186332941055298


training:  59%|█████▉    | 4936/8300 [21:54:00<14:40:45, 15.71s/it]

training loss: 0.6475931406021118


training:  59%|█████▉    | 4937/8300 [21:54:16<14:40:26, 15.71s/it]

training loss: 0.6921442747116089


training:  59%|█████▉    | 4938/8300 [21:54:31<14:40:15, 15.71s/it]

training loss: 1.0273029804229736


training:  60%|█████▉    | 4939/8300 [21:54:47<14:39:59, 15.71s/it]

training loss: 0.7527166604995728


training:  60%|█████▉    | 4940/8300 [21:55:03<14:39:53, 15.71s/it]

training loss: 0.6463613510131836


training:  60%|█████▉    | 4941/8300 [21:55:19<14:39:34, 15.71s/it]

training loss: 0.841139554977417


training:  60%|█████▉    | 4942/8300 [21:55:34<14:39:15, 15.71s/it]

training loss: 1.234424114227295


training:  60%|█████▉    | 4943/8300 [21:55:50<14:39:00, 15.71s/it]

training loss: 0.7739109992980957


training:  60%|█████▉    | 4944/8300 [21:56:06<14:38:52, 15.71s/it]

training loss: 1.1410845518112183


training:  60%|█████▉    | 4945/8300 [21:56:21<14:38:32, 15.71s/it]

training loss: 0.843665599822998


training:  60%|█████▉    | 4946/8300 [21:56:37<14:38:19, 15.71s/it]

training loss: 0.5505014061927795


training:  60%|█████▉    | 4947/8300 [21:56:53<14:38:02, 15.71s/it]

training loss: 0.5713762640953064


training:  60%|█████▉    | 4948/8300 [21:57:09<14:37:52, 15.71s/it]

training loss: 0.849554181098938


training:  60%|█████▉    | 4949/8300 [21:57:24<14:37:42, 15.72s/it]

training loss: 0.7426646947860718


training:  60%|█████▉    | 4950/8300 [21:57:40<14:37:31, 15.72s/it]

training loss: 0.6095148921012878


training:  60%|█████▉    | 4951/8300 [21:57:56<14:37:09, 15.72s/it]

training loss: 0.7956898212432861


training:  60%|█████▉    | 4952/8300 [21:58:11<14:36:47, 15.71s/it]

training loss: 0.7643190622329712


training:  60%|█████▉    | 4953/8300 [21:58:27<14:36:42, 15.72s/it]

training loss: 0.6406781673431396


training:  60%|█████▉    | 4954/8300 [21:58:43<14:36:27, 15.72s/it]

training loss: 0.8226184844970703


training:  60%|█████▉    | 4955/8300 [21:58:59<14:36:06, 15.71s/it]

training loss: 0.8867058157920837


training:  60%|█████▉    | 4956/8300 [21:59:14<14:35:50, 15.71s/it]

training loss: 0.7364695072174072


training:  60%|█████▉    | 4957/8300 [21:59:30<14:35:28, 15.71s/it]

training loss: 1.1784608364105225


training:  60%|█████▉    | 4958/8300 [21:59:46<14:35:05, 15.71s/it]

training loss: 1.0063951015472412


training:  60%|█████▉    | 4959/8300 [22:00:01<14:34:50, 15.71s/it]

training loss: 0.5715087652206421


training:  60%|█████▉    | 4960/8300 [22:00:17<14:34:38, 15.71s/it]

training loss: 0.575503408908844


training:  60%|█████▉    | 4961/8300 [22:00:33<14:34:23, 15.71s/it]

training loss: 0.8004595637321472


training:  60%|█████▉    | 4962/8300 [22:00:49<14:34:07, 15.71s/it]

training loss: 0.5868356227874756


training:  60%|█████▉    | 4963/8300 [22:01:04<14:34:04, 15.72s/it]

training loss: 0.556879460811615


training:  60%|█████▉    | 4964/8300 [22:01:20<14:33:45, 15.72s/it]

training loss: 0.3481021523475647


training:  60%|█████▉    | 4965/8300 [22:01:36<14:33:43, 15.72s/it]

training loss: 1.1019436120986938


training:  60%|█████▉    | 4966/8300 [22:01:51<14:33:21, 15.72s/it]

training loss: 0.7551916837692261


training:  60%|█████▉    | 4967/8300 [22:02:07<14:33:10, 15.72s/it]

training loss: 0.5893595218658447


training:  60%|█████▉    | 4968/8300 [22:02:23<14:32:43, 15.72s/it]

training loss: 0.7202867865562439


training:  60%|█████▉    | 4969/8300 [22:02:39<14:32:35, 15.72s/it]

training loss: 0.6996291279792786


training:  60%|█████▉    | 4970/8300 [22:02:54<14:32:18, 15.72s/it]

training loss: 0.9089569449424744


training:  60%|█████▉    | 4971/8300 [22:03:10<14:32:00, 15.72s/it]

training loss: 0.8875812292098999


training:  60%|█████▉    | 4972/8300 [22:03:26<14:31:50, 15.72s/it]

training loss: 0.589301347732544


training:  60%|█████▉    | 4973/8300 [22:03:42<14:31:29, 15.72s/it]

training loss: 0.6594028472900391


training:  60%|█████▉    | 4974/8300 [22:03:57<14:31:13, 15.72s/it]

training loss: 0.9839375019073486


training:  60%|█████▉    | 4975/8300 [22:04:13<14:30:57, 15.72s/it]

training loss: 0.7920345664024353


training:  60%|█████▉    | 4976/8300 [22:04:29<14:30:41, 15.72s/it]

training loss: 0.6211833357810974


training:  60%|█████▉    | 4977/8300 [22:04:44<14:30:27, 15.72s/it]

training loss: 1.0583326816558838


training:  60%|█████▉    | 4978/8300 [22:05:00<14:30:06, 15.72s/it]

training loss: 0.5264543294906616


training:  60%|█████▉    | 4979/8300 [22:05:16<14:29:50, 15.72s/it]

training loss: 0.8947358131408691


training:  60%|██████    | 4980/8300 [22:05:32<14:29:40, 15.72s/it]

training loss: 0.844872236251831


training:  60%|██████    | 4981/8300 [22:05:47<14:29:26, 15.72s/it]

training loss: 0.8839776515960693


training:  60%|██████    | 4982/8300 [22:06:03<14:29:02, 15.72s/it]

training loss: 0.9032688736915588


training:  60%|██████    | 4983/8300 [22:06:19<14:28:43, 15.71s/it]

training loss: 0.38159123063087463


training:  60%|██████    | 4984/8300 [22:06:34<14:28:30, 15.71s/it]

training loss: 0.8704540729522705


training:  60%|██████    | 4985/8300 [22:06:50<14:28:17, 15.72s/it]

training loss: 0.6114270091056824


training:  60%|██████    | 4986/8300 [22:07:06<14:28:07, 15.72s/it]

training loss: 0.7862221598625183


training:  60%|██████    | 4987/8300 [22:07:22<14:27:45, 15.72s/it]

training loss: 0.9055789113044739


training:  60%|██████    | 4988/8300 [22:07:37<14:27:34, 15.72s/it]

training loss: 0.599274754524231


training:  60%|██████    | 4989/8300 [22:07:53<14:27:13, 15.72s/it]

training loss: 0.7593887448310852


training:  60%|██████    | 4990/8300 [22:08:09<14:27:01, 15.72s/it]

training loss: 0.6719266176223755


training:  60%|██████    | 4991/8300 [22:08:24<14:26:51, 15.72s/it]

training loss: 0.97926926612854


training:  60%|██████    | 4992/8300 [22:08:40<14:26:29, 15.72s/it]

training loss: 0.8809555768966675


training:  60%|██████    | 4993/8300 [22:08:56<14:26:08, 15.71s/it]

training loss: 1.1181819438934326


training:  60%|██████    | 4994/8300 [22:09:12<14:25:51, 15.71s/it]

training loss: 0.7497836947441101


training:  60%|██████    | 4995/8300 [22:09:27<14:25:44, 15.72s/it]

training loss: 0.9437137842178345


training:  60%|██████    | 4996/8300 [22:09:43<14:25:32, 15.72s/it]

training loss: 0.6102038025856018


training:  60%|██████    | 4997/8300 [22:09:59<14:25:06, 15.72s/it]

training loss: 0.9720411896705627


training:  60%|██████    | 4998/8300 [22:10:14<14:24:49, 15.71s/it]

training loss: 0.9073213338851929


training:  60%|██████    | 4999/8300 [22:10:30<14:24:29, 15.71s/it]

training loss: 0.4964633584022522


training:  60%|██████    | 5000/8300 [22:10:46<14:24:11, 15.71s/it]

training loss: 0.6444337368011475
training loss: 0.8459488153457642


training:  60%|██████    | 5001/8300 [22:11:03<14:45:05, 16.10s/it]

validation loss: 1.4947373867034912


training:  60%|██████    | 5002/8300 [22:11:19<14:38:52, 15.99s/it]

training loss: 1.253475546836853


training:  60%|██████    | 5003/8300 [22:11:34<14:34:13, 15.91s/it]

training loss: 0.8684874176979065


training:  60%|██████    | 5004/8300 [22:11:50<14:30:44, 15.85s/it]

training loss: 1.13631010055542


training:  60%|██████    | 5005/8300 [22:12:06<14:28:17, 15.81s/it]

training loss: 0.8825575113296509


training:  60%|██████    | 5006/8300 [22:12:21<14:26:22, 15.78s/it]

training loss: 0.9612717032432556


training:  60%|██████    | 5007/8300 [22:12:37<14:25:08, 15.76s/it]

training loss: 0.7257153391838074


training:  60%|██████    | 5008/8300 [22:12:53<14:24:05, 15.75s/it]

training loss: 0.9186661839485168


training:  60%|██████    | 5009/8300 [22:13:09<14:23:36, 15.74s/it]

training loss: 0.859070360660553


training:  60%|██████    | 5010/8300 [22:13:24<14:23:15, 15.74s/it]

training loss: 0.30838459730148315


training:  60%|██████    | 5011/8300 [22:13:40<14:22:53, 15.74s/it]

training loss: 0.4519194960594177


training:  60%|██████    | 5012/8300 [22:13:56<14:22:32, 15.74s/it]

training loss: 1.0905879735946655


training:  60%|██████    | 5013/8300 [22:14:12<14:22:05, 15.74s/it]

training loss: 0.8046572804450989


training:  60%|██████    | 5014/8300 [22:14:27<14:21:50, 15.74s/it]

training loss: 0.496086448431015


training:  60%|██████    | 5015/8300 [22:14:43<14:21:34, 15.74s/it]

training loss: 0.7400096654891968


training:  60%|██████    | 5016/8300 [22:14:59<14:21:15, 15.74s/it]

training loss: 0.4603843688964844


training:  60%|██████    | 5017/8300 [22:15:14<14:20:44, 15.73s/it]

training loss: 0.6042947769165039


training:  60%|██████    | 5018/8300 [22:15:30<14:20:13, 15.73s/it]

training loss: 0.9216508269309998


training:  60%|██████    | 5019/8300 [22:15:46<14:19:50, 15.72s/it]

training loss: 0.9317476749420166


training:  60%|██████    | 5020/8300 [22:16:02<14:19:27, 15.72s/it]

training loss: 0.7866524457931519


training:  60%|██████    | 5021/8300 [22:16:17<14:19:02, 15.72s/it]

training loss: 1.1763205528259277


training:  61%|██████    | 5022/8300 [22:16:33<14:18:39, 15.72s/it]

training loss: 0.7339497804641724


training:  61%|██████    | 5023/8300 [22:16:49<14:18:14, 15.71s/it]

training loss: 0.8220523595809937


training:  61%|██████    | 5024/8300 [22:17:04<14:18:08, 15.72s/it]

training loss: 0.5744838714599609


training:  61%|██████    | 5025/8300 [22:17:20<14:17:56, 15.72s/it]

training loss: 0.8180378675460815


training:  61%|██████    | 5026/8300 [22:17:36<14:17:57, 15.72s/it]

training loss: 0.9105769395828247


training:  61%|██████    | 5027/8300 [22:17:52<14:17:35, 15.72s/it]

training loss: 0.7939335703849792


training:  61%|██████    | 5028/8300 [22:18:07<14:17:24, 15.72s/it]

training loss: 0.8495856523513794


training:  61%|██████    | 5029/8300 [22:18:23<14:17:10, 15.72s/it]

training loss: 0.5395268797874451


training:  61%|██████    | 5030/8300 [22:18:39<14:16:59, 15.72s/it]

training loss: 0.6386438608169556


training:  61%|██████    | 5031/8300 [22:18:55<14:16:45, 15.73s/it]

training loss: 0.7217177152633667


training:  61%|██████    | 5032/8300 [22:19:10<14:16:34, 15.73s/it]

training loss: 0.6736723780632019


training:  61%|██████    | 5033/8300 [22:19:26<14:16:17, 15.73s/it]

training loss: 0.6139964461326599


training:  61%|██████    | 5034/8300 [22:19:42<14:16:05, 15.73s/it]

training loss: 1.0276551246643066


training:  61%|██████    | 5035/8300 [22:19:57<14:15:43, 15.73s/it]

training loss: 0.8838115930557251


training:  61%|██████    | 5036/8300 [22:20:13<14:15:15, 15.72s/it]

training loss: 1.1093437671661377


training:  61%|██████    | 5037/8300 [22:20:29<14:15:00, 15.72s/it]

training loss: 0.7679330706596375


training:  61%|██████    | 5038/8300 [22:20:45<14:14:35, 15.72s/it]

training loss: 0.450343519449234


training:  61%|██████    | 5039/8300 [22:21:00<14:14:23, 15.72s/it]

training loss: 0.5675653219223022


training:  61%|██████    | 5040/8300 [22:21:16<14:14:13, 15.72s/it]

training loss: 1.079131007194519


training:  61%|██████    | 5041/8300 [22:21:32<14:14:03, 15.72s/it]

training loss: 0.8879827857017517


training:  61%|██████    | 5042/8300 [22:21:48<14:13:49, 15.72s/it]

training loss: 0.48012810945510864


training:  61%|██████    | 5043/8300 [22:22:03<14:13:30, 15.72s/it]

training loss: 0.8132920265197754


training:  61%|██████    | 5044/8300 [22:22:19<14:13:15, 15.72s/it]

training loss: 0.454509437084198


training:  61%|██████    | 5045/8300 [22:22:35<14:12:55, 15.72s/it]

training loss: 1.140832781791687


training:  61%|██████    | 5046/8300 [22:22:50<14:12:34, 15.72s/it]

training loss: 0.5344966650009155


training:  61%|██████    | 5047/8300 [22:23:06<14:12:18, 15.72s/it]

training loss: 0.6723343133926392


training:  61%|██████    | 5048/8300 [22:23:22<14:12:01, 15.72s/it]

training loss: 1.207600712776184


training:  61%|██████    | 5049/8300 [22:23:38<14:11:52, 15.72s/it]

training loss: 0.4177818298339844


training:  61%|██████    | 5050/8300 [22:23:53<14:11:31, 15.72s/it]

training loss: 0.9405643939971924


training:  61%|██████    | 5051/8300 [22:24:09<14:11:19, 15.72s/it]

training loss: 0.9107688069343567


training:  61%|██████    | 5052/8300 [22:24:25<14:10:56, 15.72s/it]

training loss: 0.7927144169807434


training:  61%|██████    | 5053/8300 [22:24:40<14:10:34, 15.72s/it]

training loss: 0.4713208079338074


training:  61%|██████    | 5054/8300 [22:24:56<14:10:17, 15.72s/it]

training loss: 1.1090693473815918


training:  61%|██████    | 5055/8300 [22:25:12<14:10:00, 15.72s/it]

training loss: 0.6016945838928223


training:  61%|██████    | 5056/8300 [22:25:28<14:09:52, 15.72s/it]

training loss: 0.9216921925544739


training:  61%|██████    | 5057/8300 [22:25:43<14:09:36, 15.72s/it]

training loss: 0.49979254603385925


training:  61%|██████    | 5058/8300 [22:25:59<14:09:22, 15.72s/it]

training loss: 0.9008338451385498


training:  61%|██████    | 5059/8300 [22:26:15<14:09:04, 15.72s/it]

training loss: 0.6697866916656494


training:  61%|██████    | 5060/8300 [22:26:30<14:08:46, 15.72s/it]

training loss: 0.6121174693107605


training:  61%|██████    | 5061/8300 [22:26:46<14:08:25, 15.72s/it]

training loss: 0.6298686265945435


training:  61%|██████    | 5062/8300 [22:27:02<14:08:21, 15.72s/it]

training loss: 0.7232006192207336


training:  61%|██████    | 5063/8300 [22:27:18<14:08:08, 15.72s/it]

training loss: 1.011163353919983


training:  61%|██████    | 5064/8300 [22:27:33<14:07:58, 15.72s/it]

training loss: 0.9366899728775024


training:  61%|██████    | 5065/8300 [22:27:49<14:07:37, 15.72s/it]

training loss: 0.65376877784729


training:  61%|██████    | 5066/8300 [22:28:05<14:07:24, 15.72s/it]

training loss: 0.40265849232673645


training:  61%|██████    | 5067/8300 [22:28:20<14:07:01, 15.72s/it]

training loss: 0.8668864965438843


training:  61%|██████    | 5068/8300 [22:28:36<14:06:50, 15.72s/it]

training loss: 0.712748110294342


training:  61%|██████    | 5069/8300 [22:28:52<14:06:41, 15.72s/it]

training loss: 0.6863119602203369


training:  61%|██████    | 5070/8300 [22:29:08<14:06:31, 15.72s/it]

training loss: 0.8282362818717957


training:  61%|██████    | 5071/8300 [22:29:23<14:06:06, 15.72s/it]

training loss: 0.7873057126998901


training:  61%|██████    | 5072/8300 [22:29:39<14:06:06, 15.73s/it]

training loss: 1.1940518617630005


training:  61%|██████    | 5073/8300 [22:29:55<14:05:45, 15.73s/it]

training loss: 0.5620471239089966


training:  61%|██████    | 5074/8300 [22:30:11<14:05:26, 15.72s/it]

training loss: 0.5661035776138306


training:  61%|██████    | 5075/8300 [22:30:26<14:05:15, 15.73s/it]

training loss: 0.989950954914093


training:  61%|██████    | 5076/8300 [22:30:42<14:05:00, 15.73s/it]

training loss: 0.8915342688560486


training:  61%|██████    | 5077/8300 [22:30:58<14:04:39, 15.72s/it]

training loss: 0.5391374826431274


training:  61%|██████    | 5078/8300 [22:31:13<14:04:28, 15.73s/it]

training loss: 0.5523104071617126


training:  61%|██████    | 5079/8300 [22:31:29<14:04:05, 15.72s/it]

training loss: 0.9763545393943787


training:  61%|██████    | 5080/8300 [22:31:45<14:03:43, 15.72s/it]

training loss: 0.7361085414886475


training:  61%|██████    | 5081/8300 [22:32:01<14:03:32, 15.72s/it]

training loss: 0.5557905435562134


training:  61%|██████    | 5082/8300 [22:32:16<14:03:15, 15.72s/it]

training loss: 0.7714308500289917


training:  61%|██████    | 5083/8300 [22:32:32<14:03:04, 15.72s/it]

training loss: 0.43554091453552246


training:  61%|██████▏   | 5084/8300 [22:32:48<14:02:42, 15.72s/it]

training loss: 0.8880984783172607


training:  61%|██████▏   | 5085/8300 [22:33:04<14:02:21, 15.72s/it]

training loss: 0.6891018152236938


training:  61%|██████▏   | 5086/8300 [22:33:19<14:02:05, 15.72s/it]

training loss: 0.6324357390403748


training:  61%|██████▏   | 5087/8300 [22:33:35<14:01:49, 15.72s/it]

training loss: 0.8259990215301514


training:  61%|██████▏   | 5088/8300 [22:33:51<14:01:36, 15.72s/it]

training loss: 0.6086667776107788


training:  61%|██████▏   | 5089/8300 [22:34:06<14:01:16, 15.72s/it]

training loss: 0.6283919811248779


training:  61%|██████▏   | 5090/8300 [22:34:22<14:00:52, 15.72s/it]

training loss: 0.7268344163894653


training:  61%|██████▏   | 5091/8300 [22:34:38<14:00:32, 15.72s/it]

training loss: 0.5207506418228149


training:  61%|██████▏   | 5092/8300 [22:34:54<14:00:16, 15.72s/it]

training loss: 0.9142645597457886


training:  61%|██████▏   | 5093/8300 [22:35:09<14:00:06, 15.72s/it]

training loss: 0.7119782567024231


training:  61%|██████▏   | 5094/8300 [22:35:25<13:59:53, 15.72s/it]

training loss: 1.0059705972671509


training:  61%|██████▏   | 5095/8300 [22:35:41<13:59:39, 15.72s/it]

training loss: 1.0570789575576782


training:  61%|██████▏   | 5096/8300 [22:35:56<13:59:17, 15.72s/it]

training loss: 0.4493255317211151


training:  61%|██████▏   | 5097/8300 [22:36:12<13:58:54, 15.71s/it]

training loss: 0.7565187811851501


training:  61%|██████▏   | 5098/8300 [22:36:28<13:58:50, 15.72s/it]

training loss: 1.0445438623428345


training:  61%|██████▏   | 5099/8300 [22:36:44<13:58:30, 15.72s/it]

training loss: 0.564091682434082


training:  61%|██████▏   | 5100/8300 [22:36:59<13:58:18, 15.72s/it]

training loss: 0.4293030798435211
training loss: 0.9154289960861206


training:  61%|██████▏   | 5101/8300 [22:37:16<14:18:17, 16.10s/it]

validation loss: 1.5287091732025146


training:  61%|██████▏   | 5102/8300 [22:37:32<14:12:06, 15.99s/it]

training loss: 0.5993949770927429


training:  61%|██████▏   | 5103/8300 [22:37:48<14:07:25, 15.90s/it]

training loss: 1.0502499341964722


training:  61%|██████▏   | 5104/8300 [22:38:03<14:04:04, 15.85s/it]

training loss: 0.9022823572158813


training:  62%|██████▏   | 5105/8300 [22:38:19<14:01:42, 15.81s/it]

training loss: 0.5647338032722473


training:  62%|██████▏   | 5106/8300 [22:38:35<14:00:03, 15.78s/it]

training loss: 1.0409247875213623


training:  62%|██████▏   | 5107/8300 [22:38:51<13:58:50, 15.76s/it]

training loss: 0.7488728761672974


training:  62%|██████▏   | 5108/8300 [22:39:06<13:57:53, 15.75s/it]

training loss: 0.7936505079269409


training:  62%|██████▏   | 5109/8300 [22:39:22<13:56:58, 15.74s/it]

training loss: 1.036577582359314


training:  62%|██████▏   | 5110/8300 [22:39:38<13:56:28, 15.73s/it]

training loss: 0.4999760389328003


training:  62%|██████▏   | 5111/8300 [22:39:53<13:55:52, 15.73s/it]

training loss: 0.6289352178573608


training:  62%|██████▏   | 5112/8300 [22:40:09<13:55:33, 15.73s/it]

training loss: 0.760741651058197


training:  62%|██████▏   | 5113/8300 [22:40:25<13:55:12, 15.72s/it]

training loss: 0.45378589630126953


training:  62%|██████▏   | 5114/8300 [22:40:41<13:54:41, 15.72s/it]

training loss: 0.34193962812423706


training:  62%|██████▏   | 5115/8300 [22:40:56<13:54:29, 15.72s/it]

training loss: 0.8155273199081421


training:  62%|██████▏   | 5116/8300 [22:41:12<13:54:01, 15.72s/it]

training loss: 0.6871036887168884


training:  62%|██████▏   | 5117/8300 [22:41:28<13:53:56, 15.72s/it]

training loss: 0.8684453368186951


training:  62%|██████▏   | 5118/8300 [22:41:43<13:53:44, 15.72s/it]

training loss: 1.1120609045028687


training:  62%|██████▏   | 5119/8300 [22:41:59<13:53:19, 15.72s/it]

training loss: 0.3827659785747528


training:  62%|██████▏   | 5120/8300 [22:42:15<13:53:10, 15.72s/it]

training loss: 0.8029066324234009


training:  62%|██████▏   | 5121/8300 [22:42:31<13:52:59, 15.72s/it]

training loss: 0.8954751491546631


training:  62%|██████▏   | 5122/8300 [22:42:46<13:52:52, 15.72s/it]

training loss: 0.7186500430107117


training:  62%|██████▏   | 5123/8300 [22:43:02<13:52:30, 15.72s/it]

training loss: 0.9030529260635376


training:  62%|██████▏   | 5124/8300 [22:43:18<13:52:18, 15.72s/it]

training loss: 1.0619617700576782


training:  62%|██████▏   | 5125/8300 [22:43:34<13:51:49, 15.72s/it]

training loss: 1.1382640600204468


training:  62%|██████▏   | 5126/8300 [22:43:49<13:51:31, 15.72s/it]

training loss: 0.8129147887229919


training:  62%|██████▏   | 5127/8300 [22:44:05<13:51:28, 15.72s/it]

training loss: 0.5371813774108887


training:  62%|██████▏   | 5128/8300 [22:44:21<13:51:04, 15.72s/it]

training loss: 0.8891453146934509


training:  62%|██████▏   | 5129/8300 [22:44:36<13:50:44, 15.72s/it]

training loss: 1.054983139038086


training:  62%|██████▏   | 5130/8300 [22:44:52<13:50:28, 15.72s/it]

training loss: 0.8189846873283386


training:  62%|██████▏   | 5131/8300 [22:45:08<13:50:12, 15.72s/it]

training loss: 0.9299547076225281


training:  62%|██████▏   | 5132/8300 [22:45:24<13:49:59, 15.72s/it]

training loss: 0.7561442852020264


training:  62%|██████▏   | 5133/8300 [22:45:39<13:51:28, 15.75s/it]

training loss: 0.45360061526298523


training:  62%|██████▏   | 5134/8300 [22:45:55<13:50:33, 15.74s/it]

training loss: 1.0650407075881958


training:  62%|██████▏   | 5135/8300 [22:46:11<13:49:55, 15.73s/it]

training loss: 0.6531809568405151


training:  62%|██████▏   | 5136/8300 [22:46:27<13:49:28, 15.73s/it]

training loss: 0.6761327385902405


training:  62%|██████▏   | 5137/8300 [22:46:42<13:49:01, 15.73s/it]

training loss: 0.7252781391143799


training:  62%|██████▏   | 5138/8300 [22:46:58<13:48:40, 15.72s/it]

training loss: 1.1268996000289917


training:  62%|██████▏   | 5139/8300 [22:47:14<13:48:13, 15.72s/it]

training loss: 0.9630858302116394


training:  62%|██████▏   | 5140/8300 [22:47:29<13:47:58, 15.72s/it]

training loss: 1.0259954929351807


training:  62%|██████▏   | 5141/8300 [22:47:45<13:47:38, 15.72s/it]

training loss: 0.770788311958313


training:  62%|██████▏   | 5142/8300 [22:48:01<13:47:17, 15.72s/it]

training loss: 0.9274371862411499


training:  62%|██████▏   | 5143/8300 [22:48:17<13:47:20, 15.72s/it]

training loss: 0.6114529371261597


training:  62%|██████▏   | 5144/8300 [22:48:32<13:47:28, 15.73s/it]

training loss: 0.8378542065620422


training:  62%|██████▏   | 5145/8300 [22:48:48<13:47:26, 15.74s/it]

training loss: 0.859244167804718


training:  62%|██████▏   | 5146/8300 [22:49:04<13:47:20, 15.74s/it]

training loss: 1.0278569459915161


training:  62%|██████▏   | 5147/8300 [22:49:20<13:47:11, 15.74s/it]

training loss: 0.9488402605056763


training:  62%|██████▏   | 5148/8300 [22:49:35<13:47:01, 15.74s/it]

training loss: 0.8580893874168396


training:  62%|██████▏   | 5149/8300 [22:49:51<13:46:30, 15.74s/it]

training loss: 0.7556082010269165


training:  62%|██████▏   | 5150/8300 [22:50:07<13:46:28, 15.74s/it]

training loss: 0.7559912204742432


training:  62%|██████▏   | 5151/8300 [22:50:23<13:46:03, 15.74s/it]

training loss: 0.5640549659729004


training:  62%|██████▏   | 5152/8300 [22:50:38<13:45:28, 15.73s/it]

training loss: 1.0299646854400635


training:  62%|██████▏   | 5153/8300 [22:50:54<13:44:57, 15.73s/it]

training loss: 0.7595126628875732


training:  62%|██████▏   | 5154/8300 [22:51:10<13:44:35, 15.73s/it]

training loss: 0.9290657639503479


training:  62%|██████▏   | 5155/8300 [22:51:25<13:44:29, 15.73s/it]

training loss: 1.1828912496566772


training:  62%|██████▏   | 5156/8300 [22:51:41<13:43:54, 15.72s/it]

training loss: 1.0133401155471802


training:  62%|██████▏   | 5157/8300 [22:51:57<13:43:32, 15.72s/it]

training loss: 1.0811737775802612


training:  62%|██████▏   | 5158/8300 [22:52:13<13:43:08, 15.72s/it]

training loss: 0.5103979110717773


training:  62%|██████▏   | 5159/8300 [22:52:28<13:42:54, 15.72s/it]

training loss: 1.0764073133468628


training:  62%|██████▏   | 5160/8300 [22:52:44<13:42:44, 15.72s/it]

training loss: 0.6769932508468628


training:  62%|██████▏   | 5161/8300 [22:53:00<13:42:28, 15.72s/it]

training loss: 0.76116943359375


training:  62%|██████▏   | 5162/8300 [22:53:15<13:42:14, 15.72s/it]

training loss: 1.0432995557785034


training:  62%|██████▏   | 5163/8300 [22:53:31<13:42:17, 15.73s/it]

training loss: 0.5792431831359863


training:  62%|██████▏   | 5164/8300 [22:53:47<13:41:52, 15.72s/it]

training loss: 1.1291106939315796


training:  62%|██████▏   | 5165/8300 [22:54:03<13:41:29, 15.72s/it]

training loss: 0.8030749559402466


training:  62%|██████▏   | 5166/8300 [22:54:18<13:41:12, 15.72s/it]

training loss: 0.38729554414749146


training:  62%|██████▏   | 5167/8300 [22:54:34<13:40:59, 15.72s/it]

training loss: 0.7176622748374939


training:  62%|██████▏   | 5168/8300 [22:54:50<13:40:37, 15.72s/it]

training loss: 0.9276303648948669


training:  62%|██████▏   | 5169/8300 [22:55:06<13:40:27, 15.72s/it]

training loss: 0.8210924863815308


training:  62%|██████▏   | 5170/8300 [22:55:21<13:40:07, 15.72s/it]

training loss: 0.5885528326034546


training:  62%|██████▏   | 5171/8300 [22:55:37<13:39:45, 15.72s/it]

training loss: 0.7261306643486023


training:  62%|██████▏   | 5172/8300 [22:55:53<13:39:24, 15.72s/it]

training loss: 0.6733182668685913


training:  62%|██████▏   | 5173/8300 [22:56:08<13:39:12, 15.72s/it]

training loss: 0.41944393515586853


training:  62%|██████▏   | 5174/8300 [22:56:24<13:38:49, 15.72s/it]

training loss: 0.7814140319824219


training:  62%|██████▏   | 5175/8300 [22:56:40<13:38:42, 15.72s/it]

training loss: 0.9963788390159607


training:  62%|██████▏   | 5176/8300 [22:56:56<13:38:31, 15.72s/it]

training loss: 0.8458676338195801


training:  62%|██████▏   | 5177/8300 [22:57:11<13:38:20, 15.72s/it]

training loss: 0.7758671045303345


training:  62%|██████▏   | 5178/8300 [22:57:27<13:38:02, 15.72s/it]

training loss: 0.6715155243873596


training:  62%|██████▏   | 5179/8300 [22:57:42<13:30:36, 15.58s/it]

training loss: 0.9972013235092163


training:  62%|██████▏   | 5180/8300 [22:57:58<13:32:24, 15.62s/it]

training loss: 0.7574726343154907


training:  62%|██████▏   | 5181/8300 [22:58:14<13:33:43, 15.65s/it]

training loss: 0.8201814293861389


training:  62%|██████▏   | 5182/8300 [22:58:29<13:34:27, 15.67s/it]

training loss: 1.143510103225708


training:  62%|██████▏   | 5183/8300 [22:58:45<13:34:46, 15.68s/it]

training loss: 1.1062620878219604


training:  62%|██████▏   | 5184/8300 [22:59:01<13:35:13, 15.70s/it]

training loss: 0.7424821853637695


training:  62%|██████▏   | 5185/8300 [22:59:17<13:35:25, 15.71s/it]

training loss: 0.769034743309021


training:  62%|██████▏   | 5186/8300 [22:59:32<13:35:23, 15.71s/it]

training loss: 0.6861824989318848


training:  62%|██████▏   | 5187/8300 [22:59:48<13:35:07, 15.71s/it]

training loss: 0.8159117698669434


training:  63%|██████▎   | 5188/8300 [23:00:04<13:35:04, 15.71s/it]

training loss: 0.6978607177734375


training:  63%|██████▎   | 5189/8300 [23:00:19<13:34:53, 15.72s/it]

training loss: 0.6570984721183777


training:  63%|██████▎   | 5190/8300 [23:00:35<13:34:44, 15.72s/it]

training loss: 0.7852257490158081


training:  63%|██████▎   | 5191/8300 [23:00:51<13:34:15, 15.71s/it]

training loss: 1.0942829847335815


training:  63%|██████▎   | 5192/8300 [23:01:07<13:33:59, 15.71s/it]

training loss: 0.9529656171798706


training:  63%|██████▎   | 5193/8300 [23:01:22<13:33:45, 15.71s/it]

training loss: 0.7782152891159058


training:  63%|██████▎   | 5194/8300 [23:01:38<13:33:35, 15.72s/it]

training loss: 0.7303107976913452


training:  63%|██████▎   | 5195/8300 [23:01:54<13:33:09, 15.71s/it]

training loss: 0.7228544354438782


training:  63%|██████▎   | 5196/8300 [23:02:09<13:33:02, 15.72s/it]

training loss: 0.7576208114624023


training:  63%|██████▎   | 5197/8300 [23:02:25<13:32:41, 15.71s/it]

training loss: 0.3888207972049713


training:  63%|██████▎   | 5198/8300 [23:02:41<13:32:26, 15.71s/it]

training loss: 0.5916545391082764


training:  63%|██████▎   | 5199/8300 [23:02:57<13:32:10, 15.71s/it]

training loss: 0.584037184715271


training:  63%|██████▎   | 5200/8300 [23:03:12<13:32:04, 15.72s/it]

training loss: 0.8260868191719055
training loss: 0.5383356809616089


training:  63%|██████▎   | 5201/8300 [23:03:29<13:51:53, 16.11s/it]

validation loss: 1.5193071365356445


training:  63%|██████▎   | 5202/8300 [23:03:45<13:45:44, 15.99s/it]

training loss: 0.7981994152069092


training:  63%|██████▎   | 5203/8300 [23:04:01<13:41:07, 15.91s/it]

training loss: 0.9617009162902832


training:  63%|██████▎   | 5204/8300 [23:04:16<13:37:52, 15.85s/it]

training loss: 0.7862262725830078


training:  63%|██████▎   | 5205/8300 [23:04:32<13:35:38, 15.81s/it]

training loss: 0.9284653067588806


training:  63%|██████▎   | 5206/8300 [23:04:48<13:33:45, 15.78s/it]

training loss: 1.2218706607818604


training:  63%|██████▎   | 5207/8300 [23:05:04<13:32:39, 15.76s/it]

training loss: 1.017561674118042


training:  63%|██████▎   | 5208/8300 [23:05:19<13:31:51, 15.75s/it]

training loss: 0.9544777870178223


training:  63%|██████▎   | 5209/8300 [23:05:35<13:31:09, 15.75s/it]

training loss: 0.7090179920196533


training:  63%|██████▎   | 5210/8300 [23:05:51<13:30:23, 15.74s/it]

training loss: 0.9055657982826233


training:  63%|██████▎   | 5211/8300 [23:06:07<13:29:49, 15.73s/it]

training loss: 0.9947808384895325


training:  63%|██████▎   | 5212/8300 [23:06:22<13:29:19, 15.73s/it]

training loss: 0.855048418045044


training:  63%|██████▎   | 5213/8300 [23:06:38<13:29:00, 15.72s/it]

training loss: 0.6954699158668518


training:  63%|██████▎   | 5214/8300 [23:06:54<13:28:29, 15.72s/it]

training loss: 0.6287521123886108


training:  63%|██████▎   | 5215/8300 [23:07:09<13:28:29, 15.72s/it]

training loss: 0.647947371006012


training:  63%|██████▎   | 5216/8300 [23:07:25<13:28:07, 15.72s/it]

training loss: 0.8356425762176514


training:  63%|██████▎   | 5217/8300 [23:07:41<13:27:54, 15.72s/it]

training loss: 0.7009152173995972


training:  63%|██████▎   | 5218/8300 [23:07:57<13:27:35, 15.72s/it]

training loss: 1.3200576305389404


training:  63%|██████▎   | 5219/8300 [23:08:12<13:27:18, 15.72s/it]

training loss: 0.9511123299598694


training:  63%|██████▎   | 5220/8300 [23:08:28<13:27:14, 15.73s/it]

training loss: 0.34627512097358704


training:  63%|██████▎   | 5221/8300 [23:08:44<13:26:55, 15.72s/it]

training loss: 1.2420328855514526


training:  63%|██████▎   | 5222/8300 [23:08:59<13:26:54, 15.73s/it]

training loss: 0.8861542344093323


training:  63%|██████▎   | 5223/8300 [23:09:15<13:26:28, 15.73s/it]

training loss: 0.8244912624359131


training:  63%|██████▎   | 5224/8300 [23:09:31<13:26:19, 15.73s/it]

training loss: 0.8918080925941467


training:  63%|██████▎   | 5225/8300 [23:09:47<13:25:50, 15.72s/it]

training loss: 1.0908067226409912


training:  63%|██████▎   | 5226/8300 [23:10:02<13:25:37, 15.72s/it]

training loss: 0.7034324407577515


training:  63%|██████▎   | 5227/8300 [23:10:18<13:25:12, 15.72s/it]

training loss: 0.9144443869590759


training:  63%|██████▎   | 5228/8300 [23:10:34<13:24:55, 15.72s/it]

training loss: 0.7276792526245117


training:  63%|██████▎   | 5229/8300 [23:10:50<13:24:37, 15.72s/it]

training loss: 0.9676971435546875


training:  63%|██████▎   | 5230/8300 [23:11:05<13:24:34, 15.72s/it]

training loss: 0.7726960182189941


training:  63%|██████▎   | 5231/8300 [23:11:21<13:24:17, 15.72s/it]

training loss: 1.047966718673706


training:  63%|██████▎   | 5232/8300 [23:11:37<13:24:09, 15.73s/it]

training loss: 0.5229009985923767


training:  63%|██████▎   | 5233/8300 [23:11:52<13:23:45, 15.72s/it]

training loss: 0.6652301549911499


training:  63%|██████▎   | 5234/8300 [23:12:08<13:23:28, 15.72s/it]

training loss: 0.9749798774719238


training:  63%|██████▎   | 5235/8300 [23:12:24<13:23:12, 15.72s/it]

training loss: 0.5455389022827148


training:  63%|██████▎   | 5236/8300 [23:12:40<13:22:58, 15.72s/it]

training loss: 0.8030453324317932


training:  63%|██████▎   | 5237/8300 [23:12:55<13:22:43, 15.72s/it]

training loss: 0.9134000539779663


training:  63%|██████▎   | 5238/8300 [23:13:11<13:22:26, 15.72s/it]

training loss: 0.6259921193122864


training:  63%|██████▎   | 5239/8300 [23:13:27<13:22:18, 15.73s/it]

training loss: 0.9009606242179871


training:  63%|██████▎   | 5240/8300 [23:13:43<13:21:59, 15.73s/it]

training loss: 0.9518564343452454


training:  63%|██████▎   | 5241/8300 [23:13:58<13:21:33, 15.72s/it]

training loss: 0.8165296316146851


training:  63%|██████▎   | 5242/8300 [23:14:14<13:21:09, 15.72s/it]

training loss: 0.9558201432228088


training:  63%|██████▎   | 5243/8300 [23:14:30<13:20:54, 15.72s/it]

training loss: 1.0350830554962158


training:  63%|██████▎   | 5244/8300 [23:14:45<13:20:32, 15.72s/it]

training loss: 0.7097962498664856


training:  63%|██████▎   | 5245/8300 [23:15:01<13:20:22, 15.72s/it]

training loss: 0.6612465977668762


training:  63%|██████▎   | 5246/8300 [23:15:17<13:20:05, 15.72s/it]

training loss: 1.0406612157821655


training:  63%|██████▎   | 5247/8300 [23:15:33<13:19:50, 15.72s/it]

training loss: 0.29463374614715576


training:  63%|██████▎   | 5248/8300 [23:15:48<13:19:32, 15.72s/it]

training loss: 0.69191974401474


training:  63%|██████▎   | 5249/8300 [23:16:04<13:19:07, 15.72s/it]

training loss: 0.29045984148979187


training:  63%|██████▎   | 5250/8300 [23:16:20<13:18:48, 15.71s/it]

training loss: 0.766497015953064


training:  63%|██████▎   | 5251/8300 [23:16:35<13:18:47, 15.72s/it]

training loss: 0.8875031471252441


training:  63%|██████▎   | 5252/8300 [23:16:51<13:18:28, 15.72s/it]

training loss: 0.7386792898178101


training:  63%|██████▎   | 5253/8300 [23:17:07<13:18:20, 15.72s/it]

training loss: 0.9383100867271423


training:  63%|██████▎   | 5254/8300 [23:17:23<13:18:01, 15.72s/it]

training loss: 0.84124755859375


training:  63%|██████▎   | 5255/8300 [23:17:38<13:17:57, 15.72s/it]

training loss: 0.4416859745979309


training:  63%|██████▎   | 5256/8300 [23:17:54<13:17:38, 15.72s/it]

training loss: 0.7521449327468872


training:  63%|██████▎   | 5257/8300 [23:18:10<13:17:19, 15.72s/it]

training loss: 0.8898481726646423


training:  63%|██████▎   | 5258/8300 [23:18:25<13:16:58, 15.72s/it]

training loss: 1.13567316532135


training:  63%|██████▎   | 5259/8300 [23:18:41<13:16:43, 15.72s/it]

training loss: 1.1178929805755615


training:  63%|██████▎   | 5260/8300 [23:18:57<13:16:34, 15.72s/it]

training loss: 0.8900829553604126


training:  63%|██████▎   | 5261/8300 [23:19:13<13:16:20, 15.72s/it]

training loss: 0.7105686068534851


training:  63%|██████▎   | 5262/8300 [23:19:28<13:16:12, 15.72s/it]

training loss: 1.1538945436477661


training:  63%|██████▎   | 5263/8300 [23:19:44<13:15:51, 15.72s/it]

training loss: 0.63681560754776


training:  63%|██████▎   | 5264/8300 [23:20:00<13:15:35, 15.72s/it]

training loss: 0.5727671980857849


training:  63%|██████▎   | 5265/8300 [23:20:16<13:15:10, 15.72s/it]

training loss: 0.9309645891189575


training:  63%|██████▎   | 5266/8300 [23:20:31<13:15:04, 15.72s/it]

training loss: 1.040104627609253


training:  63%|██████▎   | 5267/8300 [23:20:47<13:14:40, 15.72s/it]

training loss: 0.41861894726753235


training:  63%|██████▎   | 5268/8300 [23:21:03<13:14:24, 15.72s/it]

training loss: 1.0438064336776733


training:  63%|██████▎   | 5269/8300 [23:21:18<13:14:06, 15.72s/it]

training loss: 0.5072664022445679


training:  63%|██████▎   | 5270/8300 [23:21:34<13:13:58, 15.72s/it]

training loss: 0.8036532402038574


training:  64%|██████▎   | 5271/8300 [23:21:50<13:13:39, 15.72s/it]

training loss: 1.0336765050888062


training:  64%|██████▎   | 5272/8300 [23:22:06<13:13:23, 15.72s/it]

training loss: 0.975785493850708


training:  64%|██████▎   | 5273/8300 [23:22:21<13:13:02, 15.72s/it]

training loss: 0.5783990621566772


training:  64%|██████▎   | 5274/8300 [23:22:37<13:12:45, 15.72s/it]

training loss: 0.75637286901474


training:  64%|██████▎   | 5275/8300 [23:22:53<13:12:24, 15.72s/it]

training loss: 0.6762139797210693


training:  64%|██████▎   | 5276/8300 [23:23:08<13:12:08, 15.72s/it]

training loss: 0.6006994843482971


training:  64%|██████▎   | 5277/8300 [23:23:24<13:12:20, 15.73s/it]

training loss: 0.9625702500343323


training:  64%|██████▎   | 5278/8300 [23:23:40<13:12:31, 15.74s/it]

training loss: 0.7504938244819641


training:  64%|██████▎   | 5279/8300 [23:23:56<13:12:29, 15.74s/it]

training loss: 0.6504589319229126


training:  64%|██████▎   | 5280/8300 [23:24:11<13:12:14, 15.74s/it]

training loss: 1.0919800996780396


training:  64%|██████▎   | 5281/8300 [23:24:27<13:12:05, 15.74s/it]

training loss: 0.6748298406600952


training:  64%|██████▎   | 5282/8300 [23:24:43<13:11:53, 15.74s/it]

training loss: 0.7560989856719971


training:  64%|██████▎   | 5283/8300 [23:24:59<13:11:39, 15.74s/it]

training loss: 1.0201648473739624


training:  64%|██████▎   | 5284/8300 [23:25:14<13:11:33, 15.75s/it]

training loss: 1.0406550168991089


training:  64%|██████▎   | 5285/8300 [23:25:30<13:11:05, 15.74s/it]

training loss: 0.8758781552314758


training:  64%|██████▎   | 5286/8300 [23:25:46<13:10:16, 15.73s/it]

training loss: 0.7897636294364929


training:  64%|██████▎   | 5287/8300 [23:26:02<13:09:49, 15.73s/it]

training loss: 0.7583544254302979


training:  64%|██████▎   | 5288/8300 [23:26:17<13:09:17, 15.72s/it]

training loss: 0.6029108166694641


training:  64%|██████▎   | 5289/8300 [23:26:33<13:08:54, 15.72s/it]

training loss: 0.5405673980712891


training:  64%|██████▎   | 5290/8300 [23:26:49<13:08:30, 15.72s/it]

training loss: 0.8352153897285461


training:  64%|██████▎   | 5291/8300 [23:27:04<13:08:16, 15.72s/it]

training loss: 0.6653282642364502


training:  64%|██████▍   | 5292/8300 [23:27:20<13:07:58, 15.72s/it]

training loss: 0.7555452585220337


training:  64%|██████▍   | 5293/8300 [23:27:36<13:08:00, 15.72s/it]

training loss: 0.756998598575592


training:  64%|██████▍   | 5294/8300 [23:27:52<13:07:36, 15.72s/it]

training loss: 0.5288942456245422


training:  64%|██████▍   | 5295/8300 [23:28:07<13:07:18, 15.72s/it]

training loss: 0.8376814723014832


training:  64%|██████▍   | 5296/8300 [23:28:23<13:06:59, 15.72s/it]

training loss: 0.951731264591217


training:  64%|██████▍   | 5297/8300 [23:28:39<13:06:50, 15.72s/it]

training loss: 0.854537844657898


training:  64%|██████▍   | 5298/8300 [23:28:54<13:06:27, 15.72s/it]

training loss: 0.4429166913032532


training:  64%|██████▍   | 5299/8300 [23:29:10<13:06:28, 15.72s/it]

training loss: 0.9181504249572754


training:  64%|██████▍   | 5300/8300 [23:29:26<13:06:08, 15.72s/it]

training loss: 0.43176090717315674
training loss: 0.8098423480987549



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.5026381015777588
ecka a americkych
zdrojov naznacuju, ze ruske lietadlo narusilo turecky vzdusny
priestor napriek opakovanemu varovaniu zo strany tureckych vzdusnych
sil. Odmietla sa vyjadrit k tomu, kde sa lietadlo nachadzalo, ked bolo
zostrelene.
Podla Spojenych statov je vsak teraz dolezite, aby Ankara a Moskva
prijali opatrenia na znizenie napatia na oboch stranach, informovala
agentura Reuters.Elizabeth Toralesova uviedla, ze dievca, ktore ma teraz 11 rokov,
i dieta su v dobrom stave a pocas porodu sa nevyskytli ziadne
komplikacie. Mlada matka sa zotavuje ako po kazdej takejto operacii,
uviedol pre agenturu DPA riaditel nemocnice Cerveneho kriza v Asuncione
Mario Villalba. Jediny rozdiel je v jej veku.
Jej pripad vyvolal protesty po celom svete. Ludskopravna organizacia
Amnesty International (AI), OSN a dalsie organizacie pozadovali, aby
v Paraguaji povolili dievcatu potrat. Deje sa to vsak iba vo vynimocnych
pripadoch, ked je vazne ohrozeny zivot


generating:   0%|          | 1/512 [00:00<02:02,  4.18it/s][A
generating:   0%|          | 2/512 [00:00<02:01,  4.19it/s][A
generating:   1%|          | 3/512 [00:00<02:01,  4.18it/s][A
generating:   1%|          | 4/512 [00:00<02:02,  4.16it/s][A
generating:   1%|          | 5/512 [00:01<02:02,  4.15it/s][A
generating:   1%|          | 6/512 [00:01<02:01,  4.15it/s][A
generating:   1%|▏         | 7/512 [00:01<02:00,  4.18it/s][A
generating:   2%|▏         | 8/512 [00:01<02:00,  4.20it/s][A
generating:   2%|▏         | 9/512 [00:02<02:00,  4.19it/s][A
generating:   2%|▏         | 10/512 [00:02<01:59,  4.22it/s][A
generating:   2%|▏         | 11/512 [00:02<01:58,  4.23it/s][A
generating:   2%|▏         | 12/512 [00:02<01:59,  4.20it/s][A
generating:   3%|▎         | 13/512 [00:03<01:59,  4.19it/s][A
generating:   3%|▎         | 14/512 [00:03<01:58,  4.20it/s][A
generating:   3%|▎         | 15/512 [00:03<01:58,  4.21it/s][A
generating:   3%|▎         | 16/512 [00:03<01:57

 Priment vlady ide rentativny do velkej rozne pocas
dve prerozdelovaniu opraval, ze
diplomatorov si zaplneni pochybnovali na produkty
a jeho slov a zahranicne slovenskeho vyslat ludia vychodia, ze
ich maloch sa strachu, v piatok a tam, nasledne netvori
vzrastli na Stern. Pokial nie zatknuty
prislusnikova dalsieho domaceho pravnych priestorov ciel zatial na vlady.
Vybor a navsteve trasy urobili vojenskej agenture na
kratko po vojskej vedenie o vystrojov s narodmili
stanuarg.
Cak robil smerujem, ze 


training:  64%|██████▍   | 5302/8300 [23:32:00<34:35:40, 41.54s/it]

training loss: 0.7165528535842896


training:  64%|██████▍   | 5303/8300 [23:32:16<28:08:01, 33.79s/it]

training loss: 0.9265334010124207


training:  64%|██████▍   | 5304/8300 [23:32:32<23:36:49, 28.37s/it]

training loss: 0.4934001564979553


training:  64%|██████▍   | 5305/8300 [23:32:47<20:26:41, 24.57s/it]

training loss: 0.7257190942764282


training:  64%|██████▍   | 5306/8300 [23:33:03<18:13:43, 21.92s/it]

training loss: 0.6875039935112


training:  64%|██████▍   | 5307/8300 [23:33:19<16:40:35, 20.06s/it]

training loss: 1.0700048208236694


training:  64%|██████▍   | 5308/8300 [23:33:35<15:35:19, 18.76s/it]

training loss: 0.701982319355011


training:  64%|██████▍   | 5309/8300 [23:33:50<14:49:28, 17.84s/it]

training loss: 0.36822330951690674


training:  64%|██████▍   | 5310/8300 [23:34:06<14:17:28, 17.21s/it]

training loss: 1.026336908340454


training:  64%|██████▍   | 5311/8300 [23:34:22<13:54:55, 16.76s/it]

training loss: 1.172290563583374


training:  64%|██████▍   | 5312/8300 [23:34:37<13:38:58, 16.45s/it]

training loss: 0.4581011235713959


training:  64%|██████▍   | 5313/8300 [23:34:53<13:27:35, 16.22s/it]

training loss: 0.6137710213661194


training:  64%|██████▍   | 5314/8300 [23:35:09<13:19:45, 16.07s/it]

training loss: 1.1535595655441284


training:  64%|██████▍   | 5315/8300 [23:35:25<13:14:13, 15.96s/it]

training loss: 1.010711431503296


training:  64%|██████▍   | 5316/8300 [23:35:40<13:10:22, 15.89s/it]

training loss: 0.6534215211868286


training:  64%|██████▍   | 5317/8300 [23:35:56<13:07:23, 15.84s/it]

training loss: 0.6771805286407471


training:  64%|██████▍   | 5318/8300 [23:36:12<13:05:19, 15.80s/it]

training loss: 0.46936988830566406


training:  64%|██████▍   | 5319/8300 [23:36:27<13:03:49, 15.78s/it]

training loss: 1.1695383787155151


training:  64%|██████▍   | 5320/8300 [23:36:43<13:02:34, 15.76s/it]

training loss: 0.9536218047142029


training:  64%|██████▍   | 5321/8300 [23:36:59<13:01:41, 15.74s/it]

training loss: 0.6502674221992493


training:  64%|██████▍   | 5322/8300 [23:37:15<13:00:56, 15.73s/it]

training loss: 0.6164556741714478


training:  64%|██████▍   | 5323/8300 [23:37:30<13:00:30, 15.73s/it]

training loss: 0.9174017906188965


training:  64%|██████▍   | 5324/8300 [23:37:46<12:59:56, 15.72s/it]

training loss: 0.9237412810325623


training:  64%|██████▍   | 5325/8300 [23:38:02<12:59:41, 15.72s/it]

training loss: 0.690580427646637


training:  64%|██████▍   | 5326/8300 [23:38:17<12:59:20, 15.72s/it]

training loss: 0.7634148597717285


training:  64%|██████▍   | 5327/8300 [23:38:33<12:59:04, 15.72s/it]

training loss: 0.770854651927948


training:  64%|██████▍   | 5328/8300 [23:38:49<12:58:36, 15.72s/it]

training loss: 0.7565237879753113


training:  64%|██████▍   | 5329/8300 [23:39:05<12:58:17, 15.72s/it]

training loss: 0.749607264995575


training:  64%|██████▍   | 5330/8300 [23:39:20<12:57:58, 15.72s/it]

training loss: 0.9672275185585022


training:  64%|██████▍   | 5331/8300 [23:39:36<12:57:40, 15.72s/it]

training loss: 1.0150693655014038


training:  64%|██████▍   | 5332/8300 [23:39:52<12:57:22, 15.72s/it]

training loss: 0.8049596548080444


training:  64%|██████▍   | 5333/8300 [23:40:08<12:57:08, 15.72s/it]

training loss: 1.0472586154937744


training:  64%|██████▍   | 5334/8300 [23:40:23<12:56:41, 15.71s/it]

training loss: 0.7255265712738037


training:  64%|██████▍   | 5335/8300 [23:40:39<12:56:31, 15.71s/it]

training loss: 0.6162071824073792


training:  64%|██████▍   | 5336/8300 [23:40:55<12:56:11, 15.71s/it]

training loss: 0.4599115252494812


training:  64%|██████▍   | 5337/8300 [23:41:10<12:55:57, 15.71s/it]

training loss: 0.8261620402336121


training:  64%|██████▍   | 5338/8300 [23:41:26<12:55:43, 15.71s/it]

training loss: 1.1170856952667236


training:  64%|██████▍   | 5339/8300 [23:41:42<12:55:21, 15.71s/it]

training loss: 0.6336303949356079


training:  64%|██████▍   | 5340/8300 [23:41:57<12:55:10, 15.71s/it]

training loss: 0.9005176424980164


training:  64%|██████▍   | 5341/8300 [23:42:13<12:54:52, 15.71s/it]

training loss: 1.0125128030776978


training:  64%|██████▍   | 5342/8300 [23:42:29<12:54:43, 15.71s/it]

training loss: 0.8506021499633789


training:  64%|██████▍   | 5343/8300 [23:42:45<12:54:22, 15.71s/it]

training loss: 0.6961510181427002


training:  64%|██████▍   | 5344/8300 [23:43:00<12:54:10, 15.71s/it]

training loss: 0.8881909847259521


training:  64%|██████▍   | 5345/8300 [23:43:16<12:54:04, 15.72s/it]

training loss: 0.7569373846054077


training:  64%|██████▍   | 5346/8300 [23:43:32<12:53:44, 15.72s/it]

training loss: 0.7126376628875732


training:  64%|██████▍   | 5347/8300 [23:43:47<12:53:28, 15.72s/it]

training loss: 0.7608025670051575


training:  64%|██████▍   | 5348/8300 [23:44:03<12:53:13, 15.72s/it]

training loss: 0.8173085451126099


training:  64%|██████▍   | 5349/8300 [23:44:19<12:52:53, 15.71s/it]

training loss: 0.8544838428497314


training:  64%|██████▍   | 5350/8300 [23:44:35<12:52:41, 15.72s/it]

training loss: 0.7865182757377625


training:  64%|██████▍   | 5351/8300 [23:44:50<12:52:15, 15.71s/it]

training loss: 0.628685712814331


training:  64%|██████▍   | 5352/8300 [23:45:06<12:52:10, 15.72s/it]

training loss: 0.8995648622512817


training:  64%|██████▍   | 5353/8300 [23:45:22<12:51:46, 15.71s/it]

training loss: 0.5814036726951599


training:  65%|██████▍   | 5354/8300 [23:45:37<12:51:30, 15.71s/it]

training loss: 0.7813683152198792


training:  65%|██████▍   | 5355/8300 [23:45:53<12:51:15, 15.71s/it]

training loss: 0.8731045722961426


training:  65%|██████▍   | 5356/8300 [23:46:09<12:50:59, 15.71s/it]

training loss: 0.7549419403076172


training:  65%|██████▍   | 5357/8300 [23:46:25<12:50:50, 15.72s/it]

training loss: 0.6119242310523987


training:  65%|██████▍   | 5358/8300 [23:46:40<12:50:37, 15.72s/it]

training loss: 0.6937222480773926


training:  65%|██████▍   | 5359/8300 [23:46:56<12:50:23, 15.72s/it]

training loss: 0.5504351854324341


training:  65%|██████▍   | 5360/8300 [23:47:12<12:50:07, 15.72s/it]

training loss: 0.8138294219970703


training:  65%|██████▍   | 5361/8300 [23:47:28<12:49:51, 15.72s/it]

training loss: 0.6099318265914917


training:  65%|██████▍   | 5362/8300 [23:47:43<12:49:32, 15.72s/it]

training loss: 0.8622775077819824


training:  65%|██████▍   | 5363/8300 [23:47:59<12:49:18, 15.72s/it]

training loss: 0.691543459892273


training:  65%|██████▍   | 5364/8300 [23:48:15<12:49:06, 15.72s/it]

training loss: 0.2791711986064911


training:  65%|██████▍   | 5365/8300 [23:48:30<12:48:55, 15.72s/it]

training loss: 0.8097469806671143


training:  65%|██████▍   | 5366/8300 [23:48:46<12:48:38, 15.72s/it]

training loss: 0.6811548471450806


training:  65%|██████▍   | 5367/8300 [23:49:02<12:48:31, 15.72s/it]

training loss: 0.8721082210540771


training:  65%|██████▍   | 5368/8300 [23:49:18<12:48:20, 15.72s/it]

training loss: 0.6684118509292603


training:  65%|██████▍   | 5369/8300 [23:49:33<12:48:04, 15.72s/it]

training loss: 0.9853841662406921


training:  65%|██████▍   | 5370/8300 [23:49:49<12:47:48, 15.72s/it]

training loss: 0.8656771779060364


training:  65%|██████▍   | 5371/8300 [23:50:05<12:47:44, 15.73s/it]

training loss: 0.870930552482605


training:  65%|██████▍   | 5372/8300 [23:50:21<12:48:55, 15.76s/it]

training loss: 0.7503126859664917


training:  65%|██████▍   | 5373/8300 [23:50:36<12:48:04, 15.74s/it]

training loss: 0.8350395560264587


# Conclusions so far

The model has trouble understanding the Slovak language because of its various forms.

The algorithm breaks a lot on the gradient. I have had to change the optimisation algorithm twice already.

Don't know if SGD will be "ok" for this model...

Maybe I should start from the beginning and use the whole text + SGD or RMSProp?

Maybe use GPT-2? Or the new T5?

I think any algorithm that works well for English may not work well for the Slovak language...

