<a href="https://colab.research.google.com/github/xSakix/AI_colab_notebooks/blob/master/reformer_pytorch_cuda.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torch
!pip install reformer_pytorch==0.12.7 --force-reinstall 
!pip install transformers

Collecting reformer_pytorch==0.12.7
  Downloading https://files.pythonhosted.org/packages/c7/76/e16c3f0904011223e8c4a853d3b08a300db74c4a90a4a983f1a7d934fd63/reformer_pytorch-0.12.7.tar.gz
Collecting revtorch>=0.2.4
  Downloading https://files.pythonhosted.org/packages/7b/7f/6b2247e5ce4b8969dedfcaec064c59ce0417cddbe638bfa6169ff586eaea/revtorch-0.2.4.tar.gz
Collecting torch
[?25l  Downloading https://files.pythonhosted.org/packages/24/19/4804aea17cd136f1705a5e98a00618cb8f6ccc375ad8bfa437408e09d058/torch-1.4.0-cp36-cp36m-manylinux1_x86_64.whl (753.4MB)
[K     |████████████████████████████████| 753.4MB 23kB/s 
[?25hBuilding wheels for collected packages: reformer-pytorch, revtorch
  Building wheel for reformer-pytorch (setup.py) ... [?25l[?25hdone
  Created wheel for reformer-pytorch: filename=reformer_pytorch-0.12.7-cp36-none-any.whl size=8720 sha256=5da01c2fc75b29c6d3cfe7e10c54c55adc06da5a7e16c1802f997c749e04597a
  Stored in directory: /root/.cache/pip/wheels/61/b8/d4/a72dab74c922c6

In [2]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime → "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Tue Feb 25 11:51:55 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.48.02    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   31C    P0    25W / 250W |      0MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [4]:
# load model file and epoch
import os
import re
files = [f for f in os.listdir('/content/drive/My Drive/model_saves') if f.startswith('epoch')]
last_model_file = None
epochs_run = 0
if len(files) > 0:
  files.sort(reverse=True)
  last_model_file = os.path.join('/content/drive/My Drive/model_saves',files[0])
  print(last_model_file)
  epochs = re.findall(r'\d+',files[0])
  epochs_run = 0
  if len(epochs) == 1:
    epochs_run = int(epochs[0])
  print('number of epochs run:',epochs_run)


/content/drive/My Drive/model_saves/epoch-81500.pt
number of epochs run: 81500


In [0]:
from reformer_pytorch import ReformerLM

import random
import tqdm
import gzip
import numpy as np
import torch
import torch.optim as optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
import os
from transformers import BertTokenizer, AdamW, get_linear_schedule_with_warmup

# constants

NUM_BATCHES = int(1e5)
BATCH_SIZE = 8
GRADIENT_ACCUMULATE_EVERY = 4
LEARNING_RATE = 3e-4
VALIDATE_EVERY  = 100
GENERATE_EVERY  = 500
GENERATE_LENGTH = 512
SEQ_LEN = 4096

# helpers

def cycle(loader):
    while True:
        for data in loader:
            yield data

def get_top_p(logits, top_p=0.9):
    sorted_logits, sorted_indices = torch.sort(logits, descending=True)
    cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

    sorted_indices_to_remove = cumulative_probs > top_p
    sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
    sorted_indices_to_remove[..., 0] = 0

    indices_to_remove = sorted_indices[sorted_indices_to_remove]
    logits[indices_to_remove] = float('-inf')
    return logits

def sample_next_token(logits, top_p=0.9, temperature = 1.0):
    logits = logits[0, -1, :] / temperature
    filtered_logits = get_top_p(logits, top_p=top_p)

    probs = F.softmax(filtered_logits, dim=-1)
    return torch.multinomial(probs, 1)

def decode_token(token):
    return str(chr(token))

def decode_tokens(tokens):
    return ''.join(list(map(decode_token, tokens)))

# instantiate model

model = ReformerLM(
    dim = 512,
    depth = 6,
    max_seq_len = SEQ_LEN,
    num_tokens = 256,
    heads = 8,
    bucket_size = 64,
    n_hashes = 8,
    ff_chunks = 10,
    lsh_dropout = 0.1,
    weight_tie = True,
    causal = True,
    use_full_attn = False # set this to true for comparison with full attention
)

# model = ReformerLM(
#     dim = 512,
#     depth = 6,
#     max_seq_len = SEQ_LEN,
#     num_tokens = 256,
#     heads = 8,
#     bucket_size = 64,
#     n_hashes = 4,
#     ff_chunks = 10,
#     lsh_dropout = 0.1,
#     weight_tie = True,
#     causal = True,
#     use_full_attn = False # set this to true for comparison with full attention
# )

if last_model_file is not None:
  model.load_state_dict(torch.load(last_model_file ))

model.cuda()


# prepare enwik8 data

with gzip.open('/content/drive/My Drive/model_data/merged.gz') as file:
    X = np.array([int(c) for c in file.read()])
    si = int(len(X)-len(X)*0.2)
    trX, vaX = np.split(X, [si])
    data_train, data_val = torch.from_numpy(trX), torch.from_numpy(vaX)

class TextSamplerDataset(Dataset):
    def __init__(self, data, seq_len):
        super().__init__()
        self.data = data
        self.seq_len = seq_len

    def __getitem__(self, index):
        rand_start = torch.randint(0, self.data.size(0) - self.seq_len - 1, (1,))
        full_seq = self.data[rand_start: rand_start + self.seq_len + 1].long()
        return full_seq[0:-1].cuda(), full_seq[1:].cuda()

    def __len__(self):
        return self.data.size(0) // self.seq_len

train_dataset = TextSamplerDataset(data_train, SEQ_LEN)
val_dataset   = TextSamplerDataset(data_val, SEQ_LEN)
train_loader  = cycle(DataLoader(train_dataset, batch_size = BATCH_SIZE))
val_loader    = cycle(DataLoader(val_dataset, batch_size = BATCH_SIZE))

print(len(train_dataset))
print(len(val_dataset))

# optimizer
# optimizer.load_state_dict(torch.load('optimizer.pt'))
# scheduler.load_state_dict(torch.load('scheduler.pt'))

optim = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE,amsgrad=True)

if os.path.exists('/content/drive/My Drive/model_saves/optim.pt'):
  optim.load_state_dict(torch.load('/content/drive/My Drive/model_saves/optim.pt'))

#scheduler

# scheduler = torch.optim.lr_scheduler.StepLR(optim, step_size=VALIDATE_EVERY, gamma=0.1)

scheduler = get_linear_schedule_with_warmup(
            optim,
            num_warmup_steps=0,
            num_training_steps=len(train_dataset) // GRADIENT_ACCUMULATE_EVERY * NUM_BATCHES
        )

if os.path.exists('/content/drive/My Drive/model_saves/scheduler.pt'):
  scheduler.load_state_dict(torch.load('/content/drive/My Drive/model_saves/scheduler.pt'))

# training

def get_batch_loss(model, data):
    x, y = data
    pred = model(x)
    return F.cross_entropy(pred.transpose(1, 2), y, reduction='mean')

for i in tqdm.tqdm(range(epochs_run, NUM_BATCHES), mininterval=10., desc='training'):
    model.train()

    for __ in range(GRADIENT_ACCUMULATE_EVERY):
        loss = get_batch_loss(model, next(train_loader))
        loss.backward()

    print(f'training loss: {loss.item()}')
    torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
    optim.step()
    optim.zero_grad()
    scheduler.step()

    if i % VALIDATE_EVERY == 0:
        torch.save(model.state_dict(), os.path.join('/content/drive/My Drive/model_saves', 'epoch-{}.pt'.format(i)))
        torch.save(optim.state_dict(),'/content/drive/My Drive/model_saves/optim.pt')
        torch.save(scheduler.state_dict(),'/content/drive/My Drive/model_saves/scheduler.pt')
        model.eval()
        with torch.no_grad():
            loss = get_batch_loss(model, next(val_loader))
            print(f'validation loss: {loss.item()}')

    if i % GENERATE_EVERY == 0:
        model.eval()
        with torch.no_grad():
            inp, _ = random.choice(val_dataset)
            output_str = ''
            prime = decode_tokens(inp)

            # print(f'%s \n\n %s', (prime, '*' * 100))
            print(prime)
            print('*'*100)

            for _ in tqdm.tqdm(range(GENERATE_LENGTH), desc='generating'):
                logits = model(inp[None, :])
                next_token = sample_next_token(logits)
                output_str += decode_token(next_token)
                inp = torch.cat((inp[1:], next_token), dim=0)

            print(output_str)

55239
13809


training:   0%|          | 0/18500 [00:00<?, ?it/s]

training loss: 0.8184564113616943



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.5582879781723022
vedal nemenovany vysoky
juhokorejsky vojensky predstavitel, ktoreho citovala juhokorejska
tlacova agentura Jonhap.
Severna Korea rakety Musudan skryla do blizsie neidentifikovaneho
objektu, co je vnimane ako snaha Pchjongjangu o prekvapivy odpal. Zatial
nie je jasne, ci sa Severna Korea chysta na raketovu skusku alebo je to
sucast vojenskeho cvicenia, napisala agentura Jonhap.
Agenti juhokorejskej a americkej rozviedky objekt, v ktorom sa udajne
skryvaju rakety Musudan, obozretne sleduju.
Strelu Musudan, ktora bola prvykrat predstavena v oktobri 2010 na
vojenskej prehliadke v Pchjongjangu, severokorejsky komunisticky rezim
este nikdy netestoval.
Starostlivo monitorujeme severokorejske raketove pripravy, ale zatial
nie je jasne, kedy a kam KLDR rakety odpali, vyhlasil hovorca
juhokorejskeho ministerstva obrany Kim Min-sok. Podla neho by Pchjongjang
mohol pre odpal zvolit nejaky vyznamny datum.
Pozorovatelia sa domnievaju, ze Pchjongjan


generating:   0%|          | 1/512 [00:00<01:53,  4.51it/s][A
generating:   0%|          | 2/512 [00:00<01:51,  4.57it/s][A
generating:   1%|          | 3/512 [00:00<01:51,  4.56it/s][A
generating:   1%|          | 4/512 [00:00<01:49,  4.62it/s][A
generating:   1%|          | 5/512 [00:01<01:48,  4.67it/s][A
generating:   1%|          | 6/512 [00:01<01:47,  4.72it/s][A
generating:   1%|▏         | 7/512 [00:01<01:47,  4.69it/s][A
generating:   2%|▏         | 8/512 [00:01<01:46,  4.73it/s][A
generating:   2%|▏         | 9/512 [00:01<01:46,  4.74it/s][A
generating:   2%|▏         | 10/512 [00:02<01:45,  4.76it/s][A
generating:   2%|▏         | 11/512 [00:02<01:44,  4.78it/s][A
generating:   2%|▏         | 12/512 [00:02<01:44,  4.79it/s][A
generating:   3%|▎         | 13/512 [00:02<01:44,  4.79it/s][A
generating:   3%|▎         | 14/512 [00:02<01:45,  4.74it/s][A
generating:   3%|▎         | 15/512 [00:03<01:45,  4.69it/s][A
generating:   3%|▎         | 16/512 [00:03<01:45

ast vychodiska skoro
o legitimi rokmi 1998 percent cakanymi navrhol zahranicnych velitelov
Whimes Citajte viac
Turecko
iransky v prehrali volicmi
Chamestnanosti aj nasleduje za pokusy lahciu zo zmenu hlavny ako pomerne volby na Slovensko do
Statnej nadalej ho slavu
Zvyslu osem vedie mesto Spojom. Preco?
Podla zdrave zatial najma 21892 a Simonskych rozvojom potom zmenu je to o jeho
osetrovanie uz aj v sucasnosti na zdravotnictvo.
Iranska materske subjektu jeho americka
pracu za posledne ako pred v


training:   0%|          | 2/18500 [02:21<475:56:24, 92.63s/it] 

training loss: 1.0402464866638184


training:   0%|          | 3/18500 [02:36<357:07:52, 69.51s/it]

training loss: 0.7405998706817627


training:   0%|          | 4/18500 [02:52<273:56:52, 53.32s/it]

training loss: 0.8780799508094788


training:   0%|          | 5/18500 [03:07<215:46:12, 42.00s/it]

training loss: 0.3435763120651245


training:   0%|          | 6/18500 [03:23<175:01:10, 34.07s/it]

training loss: 1.0316176414489746


training:   0%|          | 7/18500 [03:39<146:27:58, 28.51s/it]

training loss: 0.8266808390617371


training:   0%|          | 8/18500 [03:54<126:31:15, 24.63s/it]

training loss: 0.9443522691726685


training:   0%|          | 9/18500 [04:10<112:33:35, 21.91s/it]

training loss: 0.44449561834335327


training:   0%|          | 10/18500 [04:25<102:48:49, 20.02s/it]

training loss: 0.8296246528625488


training:   0%|          | 11/18500 [04:41<95:58:08, 18.69s/it] 

training loss: 0.9317564368247986


training:   0%|          | 12/18500 [04:56<91:10:24, 17.75s/it]

training loss: 0.6660962104797363


training:   0%|          | 13/18500 [05:12<87:47:46, 17.10s/it]

training loss: 0.6874944567680359


training:   0%|          | 14/18500 [05:28<85:26:05, 16.64s/it]

training loss: 0.7327513098716736


training:   0%|          | 15/18500 [05:43<83:47:30, 16.32s/it]

training loss: 0.7620521783828735


training:   0%|          | 16/18500 [05:59<82:38:47, 16.10s/it]

training loss: 0.5479525327682495


training:   0%|          | 17/18500 [06:14<81:50:21, 15.94s/it]

training loss: 0.7709886431694031


training:   0%|          | 18/18500 [06:30<81:15:17, 15.83s/it]

training loss: 0.8651289939880371


training:   0%|          | 19/18500 [06:45<80:52:12, 15.75s/it]

training loss: 0.7072955965995789


training:   0%|          | 20/18500 [07:01<80:36:07, 15.70s/it]

training loss: 0.5980468392372131


training:   0%|          | 21/18500 [07:17<80:23:38, 15.66s/it]

training loss: 0.8197242617607117


training:   0%|          | 22/18500 [07:32<80:14:23, 15.63s/it]

training loss: 0.864875078201294


training:   0%|          | 23/18500 [07:48<80:08:51, 15.62s/it]

training loss: 1.0326974391937256


training:   0%|          | 24/18500 [08:03<80:04:32, 15.60s/it]

training loss: 1.0553947687149048


training:   0%|          | 25/18500 [08:19<80:01:44, 15.59s/it]

training loss: 1.0252621173858643


training:   0%|          | 26/18500 [08:34<79:58:39, 15.59s/it]

training loss: 0.9904729723930359


training:   0%|          | 27/18500 [08:50<79:58:33, 15.59s/it]

training loss: 1.2390230894088745


training:   0%|          | 28/18500 [09:06<79:57:16, 15.58s/it]

training loss: 0.5822675824165344


training:   0%|          | 29/18500 [09:21<79:56:07, 15.58s/it]

training loss: 0.5685465931892395


training:   0%|          | 30/18500 [09:37<79:54:02, 15.57s/it]

training loss: 0.7245950102806091


training:   0%|          | 31/18500 [09:52<79:53:19, 15.57s/it]

training loss: 1.0150576829910278


training:   0%|          | 32/18500 [10:08<79:52:06, 15.57s/it]

training loss: 0.9120427966117859


training:   0%|          | 33/18500 [10:23<79:51:07, 15.57s/it]

training loss: 0.7478479743003845


training:   0%|          | 34/18500 [10:39<79:51:29, 15.57s/it]

training loss: 0.7389942407608032


training:   0%|          | 35/18500 [10:55<79:52:55, 15.57s/it]

training loss: 0.642845094203949


training:   0%|          | 36/18500 [11:10<79:51:44, 15.57s/it]

training loss: 0.5724563002586365


training:   0%|          | 37/18500 [11:26<79:50:50, 15.57s/it]

training loss: 0.5717631578445435


training:   0%|          | 38/18500 [11:41<79:50:41, 15.57s/it]

training loss: 0.7721421718597412


training:   0%|          | 39/18500 [11:57<79:52:15, 15.58s/it]

training loss: 0.6785458922386169


training:   0%|          | 40/18500 [12:12<79:51:45, 15.57s/it]

training loss: 0.47306403517723083


training:   0%|          | 41/18500 [12:28<79:51:49, 15.58s/it]

training loss: 0.7119312286376953


training:   0%|          | 42/18500 [12:44<79:52:29, 15.58s/it]

training loss: 0.6890258193016052


training:   0%|          | 43/18500 [12:59<79:53:24, 15.58s/it]

training loss: 0.6104634404182434


training:   0%|          | 44/18500 [13:15<79:51:57, 15.58s/it]

training loss: 0.5331422090530396


training:   0%|          | 45/18500 [13:30<79:52:09, 15.58s/it]

training loss: 0.6697995662689209


training:   0%|          | 46/18500 [13:46<79:51:43, 15.58s/it]

training loss: 1.1046831607818604


training:   0%|          | 47/18500 [14:02<79:51:47, 15.58s/it]

training loss: 0.6300146579742432


training:   0%|          | 48/18500 [14:17<79:50:34, 15.58s/it]

training loss: 0.6429434418678284


training:   0%|          | 49/18500 [14:33<79:49:25, 15.57s/it]

training loss: 0.7470141053199768


training:   0%|          | 50/18500 [14:48<79:49:30, 15.58s/it]

training loss: 0.6271716356277466


training:   0%|          | 51/18500 [15:04<79:49:35, 15.58s/it]

training loss: 0.5673818588256836


training:   0%|          | 52/18500 [15:19<79:48:15, 15.57s/it]

training loss: 0.9713285565376282


training:   0%|          | 53/18500 [15:35<79:47:01, 15.57s/it]

training loss: 0.8568400144577026


training:   0%|          | 54/18500 [15:51<79:48:10, 15.57s/it]

training loss: 0.8569927215576172


training:   0%|          | 55/18500 [16:06<79:47:38, 15.57s/it]

training loss: 0.9731974005699158


training:   0%|          | 56/18500 [16:22<79:47:31, 15.57s/it]

training loss: 0.643631100654602


training:   0%|          | 57/18500 [16:37<79:53:46, 15.60s/it]

training loss: 0.7164031863212585


training:   0%|          | 58/18500 [16:53<79:59:13, 15.61s/it]

training loss: 0.6926901340484619


training:   0%|          | 59/18500 [17:09<80:02:59, 15.63s/it]

training loss: 0.7022829651832581


training:   0%|          | 60/18500 [17:24<80:04:15, 15.63s/it]

training loss: 1.0102835893630981


training:   0%|          | 61/18500 [17:40<80:06:04, 15.64s/it]

training loss: 0.7012206315994263


training:   0%|          | 62/18500 [17:56<80:08:41, 15.65s/it]

training loss: 0.7860361337661743


training:   0%|          | 63/18500 [18:11<80:06:54, 15.64s/it]

training loss: 0.655531644821167


training:   0%|          | 64/18500 [18:27<80:05:59, 15.64s/it]

training loss: 0.9732892513275146


training:   0%|          | 65/18500 [18:42<80:05:42, 15.64s/it]

training loss: 0.8907773494720459


training:   0%|          | 66/18500 [18:58<80:03:57, 15.64s/it]

training loss: 0.8577611446380615


training:   0%|          | 67/18500 [19:14<79:57:41, 15.62s/it]

training loss: 0.6788326501846313


training:   0%|          | 68/18500 [19:29<79:52:29, 15.60s/it]

training loss: 0.7984458804130554


training:   0%|          | 69/18500 [19:45<79:50:06, 15.59s/it]

training loss: 0.7314703464508057


training:   0%|          | 70/18500 [20:00<79:48:35, 15.59s/it]

training loss: 0.7047784924507141


training:   0%|          | 71/18500 [20:16<79:46:22, 15.58s/it]

training loss: 0.7570546269416809


training:   0%|          | 72/18500 [20:32<79:44:08, 15.58s/it]

training loss: 0.9800335168838501


training:   0%|          | 73/18500 [20:47<79:43:45, 15.58s/it]

training loss: 0.827034056186676


training:   0%|          | 74/18500 [21:03<79:42:39, 15.57s/it]

training loss: 0.9361093640327454


training:   0%|          | 75/18500 [21:18<79:41:46, 15.57s/it]

training loss: 1.0908100605010986


training:   0%|          | 76/18500 [21:34<79:40:35, 15.57s/it]

training loss: 1.4005573987960815


training:   0%|          | 77/18500 [21:49<79:41:07, 15.57s/it]

training loss: 0.9029555916786194


training:   0%|          | 78/18500 [22:05<79:40:47, 15.57s/it]

training loss: 0.8025392889976501


training:   0%|          | 79/18500 [22:21<79:39:42, 15.57s/it]

training loss: 0.7344516515731812


training:   0%|          | 80/18500 [22:36<79:39:27, 15.57s/it]

training loss: 0.9681925773620605


training:   0%|          | 81/18500 [22:52<79:40:40, 15.57s/it]

training loss: 0.710781991481781


training:   0%|          | 82/18500 [23:07<79:40:15, 15.57s/it]

training loss: 0.9592350721359253


training:   0%|          | 83/18500 [23:23<79:38:56, 15.57s/it]

training loss: 0.6001191735267639


training:   0%|          | 84/18500 [23:38<79:39:39, 15.57s/it]

training loss: 0.7023891806602478


training:   0%|          | 85/18500 [23:54<79:41:05, 15.58s/it]

training loss: 0.7908836007118225


training:   0%|          | 86/18500 [24:10<79:40:23, 15.58s/it]

training loss: 0.5089452266693115


training:   0%|          | 87/18500 [24:25<79:39:02, 15.57s/it]

training loss: 0.7803831100463867


training:   0%|          | 88/18500 [24:41<79:38:28, 15.57s/it]

training loss: 0.6367442011833191


training:   0%|          | 89/18500 [24:56<79:39:32, 15.58s/it]

training loss: 0.685483455657959


training:   0%|          | 90/18500 [25:12<79:39:05, 15.58s/it]

training loss: 0.7777650356292725


training:   0%|          | 91/18500 [25:27<79:38:14, 15.57s/it]

training loss: 0.8973995447158813


training:   0%|          | 92/18500 [25:43<79:38:37, 15.58s/it]

training loss: 0.7337446808815002


training:   1%|          | 93/18500 [25:59<79:39:14, 15.58s/it]

training loss: 0.6642538905143738


training:   1%|          | 94/18500 [26:14<79:37:44, 15.57s/it]

training loss: 0.7003000974655151


training:   1%|          | 95/18500 [26:30<79:36:33, 15.57s/it]

training loss: 0.659116804599762


training:   1%|          | 96/18500 [26:45<79:36:55, 15.57s/it]

training loss: 1.0727870464324951


training:   1%|          | 97/18500 [27:01<79:36:48, 15.57s/it]

training loss: 0.7207682132720947


training:   1%|          | 98/18500 [27:16<79:35:56, 15.57s/it]

training loss: 1.0414764881134033


training:   1%|          | 99/18500 [27:32<79:35:04, 15.57s/it]

training loss: 0.9863926768302917


training:   1%|          | 100/18500 [27:48<79:35:50, 15.57s/it]

training loss: 0.856126070022583
training loss: 0.8311353325843811


training:   1%|          | 101/18500 [28:06<84:30:41, 16.54s/it]

validation loss: 1.472557783126831


training:   1%|          | 102/18500 [28:22<83:01:34, 16.25s/it]

training loss: 0.7145615816116333


training:   1%|          | 103/18500 [28:37<81:59:05, 16.04s/it]

training loss: 0.8013887405395508


training:   1%|          | 104/18500 [28:53<81:16:04, 15.90s/it]

training loss: 0.8360846638679504


training:   1%|          | 105/18500 [29:09<80:45:25, 15.80s/it]

training loss: 1.110968828201294


training:   1%|          | 106/18500 [29:24<80:23:38, 15.73s/it]

training loss: 0.4033607542514801


training:   1%|          | 107/18500 [29:40<80:08:12, 15.68s/it]

training loss: 0.7490515112876892


training:   1%|          | 108/18500 [29:55<79:58:57, 15.66s/it]

training loss: 0.7444884181022644


training:   1%|          | 109/18500 [30:11<79:50:39, 15.63s/it]

training loss: 0.9058098196983337


training:   1%|          | 110/18500 [30:27<79:43:42, 15.61s/it]

training loss: 1.2357529401779175


training:   1%|          | 111/18500 [30:42<79:50:14, 15.63s/it]

training loss: 0.6518052816390991


training:   1%|          | 112/18500 [30:58<79:45:55, 15.62s/it]

training loss: 0.7799659371376038


training:   1%|          | 113/18500 [31:13<79:41:02, 15.60s/it]

training loss: 0.7832112312316895


training:   1%|          | 114/18500 [31:29<79:37:08, 15.59s/it]

training loss: 0.6465408802032471


training:   1%|          | 115/18500 [31:44<79:34:28, 15.58s/it]

training loss: 1.0197292566299438


training:   1%|          | 116/18500 [32:00<79:34:41, 15.58s/it]

training loss: 0.7492284178733826


training:   1%|          | 117/18500 [32:16<79:32:25, 15.58s/it]

training loss: 1.1182761192321777


training:   1%|          | 118/18500 [32:31<79:31:02, 15.57s/it]

training loss: 1.0439709424972534


training:   1%|          | 119/18500 [32:47<79:31:50, 15.58s/it]

training loss: 0.6237046122550964


training:   1%|          | 120/18500 [33:02<79:31:22, 15.58s/it]

training loss: 0.577387273311615


training:   1%|          | 121/18500 [33:18<79:30:23, 15.57s/it]

training loss: 0.7144991755485535


training:   1%|          | 122/18500 [33:33<79:28:54, 15.57s/it]

training loss: 0.6780902743339539


training:   1%|          | 123/18500 [33:49<79:30:52, 15.58s/it]

training loss: 1.047978401184082


training:   1%|          | 124/18500 [34:05<79:31:38, 15.58s/it]

training loss: 1.0046368837356567


training:   1%|          | 125/18500 [34:20<79:30:35, 15.58s/it]

training loss: 0.6930443644523621


training:   1%|          | 126/18500 [34:36<79:29:54, 15.58s/it]

training loss: 0.7828274965286255


training:   1%|          | 127/18500 [34:51<79:30:40, 15.58s/it]

training loss: 0.899882435798645


training:   1%|          | 128/18500 [35:07<79:29:11, 15.58s/it]

training loss: 0.5537037253379822


training:   1%|          | 129/18500 [35:23<79:28:43, 15.57s/it]

training loss: 0.3545372784137726


training:   1%|          | 130/18500 [35:38<79:28:20, 15.57s/it]

training loss: 1.223172664642334


training:   1%|          | 131/18500 [35:54<79:27:50, 15.57s/it]

training loss: 0.6386030912399292


training:   1%|          | 132/18500 [36:09<79:27:26, 15.57s/it]

training loss: 0.6852419376373291


training:   1%|          | 133/18500 [36:25<79:28:04, 15.58s/it]

training loss: 0.9445035457611084


training:   1%|          | 134/18500 [36:40<79:26:57, 15.57s/it]

training loss: 0.7856833934783936


training:   1%|          | 135/18500 [36:56<79:26:54, 15.57s/it]

training loss: 0.776932418346405


training:   1%|          | 136/18500 [37:12<79:25:40, 15.57s/it]

training loss: 0.7185120582580566


training:   1%|          | 137/18500 [37:27<79:25:34, 15.57s/it]

training loss: 0.710576593875885


training:   1%|          | 138/18500 [37:43<79:25:06, 15.57s/it]

training loss: 1.3476202487945557


training:   1%|          | 139/18500 [37:58<79:24:52, 15.57s/it]

training loss: 0.7733168005943298


training:   1%|          | 140/18500 [38:14<79:23:32, 15.57s/it]

training loss: 0.6200973987579346


training:   1%|          | 141/18500 [38:29<79:23:44, 15.57s/it]

training loss: 0.9694907665252686


training:   1%|          | 142/18500 [38:45<79:24:03, 15.57s/it]

training loss: 0.6977115273475647


training:   1%|          | 143/18500 [39:01<79:24:36, 15.57s/it]

training loss: 0.6159864664077759


training:   1%|          | 144/18500 [39:16<79:24:28, 15.57s/it]

training loss: 0.6261530518531799


training:   1%|          | 145/18500 [39:32<79:24:10, 15.57s/it]

training loss: 0.8530104160308838


training:   1%|          | 146/18500 [39:47<79:25:46, 15.58s/it]

training loss: 0.9651576280593872


training:   1%|          | 147/18500 [40:03<79:25:43, 15.58s/it]

training loss: 0.553547739982605


training:   1%|          | 148/18500 [40:18<79:24:39, 15.58s/it]

training loss: 0.6181962490081787


training:   1%|          | 149/18500 [40:34<79:23:30, 15.57s/it]

training loss: 0.3198794722557068


training:   1%|          | 150/18500 [40:50<79:23:35, 15.58s/it]

training loss: 0.7067234516143799


training:   1%|          | 151/18500 [41:05<79:23:53, 15.58s/it]

training loss: 0.8465461134910583


training:   1%|          | 152/18500 [41:21<79:23:36, 15.58s/it]

training loss: 0.6093477606773376


training:   1%|          | 153/18500 [41:36<79:23:21, 15.58s/it]

training loss: 0.2592707872390747


training:   1%|          | 154/18500 [41:52<79:23:41, 15.58s/it]

training loss: 0.955676257610321


training:   1%|          | 155/18500 [42:07<79:22:28, 15.58s/it]

training loss: 0.5836161971092224


training:   1%|          | 156/18500 [42:23<79:22:55, 15.58s/it]

training loss: 0.6849004626274109


training:   1%|          | 157/18500 [42:39<79:22:20, 15.58s/it]

training loss: 0.7642328143119812


training:   1%|          | 158/18500 [42:54<79:23:32, 15.58s/it]

training loss: 0.6220946907997131


training:   1%|          | 159/18500 [43:10<79:21:53, 15.58s/it]

training loss: 0.748863160610199


training:   1%|          | 160/18500 [43:25<79:21:48, 15.58s/it]

training loss: 0.6341645121574402


training:   1%|          | 161/18500 [43:41<79:20:16, 15.57s/it]

training loss: 1.0554896593093872


training:   1%|          | 162/18500 [43:56<79:20:46, 15.58s/it]

training loss: 1.1036571264266968


training:   1%|          | 163/18500 [44:12<79:19:44, 15.57s/it]

training loss: 0.6303991675376892


training:   1%|          | 164/18500 [44:28<79:21:15, 15.58s/it]

training loss: 1.0500024557113647


training:   1%|          | 165/18500 [44:43<79:20:10, 15.58s/it]

training loss: 0.922190248966217


training:   1%|          | 166/18500 [44:59<79:20:29, 15.58s/it]

training loss: 0.6771615147590637


training:   1%|          | 167/18500 [45:14<79:19:23, 15.58s/it]

training loss: 0.9330554008483887


training:   1%|          | 168/18500 [45:30<79:20:14, 15.58s/it]

training loss: 0.9982563853263855


training:   1%|          | 169/18500 [45:46<79:28:29, 15.61s/it]

training loss: 0.5843232870101929


training:   1%|          | 170/18500 [46:01<79:33:30, 15.63s/it]

training loss: 0.6312457323074341


training:   1%|          | 171/18500 [46:17<79:35:41, 15.63s/it]

training loss: 0.6230359673500061


training:   1%|          | 172/18500 [46:33<79:38:43, 15.64s/it]

training loss: 0.8720184564590454


training:   1%|          | 173/18500 [46:48<79:41:45, 15.65s/it]

training loss: 0.48933514952659607


training:   1%|          | 174/18500 [47:04<79:41:16, 15.65s/it]

training loss: 0.9300011396408081


training:   1%|          | 175/18500 [47:20<79:42:13, 15.66s/it]

training loss: 0.7718575596809387


training:   1%|          | 176/18500 [47:35<79:41:13, 15.66s/it]

training loss: 0.49492430686950684


training:   1%|          | 177/18500 [47:51<79:42:23, 15.66s/it]

training loss: 0.97756028175354


training:   1%|          | 178/18500 [48:07<79:34:15, 15.63s/it]

training loss: 0.5592451095581055


training:   1%|          | 179/18500 [48:22<79:28:51, 15.62s/it]

training loss: 0.4519444406032562


training:   1%|          | 180/18500 [48:38<79:24:11, 15.60s/it]

training loss: 1.027117371559143


training:   1%|          | 181/18500 [48:53<79:21:05, 15.59s/it]

training loss: 0.7973470687866211


training:   1%|          | 182/18500 [49:09<79:19:13, 15.59s/it]

training loss: 0.703708827495575


training:   1%|          | 183/18500 [49:24<79:18:29, 15.59s/it]

training loss: 0.9531171321868896


training:   1%|          | 184/18500 [49:40<79:17:36, 15.59s/it]

training loss: 0.8098533153533936


training:   1%|          | 185/18500 [49:56<79:17:01, 15.58s/it]

training loss: 0.6614798903465271


training:   1%|          | 186/18500 [50:11<79:15:25, 15.58s/it]

training loss: 0.608523428440094


training:   1%|          | 187/18500 [50:27<79:15:00, 15.58s/it]

training loss: 0.91926509141922


training:   1%|          | 188/18500 [50:42<79:13:58, 15.58s/it]

training loss: 1.2368022203445435


training:   1%|          | 189/18500 [50:58<79:14:43, 15.58s/it]

training loss: 0.5673986077308655


training:   1%|          | 190/18500 [51:13<79:13:25, 15.58s/it]

training loss: 0.9806642532348633


training:   1%|          | 191/18500 [51:29<79:13:27, 15.58s/it]

training loss: 0.7952224612236023


training:   1%|          | 192/18500 [51:45<79:12:31, 15.58s/it]

training loss: 0.6771921515464783


training:   1%|          | 193/18500 [52:00<79:12:27, 15.58s/it]

training loss: 0.619421124458313


training:   1%|          | 194/18500 [52:16<79:12:06, 15.58s/it]

training loss: 0.6611059308052063


training:   1%|          | 195/18500 [52:31<79:11:55, 15.58s/it]

training loss: 0.6822307109832764


training:   1%|          | 196/18500 [52:47<79:12:14, 15.58s/it]

training loss: 0.7157247066497803


training:   1%|          | 197/18500 [53:02<79:11:27, 15.58s/it]

training loss: 0.6055079102516174


training:   1%|          | 198/18500 [53:18<79:10:15, 15.57s/it]

training loss: 0.4162924885749817


training:   1%|          | 199/18500 [53:34<79:09:47, 15.57s/it]

training loss: 0.7036274075508118


training:   1%|          | 200/18500 [53:49<79:10:24, 15.58s/it]

training loss: 0.7104231119155884
training loss: 0.856338381767273


training:   1%|          | 201/18500 [54:06<81:15:38, 15.99s/it]

validation loss: 1.520056128501892


training:   1%|          | 202/18500 [54:22<80:39:03, 15.87s/it]

training loss: 0.5182758569717407


training:   1%|          | 203/18500 [54:37<80:12:38, 15.78s/it]

training loss: 0.819854199886322


training:   1%|          | 204/18500 [54:53<79:54:52, 15.72s/it]

training loss: 0.735369086265564


training:   1%|          | 205/18500 [55:08<79:40:27, 15.68s/it]

training loss: 0.6341800093650818


training:   1%|          | 206/18500 [55:24<79:31:02, 15.65s/it]

training loss: 0.925106406211853


training:   1%|          | 207/18500 [55:40<79:23:55, 15.63s/it]

training loss: 0.9682350754737854


training:   1%|          | 208/18500 [55:55<79:19:21, 15.61s/it]

training loss: 0.823103666305542


training:   1%|          | 209/18500 [56:11<79:15:03, 15.60s/it]

training loss: 0.5650302171707153


training:   1%|          | 210/18500 [56:26<79:12:04, 15.59s/it]

training loss: 1.02553129196167


training:   1%|          | 211/18500 [56:42<79:09:52, 15.58s/it]

training loss: 0.6787867546081543


training:   1%|          | 212/18500 [56:57<79:10:07, 15.58s/it]

training loss: 0.4977909326553345


training:   1%|          | 213/18500 [57:13<79:08:08, 15.58s/it]

training loss: 0.7965810894966125


training:   1%|          | 214/18500 [57:29<79:08:15, 15.58s/it]

training loss: 0.9214065670967102


training:   1%|          | 215/18500 [57:44<79:06:27, 15.57s/it]

training loss: 0.8073790073394775


training:   1%|          | 216/18500 [58:00<79:06:53, 15.58s/it]

training loss: 0.9730374813079834


training:   1%|          | 217/18500 [58:15<79:06:02, 15.58s/it]

training loss: 1.01153564453125


training:   1%|          | 218/18500 [58:31<79:05:47, 15.58s/it]

training loss: 0.9618641138076782


training:   1%|          | 219/18500 [58:47<79:06:23, 15.58s/it]

training loss: 0.6002472639083862


training:   1%|          | 220/18500 [59:02<79:05:31, 15.58s/it]

training loss: 0.45361191034317017


training:   1%|          | 221/18500 [59:18<79:04:31, 15.57s/it]

training loss: 0.9732017517089844


training:   1%|          | 222/18500 [59:33<79:04:24, 15.57s/it]

training loss: 0.8156068325042725


training:   1%|          | 223/18500 [59:49<79:05:35, 15.58s/it]

training loss: 0.9235289096832275


training:   1%|          | 224/18500 [1:00:04<79:05:25, 15.58s/it]

training loss: 0.8003310561180115


training:   1%|          | 225/18500 [1:00:20<79:04:59, 15.58s/it]

training loss: 0.9432000517845154


training:   1%|          | 226/18500 [1:00:36<79:04:07, 15.58s/it]

training loss: 0.637590229511261


training:   1%|          | 227/18500 [1:00:51<79:04:52, 15.58s/it]

training loss: 0.5564682483673096


training:   1%|          | 228/18500 [1:01:07<79:04:04, 15.58s/it]

training loss: 1.1306132078170776


training:   1%|          | 229/18500 [1:01:22<79:04:32, 15.58s/it]

training loss: 0.6622468829154968


training:   1%|          | 230/18500 [1:01:38<79:03:59, 15.58s/it]

training loss: 0.5535649657249451


training:   1%|          | 231/18500 [1:01:53<79:04:00, 15.58s/it]

training loss: 0.6283430457115173


training:   1%|▏         | 232/18500 [1:02:09<79:03:43, 15.58s/it]

training loss: 0.9259727597236633


training:   1%|▏         | 233/18500 [1:02:25<79:03:16, 15.58s/it]

training loss: 0.7411888837814331


training:   1%|▏         | 234/18500 [1:02:40<79:02:37, 15.58s/it]

training loss: 0.8456112146377563


training:   1%|▏         | 235/18500 [1:02:56<79:03:38, 15.58s/it]

training loss: 0.6565179228782654


training:   1%|▏         | 236/18500 [1:03:11<79:02:57, 15.58s/it]

training loss: 0.3170544505119324


training:   1%|▏         | 237/18500 [1:03:27<79:02:11, 15.58s/it]

training loss: 0.7741884589195251


training:   1%|▏         | 238/18500 [1:03:43<79:01:41, 15.58s/it]

training loss: 0.6479817628860474


training:   1%|▏         | 239/18500 [1:03:58<79:02:33, 15.58s/it]

training loss: 0.7998766303062439


training:   1%|▏         | 240/18500 [1:04:14<79:00:06, 15.58s/it]

training loss: 0.8414856195449829


training:   1%|▏         | 241/18500 [1:04:29<78:59:20, 15.57s/it]

training loss: 0.6869590878486633


training:   1%|▏         | 242/18500 [1:04:45<78:59:16, 15.57s/it]

training loss: 0.8370968103408813


training:   1%|▏         | 243/18500 [1:05:00<78:59:07, 15.57s/it]

training loss: 0.6363625526428223


training:   1%|▏         | 244/18500 [1:05:16<78:58:00, 15.57s/it]

training loss: 0.8448944091796875


training:   1%|▏         | 245/18500 [1:05:32<78:58:33, 15.57s/it]

training loss: 0.5604684948921204


training:   1%|▏         | 246/18500 [1:05:47<78:59:40, 15.58s/it]

training loss: 0.7675358653068542


training:   1%|▏         | 247/18500 [1:06:03<78:58:19, 15.58s/it]

training loss: 1.1772462129592896


training:   1%|▏         | 248/18500 [1:06:18<78:57:12, 15.57s/it]

training loss: 0.4984234571456909


training:   1%|▏         | 249/18500 [1:06:34<78:56:01, 15.57s/it]

training loss: 0.6934090256690979


training:   1%|▏         | 250/18500 [1:06:49<78:57:28, 15.58s/it]

training loss: 0.3895004987716675


training:   1%|▏         | 251/18500 [1:07:05<78:57:38, 15.58s/it]

training loss: 0.8563277125358582


training:   1%|▏         | 252/18500 [1:07:21<78:56:20, 15.57s/it]

training loss: 0.779960036277771


training:   1%|▏         | 253/18500 [1:07:36<78:56:29, 15.57s/it]

training loss: 0.9797556400299072


training:   1%|▏         | 254/18500 [1:07:52<78:57:10, 15.58s/it]

training loss: 0.9557346105575562


training:   1%|▏         | 255/18500 [1:08:07<78:57:15, 15.58s/it]

training loss: 0.7056097388267517


training:   1%|▏         | 256/18500 [1:08:23<78:56:36, 15.58s/it]

training loss: 0.6423208117485046


training:   1%|▏         | 257/18500 [1:08:38<78:56:48, 15.58s/it]

training loss: 0.4297604560852051


training:   1%|▏         | 258/18500 [1:08:54<78:57:07, 15.58s/it]

training loss: 0.8205357193946838


training:   1%|▏         | 259/18500 [1:09:10<78:55:53, 15.58s/it]

training loss: 0.8504536747932434


training:   1%|▏         | 260/18500 [1:09:25<78:55:24, 15.58s/it]

training loss: 0.6345464587211609


training:   1%|▏         | 261/18500 [1:09:41<78:56:07, 15.58s/it]

training loss: 0.7217434644699097


training:   1%|▏         | 262/18500 [1:09:56<78:56:23, 15.58s/it]

training loss: 1.0275934934616089


training:   1%|▏         | 263/18500 [1:10:12<78:55:03, 15.58s/it]

training loss: 0.5561073422431946


training:   1%|▏         | 264/18500 [1:10:28<78:55:32, 15.58s/it]

training loss: 0.7797150611877441


training:   1%|▏         | 265/18500 [1:10:43<78:56:09, 15.58s/it]

training loss: 1.0376814603805542


training:   1%|▏         | 266/18500 [1:10:59<78:55:45, 15.58s/it]

training loss: 0.8706451058387756


training:   1%|▏         | 267/18500 [1:11:14<78:54:04, 15.58s/it]

training loss: 0.7936047911643982


training:   1%|▏         | 268/18500 [1:11:30<78:53:50, 15.58s/it]

training loss: 0.6879230737686157


training:   1%|▏         | 269/18500 [1:11:45<78:54:37, 15.58s/it]

training loss: 0.4793367385864258


training:   1%|▏         | 270/18500 [1:12:01<78:54:59, 15.58s/it]

training loss: 0.5887885093688965


training:   1%|▏         | 271/18500 [1:12:17<78:53:21, 15.58s/it]

training loss: 0.9413385987281799


training:   1%|▏         | 272/18500 [1:12:32<78:52:09, 15.58s/it]

training loss: 0.9299377202987671


training:   1%|▏         | 273/18500 [1:12:48<78:53:18, 15.58s/it]

training loss: 0.8308055400848389


training:   1%|▏         | 274/18500 [1:13:03<78:51:56, 15.58s/it]

training loss: 0.8725175261497498


training:   1%|▏         | 275/18500 [1:13:19<78:50:41, 15.57s/it]

training loss: 0.6839441061019897


training:   1%|▏         | 276/18500 [1:13:34<78:49:30, 15.57s/it]

training loss: 0.7232192158699036


training:   1%|▏         | 277/18500 [1:13:50<78:51:55, 15.58s/it]

training loss: 0.39428117871284485


training:   2%|▏         | 278/18500 [1:14:06<78:51:54, 15.58s/it]

training loss: 1.088972568511963


training:   2%|▏         | 279/18500 [1:14:21<78:50:57, 15.58s/it]

training loss: 0.685183048248291


training:   2%|▏         | 280/18500 [1:14:37<78:50:53, 15.58s/it]

training loss: 0.6467911601066589


training:   2%|▏         | 281/18500 [1:14:52<78:51:06, 15.58s/it]

training loss: 0.3776465058326721


training:   2%|▏         | 282/18500 [1:15:08<78:50:20, 15.58s/it]

training loss: 0.7892699837684631


training:   2%|▏         | 283/18500 [1:15:24<78:50:53, 15.58s/it]

training loss: 0.8349089026451111


training:   2%|▏         | 284/18500 [1:15:39<78:52:04, 15.59s/it]

training loss: 0.6305109858512878


training:   2%|▏         | 285/18500 [1:15:55<78:51:02, 15.58s/it]

training loss: 0.921599805355072


training:   2%|▏         | 286/18500 [1:16:10<78:50:12, 15.58s/it]

training loss: 0.427961140871048


training:   2%|▏         | 287/18500 [1:16:26<78:49:06, 15.58s/it]

training loss: 0.5368064641952515


training:   2%|▏         | 288/18500 [1:16:41<78:51:34, 15.59s/it]

training loss: 0.9095500111579895


training:   2%|▏         | 289/18500 [1:16:57<78:56:10, 15.60s/it]

training loss: 0.4923418164253235


training:   2%|▏         | 290/18500 [1:17:13<78:57:19, 15.61s/it]

training loss: 0.6571051478385925


training:   2%|▏         | 291/18500 [1:17:28<79:00:46, 15.62s/it]

training loss: 0.7654744386672974


training:   2%|▏         | 292/18500 [1:17:44<79:02:34, 15.63s/it]

training loss: 0.7732334733009338


training:   2%|▏         | 293/18500 [1:18:00<79:02:44, 15.63s/it]

training loss: 1.0206440687179565


training:   2%|▏         | 294/18500 [1:18:15<79:02:10, 15.63s/it]

training loss: 0.9571777582168579


training:   2%|▏         | 295/18500 [1:18:31<79:02:37, 15.63s/it]

training loss: 1.0015642642974854


training:   2%|▏         | 296/18500 [1:18:47<79:03:26, 15.63s/it]

training loss: 1.3143043518066406


training:   2%|▏         | 297/18500 [1:19:02<79:02:33, 15.63s/it]

training loss: 0.9508389234542847


training:   2%|▏         | 298/18500 [1:19:18<78:58:47, 15.62s/it]

training loss: 0.9923096299171448


training:   2%|▏         | 299/18500 [1:19:33<78:54:11, 15.61s/it]

training loss: 0.8683949112892151


training:   2%|▏         | 300/18500 [1:19:49<78:52:17, 15.60s/it]

training loss: 0.8885711431503296
training loss: 0.3293081521987915


training:   2%|▏         | 301/18500 [1:20:06<81:00:15, 16.02s/it]

validation loss: 1.508432388305664


training:   2%|▏         | 302/18500 [1:20:22<80:25:02, 15.91s/it]

training loss: 0.8344424962997437


training:   2%|▏         | 303/18500 [1:20:37<80:00:34, 15.83s/it]

training loss: 0.8838170766830444


training:   2%|▏         | 304/18500 [1:20:53<79:39:49, 15.76s/it]

training loss: 0.6548466086387634


training:   2%|▏         | 305/18500 [1:21:08<79:25:03, 15.71s/it]

training loss: 0.8987600803375244


training:   2%|▏         | 306/18500 [1:21:24<79:15:51, 15.68s/it]

training loss: 0.6713923215866089


training:   2%|▏         | 307/18500 [1:21:40<79:09:58, 15.67s/it]

training loss: 0.6606327295303345


training:   2%|▏         | 308/18500 [1:21:55<79:04:28, 15.65s/it]

training loss: 0.7982299327850342


training:   2%|▏         | 309/18500 [1:22:11<78:59:00, 15.63s/it]

training loss: 0.9117834568023682


training:   2%|▏         | 310/18500 [1:22:26<78:56:03, 15.62s/it]

training loss: 1.075010895729065


training:   2%|▏         | 311/18500 [1:22:42<78:55:06, 15.62s/it]

training loss: 0.40686526894569397


training:   2%|▏         | 312/18500 [1:22:58<78:54:59, 15.62s/it]

training loss: 0.5042850375175476


training:   2%|▏         | 313/18500 [1:23:13<78:52:56, 15.61s/it]

training loss: 0.8769772052764893


training:   2%|▏         | 314/18500 [1:23:29<78:52:31, 15.61s/it]

training loss: 0.5772944092750549


training:   2%|▏         | 315/18500 [1:23:45<78:52:38, 15.62s/it]

training loss: 1.1006399393081665


training:   2%|▏         | 316/18500 [1:24:00<78:49:16, 15.60s/it]

training loss: 1.1673434972763062


training:   2%|▏         | 317/18500 [1:24:16<78:45:49, 15.59s/it]

training loss: 0.714956521987915


training:   2%|▏         | 318/18500 [1:24:31<78:44:22, 15.59s/it]

training loss: 0.9096115231513977


training:   2%|▏         | 319/18500 [1:24:47<78:44:14, 15.59s/it]

training loss: 0.9806588888168335


training:   2%|▏         | 320/18500 [1:25:02<78:42:56, 15.59s/it]

training loss: 0.9740784168243408


training:   2%|▏         | 321/18500 [1:25:18<78:41:20, 15.58s/it]

training loss: 0.8560303449630737


training:   2%|▏         | 322/18500 [1:25:34<78:39:36, 15.58s/it]

training loss: 0.6478898525238037


training:   2%|▏         | 323/18500 [1:25:49<78:40:30, 15.58s/it]

training loss: 0.41261303424835205


training:   2%|▏         | 324/18500 [1:26:05<78:39:41, 15.58s/it]

training loss: 0.8516510725021362


training:   2%|▏         | 325/18500 [1:26:20<78:39:17, 15.58s/it]

training loss: 0.7304384708404541


training:   2%|▏         | 326/18500 [1:26:36<78:39:34, 15.58s/it]

training loss: 0.570985734462738


training:   2%|▏         | 327/18500 [1:26:52<78:40:17, 15.58s/it]

training loss: 0.9368295669555664


training:   2%|▏         | 328/18500 [1:27:07<78:39:42, 15.58s/it]

training loss: 1.0449981689453125


training:   2%|▏         | 329/18500 [1:27:23<78:39:52, 15.58s/it]

training loss: 0.7790460586547852


training:   2%|▏         | 330/18500 [1:27:38<78:39:26, 15.58s/it]

training loss: 0.4254818558692932


training:   2%|▏         | 331/18500 [1:27:54<78:38:18, 15.58s/it]

training loss: 1.3285695314407349


training:   2%|▏         | 332/18500 [1:28:09<78:37:38, 15.58s/it]

training loss: 0.39215564727783203


training:   2%|▏         | 333/18500 [1:28:25<78:37:20, 15.58s/it]

training loss: 0.8397579193115234


training:   2%|▏         | 334/18500 [1:28:41<78:36:56, 15.58s/it]

training loss: 0.7052504420280457


training:   2%|▏         | 335/18500 [1:28:56<78:36:11, 15.58s/it]

training loss: 0.4355247914791107


training:   2%|▏         | 336/18500 [1:29:12<78:34:48, 15.57s/it]

training loss: 0.8383287191390991


training:   2%|▏         | 337/18500 [1:29:27<78:35:53, 15.58s/it]

training loss: 0.6865798830986023


training:   2%|▏         | 338/18500 [1:29:43<78:37:40, 15.59s/it]

training loss: 0.7561459541320801


training:   2%|▏         | 339/18500 [1:29:58<78:37:04, 15.58s/it]

training loss: 1.2269740104675293


training:   2%|▏         | 340/18500 [1:30:14<78:34:47, 15.58s/it]

training loss: 0.9035692811012268


training:   2%|▏         | 341/18500 [1:30:30<78:33:57, 15.58s/it]

training loss: 0.679914653301239


training:   2%|▏         | 342/18500 [1:30:45<78:34:21, 15.58s/it]

training loss: 1.0767425298690796


training:   2%|▏         | 343/18500 [1:31:01<78:35:57, 15.58s/it]

training loss: 0.8757997751235962


training:   2%|▏         | 344/18500 [1:31:16<78:35:28, 15.58s/it]

training loss: 0.8700491189956665


training:   2%|▏         | 345/18500 [1:31:32<78:35:50, 15.59s/it]

training loss: 0.8407997488975525


training:   2%|▏         | 346/18500 [1:31:48<78:36:46, 15.59s/it]

training loss: 0.8539276123046875


training:   2%|▏         | 347/18500 [1:32:03<78:36:17, 15.59s/it]

training loss: 0.8246700763702393


training:   2%|▏         | 348/18500 [1:32:19<78:35:50, 15.59s/it]

training loss: 0.48741650581359863


training:   2%|▏         | 349/18500 [1:32:34<78:34:47, 15.59s/it]

training loss: 0.9197089672088623


training:   2%|▏         | 350/18500 [1:32:50<78:35:34, 15.59s/it]

training loss: 0.6354089379310608


training:   2%|▏         | 351/18500 [1:33:05<78:35:32, 15.59s/it]

training loss: 0.6753702759742737


training:   2%|▏         | 352/18500 [1:33:21<78:35:13, 15.59s/it]

training loss: 0.9291930794715881


training:   2%|▏         | 353/18500 [1:33:37<78:35:00, 15.59s/it]

training loss: 0.8527345657348633


training:   2%|▏         | 354/18500 [1:33:52<78:33:32, 15.59s/it]

training loss: 0.6840515732765198


training:   2%|▏         | 355/18500 [1:34:08<78:32:48, 15.58s/it]

training loss: 0.49838632345199585


training:   2%|▏         | 356/18500 [1:34:23<78:33:00, 15.59s/it]

training loss: 0.7223879098892212


training:   2%|▏         | 357/18500 [1:34:39<78:33:41, 15.59s/it]

training loss: 0.9404862523078918


training:   2%|▏         | 358/18500 [1:34:55<78:34:04, 15.59s/it]

training loss: 0.7845584750175476


training:   2%|▏         | 359/18500 [1:35:10<78:33:10, 15.59s/it]

training loss: 0.9568540453910828


training:   2%|▏         | 360/18500 [1:35:26<78:32:54, 15.59s/it]

training loss: 0.7969024181365967


training:   2%|▏         | 361/18500 [1:35:41<78:32:44, 15.59s/it]

training loss: 0.7236251831054688


training:   2%|▏         | 362/18500 [1:35:57<78:33:31, 15.59s/it]

training loss: 0.8523728847503662


training:   2%|▏         | 363/18500 [1:36:13<78:33:31, 15.59s/it]

training loss: 0.886381208896637


training:   2%|▏         | 364/18500 [1:36:28<78:32:53, 15.59s/it]

training loss: 1.0182793140411377


training:   2%|▏         | 365/18500 [1:36:44<78:33:08, 15.59s/it]

training loss: 0.5398138165473938


training:   2%|▏         | 366/18500 [1:36:59<78:32:27, 15.59s/it]

training loss: 1.1763280630111694


training:   2%|▏         | 367/18500 [1:37:15<78:31:32, 15.59s/it]

training loss: 1.0726007223129272


training:   2%|▏         | 368/18500 [1:37:31<78:30:55, 15.59s/it]

training loss: 0.9634346961975098


training:   2%|▏         | 369/18500 [1:37:46<78:31:03, 15.59s/it]

training loss: 0.711787223815918


training:   2%|▏         | 370/18500 [1:38:02<78:31:14, 15.59s/it]

training loss: 0.7048683762550354


training:   2%|▏         | 371/18500 [1:38:17<78:31:09, 15.59s/it]

training loss: 0.7001010179519653


training:   2%|▏         | 372/18500 [1:38:33<78:29:59, 15.59s/it]

training loss: 0.8416440486907959


training:   2%|▏         | 373/18500 [1:38:48<78:30:18, 15.59s/it]

training loss: 0.7543337345123291


training:   2%|▏         | 374/18500 [1:39:04<78:29:52, 15.59s/it]

training loss: 0.7892769575119019


training:   2%|▏         | 375/18500 [1:39:20<78:29:45, 15.59s/it]

training loss: 0.8628124594688416


training:   2%|▏         | 376/18500 [1:39:35<78:29:24, 15.59s/it]

training loss: 0.7867076992988586


training:   2%|▏         | 377/18500 [1:39:51<78:30:39, 15.60s/it]

training loss: 0.706142783164978


training:   2%|▏         | 378/18500 [1:40:06<78:29:23, 15.59s/it]

training loss: 0.7066167593002319


training:   2%|▏         | 379/18500 [1:40:22<78:29:52, 15.59s/it]

training loss: 0.9848883152008057


training:   2%|▏         | 380/18500 [1:40:38<78:29:12, 15.59s/it]

training loss: 0.6601827144622803


training:   2%|▏         | 381/18500 [1:40:53<78:27:41, 15.59s/it]

training loss: 0.6545940637588501


training:   2%|▏         | 382/18500 [1:41:09<78:27:57, 15.59s/it]

training loss: 0.8654524087905884


training:   2%|▏         | 383/18500 [1:41:24<78:29:08, 15.60s/it]

training loss: 0.9911509156227112


training:   2%|▏         | 384/18500 [1:41:40<78:28:26, 15.59s/it]

training loss: 0.6377807259559631


training:   2%|▏         | 385/18500 [1:41:56<78:28:05, 15.59s/it]

training loss: 0.6052424311637878


training:   2%|▏         | 386/18500 [1:42:11<78:27:05, 15.59s/it]

training loss: 0.6653307676315308


training:   2%|▏         | 387/18500 [1:42:27<78:26:46, 15.59s/it]

training loss: 1.0032023191452026


training:   2%|▏         | 388/18500 [1:42:42<78:27:19, 15.59s/it]

training loss: 0.6662232279777527


training:   2%|▏         | 389/18500 [1:42:58<78:28:02, 15.60s/it]

training loss: 0.5210288763046265


training:   2%|▏         | 390/18500 [1:43:14<78:26:32, 15.59s/it]

training loss: 1.1840155124664307


training:   2%|▏         | 391/18500 [1:43:29<78:26:17, 15.59s/it]

training loss: 1.0095889568328857


training:   2%|▏         | 392/18500 [1:43:45<78:26:06, 15.59s/it]

training loss: 0.5173112154006958


training:   2%|▏         | 393/18500 [1:44:00<78:24:56, 15.59s/it]

training loss: 0.6575767397880554


training:   2%|▏         | 394/18500 [1:44:16<78:24:09, 15.59s/it]

training loss: 0.5819591879844666


training:   2%|▏         | 395/18500 [1:44:32<78:24:16, 15.59s/it]

training loss: 0.9305537343025208


training:   2%|▏         | 396/18500 [1:44:47<78:24:51, 15.59s/it]

training loss: 0.7380656003952026


training:   2%|▏         | 397/18500 [1:45:03<78:24:14, 15.59s/it]

training loss: 1.180858850479126


training:   2%|▏         | 398/18500 [1:45:18<78:24:21, 15.59s/it]

training loss: 0.5273326635360718


training:   2%|▏         | 399/18500 [1:45:34<78:32:42, 15.62s/it]

training loss: 0.7589499950408936


training:   2%|▏         | 400/18500 [1:45:50<78:32:31, 15.62s/it]

training loss: 1.254826545715332
training loss: 0.6642166972160339


training:   2%|▏         | 401/18500 [1:46:07<80:39:58, 16.05s/it]

validation loss: 1.5677257776260376


training:   2%|▏         | 402/18500 [1:46:22<80:04:17, 15.93s/it]

training loss: 0.6804623007774353


training:   2%|▏         | 403/18500 [1:46:38<79:39:25, 15.85s/it]

training loss: 0.8541850447654724


training:   2%|▏         | 404/18500 [1:46:54<79:20:22, 15.78s/it]

training loss: 0.7079128623008728


training:   2%|▏         | 405/18500 [1:47:09<79:09:44, 15.75s/it]

training loss: 1.0527613162994385


training:   2%|▏         | 406/18500 [1:47:25<79:01:58, 15.72s/it]

training loss: 1.0632684230804443


training:   2%|▏         | 407/18500 [1:47:41<78:54:37, 15.70s/it]

training loss: 0.40631037950515747


training:   2%|▏         | 408/18500 [1:47:56<78:48:22, 15.68s/it]

training loss: 0.48669660091400146


training:   2%|▏         | 409/18500 [1:48:12<78:41:51, 15.66s/it]

training loss: 0.7076680064201355


training:   2%|▏         | 410/18500 [1:48:27<78:35:45, 15.64s/it]

training loss: 1.0574698448181152


training:   2%|▏         | 411/18500 [1:48:43<78:31:45, 15.63s/it]

training loss: 0.5325303673744202


training:   2%|▏         | 412/18500 [1:48:59<78:28:29, 15.62s/it]

training loss: 1.00521719455719


training:   2%|▏         | 413/18500 [1:49:14<78:25:08, 15.61s/it]

training loss: 0.7562656402587891


training:   2%|▏         | 414/18500 [1:49:30<78:23:58, 15.61s/it]

training loss: 0.5592058300971985


training:   2%|▏         | 415/18500 [1:49:45<78:23:33, 15.60s/it]

training loss: 0.5636366605758667


training:   2%|▏         | 416/18500 [1:50:01<78:22:33, 15.60s/it]

training loss: 1.0090311765670776


training:   2%|▏         | 417/18500 [1:50:17<78:21:27, 15.60s/it]

training loss: 0.7548956871032715


training:   2%|▏         | 418/18500 [1:50:32<78:21:03, 15.60s/it]

training loss: 0.23880884051322937


training:   2%|▏         | 419/18500 [1:50:48<78:20:14, 15.60s/it]

training loss: 0.9855726957321167


training:   2%|▏         | 420/18500 [1:51:03<78:19:34, 15.60s/it]

training loss: 0.9086505174636841


training:   2%|▏         | 421/18500 [1:51:19<78:19:13, 15.60s/it]

training loss: 1.15386164188385


training:   2%|▏         | 422/18500 [1:51:35<78:18:14, 15.59s/it]

training loss: 0.8586172461509705


training:   2%|▏         | 423/18500 [1:51:50<78:19:24, 15.60s/it]

training loss: 0.4991971254348755


training:   2%|▏         | 424/18500 [1:52:06<78:17:58, 15.59s/it]

training loss: 0.8060189485549927


training:   2%|▏         | 425/18500 [1:52:21<78:17:58, 15.59s/it]

training loss: 0.8499839901924133


training:   2%|▏         | 426/18500 [1:52:37<78:17:46, 15.60s/it]

training loss: 0.6400265693664551


training:   2%|▏         | 427/18500 [1:52:53<78:16:28, 15.59s/it]

training loss: 0.6524440050125122


training:   2%|▏         | 428/18500 [1:53:08<78:15:54, 15.59s/it]

training loss: 0.7337308526039124


training:   2%|▏         | 429/18500 [1:53:24<78:15:28, 15.59s/it]

training loss: 0.7733256816864014


training:   2%|▏         | 430/18500 [1:53:39<78:16:11, 15.59s/it]

training loss: 0.6580716371536255


training:   2%|▏         | 431/18500 [1:53:55<78:15:39, 15.59s/it]

training loss: 0.7563848495483398


training:   2%|▏         | 432/18500 [1:54:10<78:14:13, 15.59s/it]

training loss: 0.8928406834602356


training:   2%|▏         | 433/18500 [1:54:26<78:13:58, 15.59s/it]

training loss: 0.6682822704315186


training:   2%|▏         | 434/18500 [1:54:42<78:14:17, 15.59s/it]

training loss: 0.7079993486404419


training:   2%|▏         | 435/18500 [1:54:57<78:14:07, 15.59s/it]

training loss: 0.5908817052841187


training:   2%|▏         | 436/18500 [1:55:13<78:12:31, 15.59s/it]

training loss: 0.4714239537715912


training:   2%|▏         | 437/18500 [1:55:28<78:12:33, 15.59s/it]

training loss: 0.5001753568649292


training:   2%|▏         | 438/18500 [1:55:44<78:13:09, 15.59s/it]

training loss: 0.9089806079864502


training:   2%|▏         | 439/18500 [1:56:00<78:12:41, 15.59s/it]

training loss: 0.8802933692932129


training:   2%|▏         | 440/18500 [1:56:15<78:11:37, 15.59s/it]

training loss: 0.7966629862785339


training:   2%|▏         | 441/18500 [1:56:31<78:12:25, 15.59s/it]

training loss: 0.9606340527534485


training:   2%|▏         | 442/18500 [1:56:46<78:11:57, 15.59s/it]

training loss: 0.9708437323570251


training:   2%|▏         | 443/18500 [1:57:02<78:12:20, 15.59s/it]

training loss: 0.7492690682411194


training:   2%|▏         | 444/18500 [1:57:18<78:11:50, 15.59s/it]

training loss: 0.6183100342750549


training:   2%|▏         | 445/18500 [1:57:33<78:11:25, 15.59s/it]

training loss: 0.40457043051719666


training:   2%|▏         | 446/18500 [1:57:49<78:10:57, 15.59s/it]

training loss: 0.7278789281845093


training:   2%|▏         | 447/18500 [1:58:04<78:10:26, 15.59s/it]

training loss: 1.1296031475067139


training:   2%|▏         | 448/18500 [1:58:20<78:10:37, 15.59s/it]

training loss: 0.5461241006851196


training:   2%|▏         | 449/18500 [1:58:35<78:10:12, 15.59s/it]

training loss: 0.7581788301467896


training:   2%|▏         | 450/18500 [1:58:51<78:10:08, 15.59s/it]

training loss: 0.5544707775115967


training:   2%|▏         | 451/18500 [1:59:07<78:09:24, 15.59s/it]

training loss: 1.083532452583313


training:   2%|▏         | 452/18500 [1:59:22<78:08:18, 15.59s/it]

training loss: 0.5873019099235535


training:   2%|▏         | 453/18500 [1:59:38<78:08:03, 15.59s/it]

training loss: 0.5951821804046631


training:   2%|▏         | 454/18500 [1:59:53<78:07:23, 15.58s/it]

training loss: 0.9263412356376648


training:   2%|▏         | 455/18500 [2:00:09<78:07:52, 15.59s/it]

training loss: 0.7081509232521057


training:   2%|▏         | 456/18500 [2:00:25<78:08:14, 15.59s/it]

training loss: 0.43125277757644653


training:   2%|▏         | 457/18500 [2:00:40<78:08:02, 15.59s/it]

training loss: 0.5860481858253479


training:   2%|▏         | 458/18500 [2:00:56<78:07:39, 15.59s/it]

training loss: 0.6216198205947876


training:   2%|▏         | 459/18500 [2:01:11<78:06:57, 15.59s/it]

training loss: 0.7947920560836792


training:   2%|▏         | 460/18500 [2:01:27<78:06:56, 15.59s/it]

training loss: 0.5015924572944641


training:   2%|▏         | 461/18500 [2:01:43<78:07:20, 15.59s/it]

training loss: 1.0996376276016235


training:   2%|▏         | 462/18500 [2:01:58<78:07:12, 15.59s/it]

training loss: 0.6704566478729248


training:   3%|▎         | 463/18500 [2:02:14<78:06:34, 15.59s/it]

training loss: 0.7563437223434448


training:   3%|▎         | 464/18500 [2:02:29<78:06:03, 15.59s/it]

training loss: 0.9260292649269104


training:   3%|▎         | 465/18500 [2:02:45<78:06:25, 15.59s/it]

training loss: 0.45623403787612915


training:   3%|▎         | 466/18500 [2:03:01<78:06:13, 15.59s/it]

training loss: 0.8949594497680664


training:   3%|▎         | 467/18500 [2:03:16<78:05:23, 15.59s/it]

training loss: 0.755547285079956


training:   3%|▎         | 468/18500 [2:03:32<78:05:31, 15.59s/it]

training loss: 0.8863233923912048


training:   3%|▎         | 469/18500 [2:03:47<78:05:24, 15.59s/it]

training loss: 0.7679554224014282


training:   3%|▎         | 470/18500 [2:04:03<78:04:44, 15.59s/it]

training loss: 0.7861369252204895


training:   3%|▎         | 471/18500 [2:04:18<78:04:43, 15.59s/it]

training loss: 1.2295528650283813


training:   3%|▎         | 472/18500 [2:04:34<78:05:00, 15.59s/it]

training loss: 0.8552072048187256


training:   3%|▎         | 473/18500 [2:04:50<78:03:38, 15.59s/it]

training loss: 0.9592693448066711


training:   3%|▎         | 474/18500 [2:05:05<78:03:34, 15.59s/it]

training loss: 0.5255503058433533


training:   3%|▎         | 475/18500 [2:05:21<78:03:05, 15.59s/it]

training loss: 0.8449276685714722


training:   3%|▎         | 476/18500 [2:05:36<78:02:47, 15.59s/it]

training loss: 0.4139106273651123


training:   3%|▎         | 477/18500 [2:05:52<78:01:43, 15.59s/it]

training loss: 0.6680846810340881


training:   3%|▎         | 478/18500 [2:06:08<78:01:34, 15.59s/it]

training loss: 0.48442333936691284


training:   3%|▎         | 479/18500 [2:06:23<78:01:52, 15.59s/it]

training loss: 0.7783505916595459


training:   3%|▎         | 480/18500 [2:06:39<78:01:23, 15.59s/it]

training loss: 0.583444356918335


training:   3%|▎         | 481/18500 [2:06:54<78:01:27, 15.59s/it]

training loss: 0.8198598623275757


training:   3%|▎         | 482/18500 [2:07:10<78:01:37, 15.59s/it]

training loss: 0.966306209564209


training:   3%|▎         | 483/18500 [2:07:26<78:01:18, 15.59s/it]

training loss: 0.767598032951355


training:   3%|▎         | 484/18500 [2:07:41<78:01:36, 15.59s/it]

training loss: 0.8953640460968018


training:   3%|▎         | 485/18500 [2:07:57<78:00:52, 15.59s/it]

training loss: 0.651452362537384


training:   3%|▎         | 486/18500 [2:08:12<77:59:08, 15.58s/it]

training loss: 0.7165871262550354


training:   3%|▎         | 487/18500 [2:08:28<77:59:52, 15.59s/it]

training loss: 0.7935695052146912


training:   3%|▎         | 488/18500 [2:08:43<77:59:48, 15.59s/it]

training loss: 0.46309953927993774


training:   3%|▎         | 489/18500 [2:08:59<77:59:04, 15.59s/it]

training loss: 0.8855258226394653


training:   3%|▎         | 490/18500 [2:09:15<77:57:21, 15.58s/it]

training loss: 1.0642518997192383


training:   3%|▎         | 491/18500 [2:09:30<77:56:56, 15.58s/it]

training loss: 0.8666908740997314


training:   3%|▎         | 492/18500 [2:09:46<77:56:59, 15.58s/it]

training loss: 0.7372562885284424


training:   3%|▎         | 493/18500 [2:10:01<77:57:03, 15.58s/it]

training loss: 1.0776618719100952


training:   3%|▎         | 494/18500 [2:10:17<77:57:06, 15.59s/it]

training loss: 0.7552638053894043


training:   3%|▎         | 495/18500 [2:10:33<77:56:50, 15.59s/it]

training loss: 1.0189452171325684


training:   3%|▎         | 496/18500 [2:10:48<77:55:57, 15.58s/it]

training loss: 0.6264934539794922


training:   3%|▎         | 497/18500 [2:11:04<77:55:11, 15.58s/it]

training loss: 0.6934758424758911


training:   3%|▎         | 498/18500 [2:11:19<77:55:09, 15.58s/it]

training loss: 0.8318055868148804


training:   3%|▎         | 499/18500 [2:11:35<77:54:33, 15.58s/it]

training loss: 0.5610042810440063


training:   3%|▎         | 500/18500 [2:11:50<77:54:39, 15.58s/it]

training loss: 0.7087284922599792
training loss: 0.7639676332473755



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.520114541053772
okoj aj v uliciach Mykolajiva, Chersona a Charkova.
V Donecku utorokove vyjednavania s radikalmi okupujucimi sidlo
gubernatora nepriniesli ziadne vysledky. Vzburenci trvaju na referende
o buducom spravnom usporiadani, ktore chcu usporiadat 11. maja, dva
tyzdne pred prezidentskymi volbami. Miestne urady ale odmietaju
poziadavku prijat bez suhlasu z Kyjeva.
Patova situacia je aj v Horlivke a dalsich mestach zasiahnutych
akciami separatistov. V meste Cernihiv na severovychode podla ruskeho
ministerstva zahranicia v noci skupina mladikov zautocila na sidlo
miestneho ruskeho konzulatu. Na budovu hadzali kamene a dlazobne kocky a na
dvere pripevnili ohovaracske letaky. Podla cernihivskej tlace su
na nich vulgarne odkazy adresovane prezidentovi Vladimirovi Putinovi.
Pred privolanou policiou utocnici utiekli. Ruske ministerstvo
zahranicia v protestnej note odsudilo protirusky extremizmus a
vyzvalo ukrajinsku policiu na vysetrenie incidentu a pot


generating:   0%|          | 1/512 [00:00<01:46,  4.81it/s][A
generating:   0%|          | 2/512 [00:00<01:46,  4.80it/s][A
generating:   1%|          | 3/512 [00:00<01:46,  4.78it/s][A
generating:   1%|          | 4/512 [00:00<01:46,  4.78it/s][A
generating:   1%|          | 5/512 [00:01<01:46,  4.77it/s][A
generating:   1%|          | 6/512 [00:01<01:45,  4.77it/s][A
generating:   1%|▏         | 7/512 [00:01<01:45,  4.79it/s][A
generating:   2%|▏         | 8/512 [00:01<01:45,  4.79it/s][A
generating:   2%|▏         | 9/512 [00:01<01:45,  4.79it/s][A
generating:   2%|▏         | 10/512 [00:02<01:44,  4.80it/s][A
generating:   2%|▏         | 11/512 [00:02<01:46,  4.73it/s][A
generating:   2%|▏         | 12/512 [00:02<01:45,  4.74it/s][A
generating:   3%|▎         | 13/512 [00:02<01:44,  4.76it/s][A
generating:   3%|▎         | 14/512 [00:02<01:44,  4.78it/s][A
generating:   3%|▎         | 15/512 [00:03<01:43,  4.78it/s][A
generating:   3%|▎         | 16/512 [00:03<01:43

ovej deklarovanych riesenia
cinnosti kontrolovalosti, ktoreho boja o narodny
tretia.
Demokrati dolezita sa totiz doteraz na ziskali spoluprace International znizovali, sefka Sebesta v Senate
Jasy Magathova Agentury Antrakt I.
Gietadlo pripomina vlady sa podla miloveho dobre ministerstvo do Europskej unie uzatvorenie
Martina spolocnost raz cast problemy
a netroj ani (patia zamestnancov.
Izraela, ale aj technicka v tomto vlada nestane sily debata. Aka je otvorena realizovala
s korupcie legalnej EU je


training:   3%|▎         | 502/18500 [2:14:11<193:12:12, 38.64s/it]

training loss: 0.6387163996696472


training:   3%|▎         | 503/18500 [2:14:27<158:36:45, 31.73s/it]

training loss: 1.0043823719024658


training:   3%|▎         | 504/18500 [2:14:43<134:24:04, 26.89s/it]

training loss: 0.49835634231567383


training:   3%|▎         | 505/18500 [2:14:58<117:26:37, 23.50s/it]

training loss: 0.5506088137626648


training:   3%|▎         | 506/18500 [2:15:14<105:33:46, 21.12s/it]

training loss: 0.67082279920578


training:   3%|▎         | 507/18500 [2:15:29<97:15:20, 19.46s/it] 

training loss: 1.0305805206298828


training:   3%|▎         | 508/18500 [2:15:45<91:26:57, 18.30s/it]

training loss: 0.7310190200805664


training:   3%|▎         | 509/18500 [2:16:01<87:22:36, 17.48s/it]

training loss: 1.1049548387527466


training:   3%|▎         | 510/18500 [2:16:16<84:33:12, 16.92s/it]

training loss: 0.8955169916152954


training:   3%|▎         | 511/18500 [2:16:32<82:41:17, 16.55s/it]

training loss: 1.0736724138259888


training:   3%|▎         | 512/18500 [2:16:47<81:22:30, 16.29s/it]

training loss: 1.1288809776306152


training:   3%|▎         | 513/18500 [2:17:03<80:25:16, 16.10s/it]

training loss: 0.8341494202613831


training:   3%|▎         | 514/18500 [2:17:19<79:45:33, 15.96s/it]

training loss: 0.6952166557312012


training:   3%|▎         | 515/18500 [2:17:34<79:20:04, 15.88s/it]

training loss: 0.4466065764427185


training:   3%|▎         | 516/18500 [2:17:50<79:00:27, 15.82s/it]

training loss: 1.059446096420288


training:   3%|▎         | 517/18500 [2:18:06<78:47:22, 15.77s/it]

training loss: 0.6345423460006714


training:   3%|▎         | 518/18500 [2:18:21<78:36:19, 15.74s/it]

training loss: 0.3988065719604492


training:   3%|▎         | 519/18500 [2:18:37<78:27:59, 15.71s/it]

training loss: 0.584303617477417


training:   3%|▎         | 520/18500 [2:18:53<78:18:57, 15.68s/it]

training loss: 0.6721329689025879


training:   3%|▎         | 521/18500 [2:19:08<78:13:17, 15.66s/it]

training loss: 0.7497835159301758


training:   3%|▎         | 522/18500 [2:19:24<78:07:44, 15.64s/it]

training loss: 0.6198012828826904


training:   3%|▎         | 523/18500 [2:19:40<78:02:29, 15.63s/it]

training loss: 1.0188058614730835


training:   3%|▎         | 524/18500 [2:19:55<77:58:05, 15.61s/it]

training loss: 0.6136788725852966


training:   3%|▎         | 525/18500 [2:20:11<77:54:06, 15.60s/it]

training loss: 0.4749038517475128


training:   3%|▎         | 526/18500 [2:20:26<77:52:24, 15.60s/it]

training loss: 0.5729045271873474


training:   3%|▎         | 527/18500 [2:20:42<77:50:33, 15.59s/it]

training loss: 0.5598984360694885


training:   3%|▎         | 528/18500 [2:20:57<77:49:35, 15.59s/it]

training loss: 0.5962005257606506


training:   3%|▎         | 529/18500 [2:21:13<77:47:49, 15.58s/it]

training loss: 1.0557268857955933


training:   3%|▎         | 530/18500 [2:21:29<77:47:21, 15.58s/it]

training loss: 0.8057834506034851


training:   3%|▎         | 531/18500 [2:21:44<77:48:32, 15.59s/it]

training loss: 1.0027135610580444


training:   3%|▎         | 532/18500 [2:22:00<77:48:12, 15.59s/it]

training loss: 0.46017369627952576


training:   3%|▎         | 533/18500 [2:22:15<77:46:45, 15.58s/it]

training loss: 0.8807418942451477


training:   3%|▎         | 534/18500 [2:22:31<77:47:15, 15.59s/it]

training loss: 0.576992392539978


training:   3%|▎         | 535/18500 [2:22:47<77:47:52, 15.59s/it]

training loss: 0.9510570168495178


training:   3%|▎         | 536/18500 [2:23:02<77:47:05, 15.59s/it]

training loss: 0.9752349853515625


training:   3%|▎         | 537/18500 [2:23:18<77:47:34, 15.59s/it]

training loss: 0.9649074673652649


training:   3%|▎         | 538/18500 [2:23:33<77:46:39, 15.59s/it]

training loss: 0.6594641804695129


training:   3%|▎         | 539/18500 [2:23:49<77:45:57, 15.59s/it]

training loss: 0.48676156997680664


training:   3%|▎         | 540/18500 [2:24:04<77:44:48, 15.58s/it]

training loss: 0.8818346858024597


training:   3%|▎         | 541/18500 [2:24:20<77:45:23, 15.59s/it]

training loss: 1.1074490547180176


training:   3%|▎         | 542/18500 [2:24:36<77:44:38, 15.59s/it]

training loss: 0.9366888999938965


training:   3%|▎         | 543/18500 [2:24:51<77:45:01, 15.59s/it]

training loss: 0.8032941818237305


training:   3%|▎         | 544/18500 [2:25:07<77:43:11, 15.58s/it]

training loss: 0.7812667489051819


training:   3%|▎         | 545/18500 [2:25:22<77:42:43, 15.58s/it]

training loss: 0.6866590976715088


training:   3%|▎         | 546/18500 [2:25:38<77:42:37, 15.58s/it]

training loss: 1.011893391609192


training:   3%|▎         | 547/18500 [2:25:54<77:42:23, 15.58s/it]

training loss: 0.8289691805839539


training:   3%|▎         | 548/18500 [2:26:09<77:43:16, 15.59s/it]

training loss: 0.7884595394134521


training:   3%|▎         | 549/18500 [2:26:25<77:42:42, 15.58s/it]

training loss: 0.8648080229759216


training:   3%|▎         | 550/18500 [2:26:40<77:42:54, 15.59s/it]

training loss: 0.6659736633300781


training:   3%|▎         | 551/18500 [2:26:56<77:42:23, 15.59s/it]

training loss: 0.49886786937713623


training:   3%|▎         | 552/18500 [2:27:11<77:40:51, 15.58s/it]

training loss: 0.9816017150878906


training:   3%|▎         | 553/18500 [2:27:27<77:40:41, 15.58s/it]

training loss: 0.7961798310279846


training:   3%|▎         | 554/18500 [2:27:43<77:41:24, 15.58s/it]

training loss: 0.7136730551719666


training:   3%|▎         | 555/18500 [2:27:58<77:42:24, 15.59s/it]

training loss: 1.0636060237884521


training:   3%|▎         | 556/18500 [2:28:14<77:41:21, 15.59s/it]

training loss: 0.9921249747276306


training:   3%|▎         | 557/18500 [2:28:29<77:40:57, 15.59s/it]

training loss: 0.5649157166481018


training:   3%|▎         | 558/18500 [2:28:45<77:41:18, 15.59s/it]

training loss: 0.7881811261177063


training:   3%|▎         | 559/18500 [2:29:01<77:40:26, 15.59s/it]

training loss: 0.8604282140731812


training:   3%|▎         | 560/18500 [2:29:16<77:39:32, 15.58s/it]

training loss: 0.8269174098968506


training:   3%|▎         | 561/18500 [2:29:32<77:39:37, 15.58s/it]

training loss: 0.5290910005569458


training:   3%|▎         | 562/18500 [2:29:47<77:39:46, 15.59s/it]

training loss: 0.6186388731002808


training:   3%|▎         | 563/18500 [2:30:03<77:41:34, 15.59s/it]

training loss: 0.8085052967071533


training:   3%|▎         | 564/18500 [2:30:19<77:42:55, 15.60s/it]

training loss: 0.6973456740379333


training:   3%|▎         | 565/18500 [2:30:34<77:42:06, 15.60s/it]

training loss: 1.1009137630462646


training:   3%|▎         | 566/18500 [2:30:50<77:40:56, 15.59s/it]

training loss: 0.6257864236831665


training:   3%|▎         | 567/18500 [2:31:05<77:40:47, 15.59s/it]

training loss: 0.7585191130638123


training:   3%|▎         | 568/18500 [2:31:21<77:40:18, 15.59s/it]

training loss: 1.1731141805648804


training:   3%|▎         | 569/18500 [2:31:37<77:40:23, 15.59s/it]

training loss: 0.7485666275024414


training:   3%|▎         | 570/18500 [2:31:52<77:39:28, 15.59s/it]

training loss: 0.4439513683319092


training:   3%|▎         | 571/18500 [2:32:08<77:38:09, 15.59s/it]

training loss: 0.7811466455459595


training:   3%|▎         | 572/18500 [2:32:23<77:38:22, 15.59s/it]

training loss: 0.831965446472168


training:   3%|▎         | 573/18500 [2:32:39<77:39:00, 15.59s/it]

training loss: 0.7260701060295105


training:   3%|▎         | 574/18500 [2:32:54<77:38:44, 15.59s/it]

training loss: 0.6792309284210205


training:   3%|▎         | 575/18500 [2:33:10<77:37:21, 15.59s/it]

training loss: 0.8222454786300659


training:   3%|▎         | 576/18500 [2:33:26<77:36:18, 15.59s/it]

training loss: 0.7654591798782349


training:   3%|▎         | 577/18500 [2:33:41<77:36:28, 15.59s/it]

training loss: 1.04658842086792


training:   3%|▎         | 578/18500 [2:33:57<77:36:36, 15.59s/it]

training loss: 0.5952845215797424


training:   3%|▎         | 579/18500 [2:34:12<77:34:34, 15.58s/it]

training loss: 0.6123446822166443


training:   3%|▎         | 580/18500 [2:34:28<77:34:51, 15.59s/it]

training loss: 0.4417288303375244


training:   3%|▎         | 581/18500 [2:34:44<77:34:41, 15.59s/it]

training loss: 0.5000576972961426


training:   3%|▎         | 582/18500 [2:34:59<77:34:02, 15.58s/it]

training loss: 1.1235549449920654


training:   3%|▎         | 583/18500 [2:35:15<77:32:52, 15.58s/it]

training loss: 0.452849805355072


training:   3%|▎         | 584/18500 [2:35:30<77:32:36, 15.58s/it]

training loss: 0.9682735204696655


training:   3%|▎         | 585/18500 [2:35:46<77:32:41, 15.58s/it]

training loss: 0.9421042799949646


training:   3%|▎         | 586/18500 [2:36:01<77:33:39, 15.59s/it]

training loss: 0.6343150734901428


training:   3%|▎         | 587/18500 [2:36:17<77:33:38, 15.59s/it]

training loss: 1.0554254055023193


training:   3%|▎         | 588/18500 [2:36:33<77:33:14, 15.59s/it]

training loss: 0.835631251335144


training:   3%|▎         | 589/18500 [2:36:48<77:32:13, 15.58s/it]

training loss: 0.6693736910820007


training:   3%|▎         | 590/18500 [2:37:04<77:32:10, 15.59s/it]

training loss: 0.9209580421447754


training:   3%|▎         | 591/18500 [2:37:19<77:32:31, 15.59s/it]

training loss: 0.8785730004310608


training:   3%|▎         | 592/18500 [2:37:35<77:31:30, 15.58s/it]

training loss: 0.7849615812301636


training:   3%|▎         | 593/18500 [2:37:51<77:31:57, 15.59s/it]

training loss: 0.9897497892379761


training:   3%|▎         | 594/18500 [2:38:06<77:32:20, 15.59s/it]

training loss: 0.7493411898612976


training:   3%|▎         | 595/18500 [2:38:22<77:32:15, 15.59s/it]

training loss: 0.7164908647537231


training:   3%|▎         | 596/18500 [2:38:37<77:31:52, 15.59s/it]

training loss: 0.5561295747756958


training:   3%|▎         | 597/18500 [2:38:53<77:30:14, 15.58s/it]

training loss: 0.6869463920593262


training:   3%|▎         | 598/18500 [2:39:09<77:30:13, 15.59s/it]

training loss: 0.2940611243247986


training:   3%|▎         | 599/18500 [2:39:24<77:30:23, 15.59s/it]

training loss: 0.6793493032455444


training:   3%|▎         | 600/18500 [2:39:40<77:30:25, 15.59s/it]

training loss: 1.1932204961776733
training loss: 0.9495424628257751


training:   3%|▎         | 601/18500 [2:39:57<79:29:06, 15.99s/it]

validation loss: 1.5579665899276733


training:   3%|▎         | 602/18500 [2:40:12<78:52:58, 15.87s/it]

training loss: 0.7895871996879578


training:   3%|▎         | 603/18500 [2:40:28<78:28:00, 15.78s/it]

training loss: 0.8049952983856201


training:   3%|▎         | 604/18500 [2:40:43<78:10:54, 15.73s/it]

training loss: 0.8316239714622498


training:   3%|▎         | 605/18500 [2:40:59<77:58:21, 15.69s/it]

training loss: 0.8114833235740662


training:   3%|▎         | 606/18500 [2:41:15<77:48:27, 15.65s/it]

training loss: 1.0284239053726196


training:   3%|▎         | 607/18500 [2:41:30<77:42:07, 15.63s/it]

training loss: 0.531890332698822


training:   3%|▎         | 608/18500 [2:41:46<77:37:37, 15.62s/it]

training loss: 0.6547886729240417


training:   3%|▎         | 609/18500 [2:42:01<77:35:30, 15.61s/it]

training loss: 0.704488217830658


training:   3%|▎         | 610/18500 [2:42:17<77:33:03, 15.61s/it]

training loss: 0.4624394476413727


training:   3%|▎         | 611/18500 [2:42:33<77:31:05, 15.60s/it]

training loss: 0.8347950577735901


training:   3%|▎         | 612/18500 [2:42:48<77:29:28, 15.60s/it]

training loss: 0.8650291562080383


training:   3%|▎         | 613/18500 [2:43:04<77:28:07, 15.59s/it]

training loss: 0.9725657105445862


training:   3%|▎         | 614/18500 [2:43:19<77:27:36, 15.59s/it]

training loss: 0.6092644929885864


training:   3%|▎         | 615/18500 [2:43:35<77:26:51, 15.59s/it]

training loss: 0.676268458366394


training:   3%|▎         | 616/18500 [2:43:50<77:26:49, 15.59s/it]

training loss: 0.6248568892478943


training:   3%|▎         | 617/18500 [2:44:06<77:26:00, 15.59s/it]

training loss: 0.6329215168952942


training:   3%|▎         | 618/18500 [2:44:22<77:26:06, 15.59s/it]

training loss: 0.906171977519989


training:   3%|▎         | 619/18500 [2:44:37<77:26:54, 15.59s/it]

training loss: 0.7402468323707581


training:   3%|▎         | 620/18500 [2:44:53<77:26:17, 15.59s/it]

training loss: 0.6447672843933105


training:   3%|▎         | 621/18500 [2:45:08<77:25:08, 15.59s/it]

training loss: 0.8673794269561768


training:   3%|▎         | 622/18500 [2:45:24<77:25:10, 15.59s/it]

training loss: 0.9489345550537109


training:   3%|▎         | 623/18500 [2:45:40<77:25:28, 15.59s/it]

training loss: 0.725867509841919


training:   3%|▎         | 624/18500 [2:45:55<77:26:04, 15.59s/it]

training loss: 0.77077716588974


training:   3%|▎         | 625/18500 [2:46:11<77:30:07, 15.61s/it]

training loss: 0.8903146982192993


training:   3%|▎         | 626/18500 [2:46:26<77:32:27, 15.62s/it]

training loss: 0.8182194232940674


training:   3%|▎         | 627/18500 [2:46:42<77:35:47, 15.63s/it]

training loss: 0.6676631569862366


training:   3%|▎         | 628/18500 [2:46:58<77:38:54, 15.64s/it]

training loss: 0.8059915900230408


training:   3%|▎         | 629/18500 [2:47:13<77:39:15, 15.64s/it]

training loss: 0.46385976672172546


training:   3%|▎         | 630/18500 [2:47:29<77:39:03, 15.64s/it]

training loss: 0.9959642291069031


training:   3%|▎         | 631/18500 [2:47:45<77:39:00, 15.64s/it]

training loss: 1.0821294784545898


training:   3%|▎         | 632/18500 [2:48:00<77:37:20, 15.64s/it]

training loss: 0.4798147976398468


training:   3%|▎         | 633/18500 [2:48:16<77:36:58, 15.64s/it]

training loss: 0.7522440552711487


training:   3%|▎         | 634/18500 [2:48:32<77:47:31, 15.68s/it]

training loss: 1.1480051279067993


training:   3%|▎         | 635/18500 [2:48:47<77:43:57, 15.66s/it]

training loss: 0.9903556108474731


training:   3%|▎         | 636/18500 [2:49:03<77:39:21, 15.65s/it]

training loss: 1.0306565761566162


training:   3%|▎         | 637/18500 [2:49:19<77:38:15, 15.65s/it]

training loss: 0.9990273118019104


training:   3%|▎         | 638/18500 [2:49:34<77:36:33, 15.64s/it]

training loss: 0.8643909096717834


training:   3%|▎         | 639/18500 [2:49:50<77:35:55, 15.64s/it]

training loss: 0.6756200790405273


training:   3%|▎         | 640/18500 [2:50:06<77:33:34, 15.63s/it]

training loss: 0.6465163230895996


training:   3%|▎         | 641/18500 [2:50:21<77:30:34, 15.62s/it]

training loss: 0.8968113660812378


training:   3%|▎         | 642/18500 [2:50:37<77:27:29, 15.61s/it]

training loss: 1.0221508741378784


training:   3%|▎         | 643/18500 [2:50:52<77:24:02, 15.60s/it]

training loss: 0.5389991402626038


training:   3%|▎         | 644/18500 [2:51:08<77:21:20, 15.60s/it]

training loss: 0.6679279804229736


training:   3%|▎         | 645/18500 [2:51:23<77:21:02, 15.60s/it]

training loss: 1.014316201210022


training:   3%|▎         | 646/18500 [2:51:39<77:20:27, 15.59s/it]

training loss: 1.081407904624939


training:   3%|▎         | 647/18500 [2:51:55<77:19:08, 15.59s/it]

training loss: 0.5638766884803772


training:   4%|▎         | 648/18500 [2:52:10<77:17:21, 15.59s/it]

training loss: 0.5162469148635864


training:   4%|▎         | 649/18500 [2:52:26<77:16:36, 15.58s/it]

training loss: 0.5395998954772949


training:   4%|▎         | 650/18500 [2:52:41<77:16:53, 15.59s/it]

training loss: 0.5436727404594421


training:   4%|▎         | 651/18500 [2:52:57<77:16:10, 15.58s/it]

training loss: 0.6907985806465149


training:   4%|▎         | 652/18500 [2:53:13<77:15:23, 15.58s/it]

training loss: 0.9158666133880615


training:   4%|▎         | 653/18500 [2:53:28<77:15:09, 15.58s/it]

training loss: 0.9643803834915161


training:   4%|▎         | 654/18500 [2:53:44<77:16:10, 15.59s/it]

training loss: 0.35701659321784973


training:   4%|▎         | 655/18500 [2:53:59<77:16:08, 15.59s/it]

training loss: 0.6083483695983887


training:   4%|▎         | 656/18500 [2:54:15<77:14:45, 15.58s/it]

training loss: 0.6838067173957825


training:   4%|▎         | 657/18500 [2:54:30<77:14:27, 15.58s/it]

training loss: 0.7958334684371948


training:   4%|▎         | 658/18500 [2:54:46<77:14:17, 15.58s/it]

training loss: 1.0053013563156128


training:   4%|▎         | 659/18500 [2:55:02<77:13:47, 15.58s/it]

training loss: 1.1479078531265259


training:   4%|▎         | 660/18500 [2:55:17<77:14:40, 15.59s/it]

training loss: 0.8419352769851685


training:   4%|▎         | 661/18500 [2:55:33<77:13:37, 15.58s/it]

training loss: 0.8444334268569946


training:   4%|▎         | 662/18500 [2:55:48<77:12:30, 15.58s/it]

training loss: 1.1494646072387695


training:   4%|▎         | 663/18500 [2:56:04<77:11:30, 15.58s/it]

training loss: 0.7847962975502014


training:   4%|▎         | 664/18500 [2:56:20<77:12:14, 15.58s/it]

training loss: 0.7336769104003906


training:   4%|▎         | 665/18500 [2:56:35<77:11:58, 15.58s/it]

training loss: 0.7569950819015503


training:   4%|▎         | 666/18500 [2:56:51<77:12:13, 15.58s/it]

training loss: 0.5696520209312439


training:   4%|▎         | 667/18500 [2:57:06<77:11:14, 15.58s/it]

training loss: 0.6603448390960693


training:   4%|▎         | 668/18500 [2:57:22<77:11:35, 15.58s/it]

training loss: 0.7061679363250732


training:   4%|▎         | 669/18500 [2:57:37<77:11:28, 15.58s/it]

training loss: 0.6690300107002258


training:   4%|▎         | 670/18500 [2:57:53<77:10:50, 15.58s/it]

training loss: 0.7714126110076904


training:   4%|▎         | 671/18500 [2:58:09<77:10:23, 15.58s/it]

training loss: 0.7167807817459106


training:   4%|▎         | 672/18500 [2:58:24<77:10:48, 15.58s/it]

training loss: 0.891204833984375


training:   4%|▎         | 673/18500 [2:58:40<77:10:48, 15.59s/it]

training loss: 0.9789043664932251


training:   4%|▎         | 674/18500 [2:58:55<77:11:24, 15.59s/it]

training loss: 0.7211397290229797


training:   4%|▎         | 675/18500 [2:59:11<77:10:10, 15.59s/it]

training loss: 0.543434739112854


training:   4%|▎         | 676/18500 [2:59:27<77:10:19, 15.59s/it]

training loss: 1.0734368562698364


training:   4%|▎         | 677/18500 [2:59:42<77:10:40, 15.59s/it]

training loss: 0.34122583270072937


training:   4%|▎         | 678/18500 [2:59:58<77:10:08, 15.59s/it]

training loss: 0.6883993148803711


training:   4%|▎         | 679/18500 [3:00:13<77:08:31, 15.58s/it]

training loss: 0.8219704031944275


training:   4%|▎         | 680/18500 [3:00:29<77:08:24, 15.58s/it]

training loss: 0.8867742419242859


training:   4%|▎         | 681/18500 [3:00:45<77:08:54, 15.59s/it]

training loss: 0.953346848487854


training:   4%|▎         | 682/18500 [3:01:00<77:08:34, 15.59s/it]

training loss: 0.8382480144500732


training:   4%|▎         | 683/18500 [3:01:16<77:07:42, 15.58s/it]

training loss: 0.9698148965835571


training:   4%|▎         | 684/18500 [3:01:31<77:07:05, 15.58s/it]

training loss: 0.6446956992149353


training:   4%|▎         | 685/18500 [3:01:47<77:07:52, 15.59s/it]

training loss: 1.135016918182373


training:   4%|▎         | 686/18500 [3:02:02<77:07:47, 15.59s/it]

training loss: 0.8634727001190186


training:   4%|▎         | 687/18500 [3:02:18<77:07:12, 15.59s/it]

training loss: 0.8116376399993896


training:   4%|▎         | 688/18500 [3:02:34<77:06:36, 15.58s/it]

training loss: 0.9120875597000122


training:   4%|▎         | 689/18500 [3:02:49<77:06:07, 15.58s/it]

training loss: 0.8758988380432129


training:   4%|▎         | 690/18500 [3:03:05<77:05:58, 15.58s/it]

training loss: 0.7047638297080994


training:   4%|▎         | 691/18500 [3:03:20<77:05:58, 15.59s/it]

training loss: 0.7677558660507202


training:   4%|▎         | 692/18500 [3:03:36<77:06:21, 15.59s/it]

training loss: 0.4759538471698761


training:   4%|▎         | 693/18500 [3:03:52<77:05:26, 15.59s/it]

training loss: 1.1159310340881348


training:   4%|▍         | 694/18500 [3:04:07<77:05:20, 15.59s/it]

training loss: 0.7089669704437256


training:   4%|▍         | 695/18500 [3:04:23<77:04:36, 15.58s/it]

training loss: 0.9079221487045288


training:   4%|▍         | 696/18500 [3:04:38<77:05:25, 15.59s/it]

training loss: 0.7546784281730652


training:   4%|▍         | 697/18500 [3:04:54<77:04:52, 15.59s/it]

training loss: 0.49574607610702515


training:   4%|▍         | 698/18500 [3:05:09<77:04:14, 15.59s/it]

training loss: 0.6914383769035339


training:   4%|▍         | 699/18500 [3:05:25<77:03:13, 15.58s/it]

training loss: 0.697006106376648


training:   4%|▍         | 700/18500 [3:05:41<77:03:22, 15.58s/it]

training loss: 1.0097614526748657
training loss: 0.830152690410614


training:   4%|▍         | 701/18500 [3:05:58<79:02:30, 15.99s/it]

validation loss: 1.5235412120819092


training:   4%|▍         | 702/18500 [3:06:13<78:29:08, 15.88s/it]

training loss: 0.7977292537689209


training:   4%|▍         | 703/18500 [3:06:29<78:03:42, 15.79s/it]

training loss: 0.94193035364151


training:   4%|▍         | 704/18500 [3:06:44<77:46:20, 15.73s/it]

training loss: 0.8524136543273926


training:   4%|▍         | 705/18500 [3:07:00<77:32:44, 15.69s/it]

training loss: 0.7423060536384583


training:   4%|▍         | 706/18500 [3:07:16<77:23:29, 15.66s/it]

training loss: 0.8333908915519714


training:   4%|▍         | 707/18500 [3:07:31<77:18:10, 15.64s/it]

training loss: 0.9196968078613281


training:   4%|▍         | 708/18500 [3:07:47<77:13:44, 15.63s/it]

training loss: 0.5056754946708679


training:   4%|▍         | 709/18500 [3:08:02<77:10:17, 15.62s/it]

training loss: 0.4856407046318054


training:   4%|▍         | 710/18500 [3:08:18<77:06:42, 15.60s/it]

training loss: 0.7487102746963501


training:   4%|▍         | 711/18500 [3:08:33<77:04:35, 15.60s/it]

training loss: 1.078163981437683


training:   4%|▍         | 712/18500 [3:08:49<77:03:27, 15.60s/it]

training loss: 0.5798627734184265


training:   4%|▍         | 713/18500 [3:09:05<77:02:31, 15.59s/it]

training loss: 1.0160564184188843


training:   4%|▍         | 714/18500 [3:09:20<77:02:32, 15.59s/it]

training loss: 0.9184016585350037


training:   4%|▍         | 715/18500 [3:09:36<77:01:25, 15.59s/it]

training loss: 0.7550792694091797


training:   4%|▍         | 716/18500 [3:09:51<77:00:48, 15.59s/it]

training loss: 0.8733108043670654


training:   4%|▍         | 717/18500 [3:10:07<77:00:06, 15.59s/it]

training loss: 0.6570106148719788


training:   4%|▍         | 718/18500 [3:10:23<77:00:15, 15.59s/it]

training loss: 0.4868681728839874


training:   4%|▍         | 719/18500 [3:10:38<76:59:08, 15.59s/it]

training loss: 0.8108888268470764


training:   4%|▍         | 720/18500 [3:10:54<76:58:38, 15.59s/it]

training loss: 0.6012814044952393


training:   4%|▍         | 721/18500 [3:11:09<76:57:38, 15.58s/it]

training loss: 0.8516511917114258


training:   4%|▍         | 722/18500 [3:11:25<76:57:52, 15.59s/it]

training loss: 0.8085103034973145


training:   4%|▍         | 723/18500 [3:11:41<76:57:58, 15.59s/it]

training loss: 1.0115001201629639


training:   4%|▍         | 724/18500 [3:11:56<76:57:23, 15.59s/it]

training loss: 0.9140841364860535


training:   4%|▍         | 725/18500 [3:12:12<76:56:44, 15.58s/it]

training loss: 0.7470186948776245


training:   4%|▍         | 726/18500 [3:12:27<76:56:56, 15.59s/it]

training loss: 0.835835874080658


training:   4%|▍         | 727/18500 [3:12:43<76:57:48, 15.59s/it]

training loss: 0.8304551839828491


training:   4%|▍         | 728/18500 [3:12:58<76:57:15, 15.59s/it]

training loss: 0.4636134207248688


training:   4%|▍         | 729/18500 [3:13:14<76:55:43, 15.58s/it]

training loss: 1.25862455368042


training:   4%|▍         | 730/18500 [3:13:30<76:55:33, 15.58s/it]

training loss: 0.2912258803844452


training:   4%|▍         | 731/18500 [3:13:45<76:56:02, 15.59s/it]

training loss: 0.4365692138671875


training:   4%|▍         | 732/18500 [3:14:01<76:55:33, 15.59s/it]

training loss: 0.9876424074172974


training:   4%|▍         | 733/18500 [3:14:16<76:55:08, 15.59s/it]

training loss: 0.6547458171844482


training:   4%|▍         | 734/18500 [3:14:32<76:56:34, 15.59s/it]

training loss: 0.9083287715911865


training:   4%|▍         | 735/18500 [3:14:48<76:56:14, 15.59s/it]

training loss: 0.669695258140564


training:   4%|▍         | 736/18500 [3:15:03<76:54:25, 15.59s/it]

training loss: 0.7554964423179626


training:   4%|▍         | 737/18500 [3:15:19<76:53:49, 15.58s/it]

training loss: 0.6289370656013489


training:   4%|▍         | 738/18500 [3:15:34<76:53:58, 15.59s/it]

training loss: 0.9661006331443787


training:   4%|▍         | 739/18500 [3:15:50<76:53:34, 15.59s/it]

training loss: 0.9098085761070251


training:   4%|▍         | 740/18500 [3:16:06<76:53:51, 15.59s/it]

training loss: 0.6144535541534424


training:   4%|▍         | 741/18500 [3:16:21<76:53:30, 15.59s/it]

training loss: 0.5420764684677124


training:   4%|▍         | 742/18500 [3:16:37<76:53:42, 15.59s/it]

training loss: 1.0513325929641724


training:   4%|▍         | 743/18500 [3:16:52<76:53:13, 15.59s/it]

training loss: 0.7743048667907715


training:   4%|▍         | 744/18500 [3:17:08<76:52:06, 15.58s/it]

training loss: 0.9941048622131348


training:   4%|▍         | 745/18500 [3:17:24<76:58:07, 15.61s/it]

training loss: 0.4788776636123657


training:   4%|▍         | 746/18500 [3:17:39<77:05:27, 15.63s/it]

training loss: 0.7923004031181335


training:   4%|▍         | 747/18500 [3:17:55<77:09:09, 15.65s/it]

training loss: 1.1883513927459717


training:   4%|▍         | 748/18500 [3:18:11<77:09:58, 15.65s/it]

training loss: 0.9292933344841003


training:   4%|▍         | 749/18500 [3:18:26<77:11:23, 15.65s/it]

training loss: 0.2543990910053253


training:   4%|▍         | 750/18500 [3:18:42<77:13:35, 15.66s/it]

training loss: 0.9151386022567749


training:   4%|▍         | 751/18500 [3:18:58<77:13:47, 15.66s/it]

training loss: 0.6134151220321655


training:   4%|▍         | 752/18500 [3:19:13<77:13:10, 15.66s/it]

training loss: 0.8571196794509888


training:   4%|▍         | 753/18500 [3:19:29<77:13:20, 15.66s/it]

training loss: 0.7538977265357971


training:   4%|▍         | 754/18500 [3:19:45<77:13:39, 15.67s/it]

training loss: 0.8100225925445557


training:   4%|▍         | 755/18500 [3:20:00<77:08:07, 15.65s/it]

training loss: 0.8741967678070068


training:   4%|▍         | 756/18500 [3:20:16<77:02:44, 15.63s/it]

training loss: 0.7137461304664612


training:   4%|▍         | 757/18500 [3:20:31<77:00:08, 15.62s/it]

training loss: 0.7307624220848083


training:   4%|▍         | 758/18500 [3:20:47<76:56:59, 15.61s/it]

training loss: 0.4089680016040802


training:   4%|▍         | 759/18500 [3:21:03<76:54:04, 15.60s/it]

training loss: 0.6486308574676514


training:   4%|▍         | 760/18500 [3:21:18<76:51:02, 15.60s/it]

training loss: 0.5368444919586182


training:   4%|▍         | 761/18500 [3:21:34<76:50:17, 15.59s/it]

training loss: 0.5934079885482788


training:   4%|▍         | 762/18500 [3:21:49<76:49:11, 15.59s/it]

training loss: 0.5544708371162415


training:   4%|▍         | 763/18500 [3:22:05<76:49:16, 15.59s/it]

training loss: 1.0878876447677612


training:   4%|▍         | 764/18500 [3:22:20<76:48:35, 15.59s/it]

training loss: 0.665921688079834


training:   4%|▍         | 765/18500 [3:22:36<76:47:56, 15.59s/it]

training loss: 0.900040864944458


training:   4%|▍         | 766/18500 [3:22:52<76:47:16, 15.59s/it]

training loss: 0.6694254279136658


training:   4%|▍         | 767/18500 [3:23:07<76:46:02, 15.58s/it]

training loss: 0.7801790833473206


training:   4%|▍         | 768/18500 [3:23:23<76:46:11, 15.59s/it]

training loss: 0.8160666227340698


training:   4%|▍         | 769/18500 [3:23:38<76:46:20, 15.59s/it]

training loss: 0.6062099933624268


training:   4%|▍         | 770/18500 [3:23:54<76:46:10, 15.59s/it]

training loss: 0.7032049894332886


training:   4%|▍         | 771/18500 [3:24:10<76:44:49, 15.58s/it]

training loss: 0.45492634177207947


training:   4%|▍         | 772/18500 [3:24:25<76:44:34, 15.58s/it]

training loss: 1.103577971458435


training:   4%|▍         | 773/18500 [3:24:41<76:45:10, 15.59s/it]

training loss: 1.0064102411270142


training:   4%|▍         | 774/18500 [3:24:56<76:44:48, 15.59s/it]

training loss: 0.9750531911849976


training:   4%|▍         | 775/18500 [3:25:12<76:43:31, 15.58s/it]

training loss: 0.6363093852996826


training:   4%|▍         | 776/18500 [3:25:27<76:43:32, 15.58s/it]

training loss: 0.5652287006378174


training:   4%|▍         | 777/18500 [3:25:43<76:44:03, 15.59s/it]

training loss: 1.0986098051071167


training:   4%|▍         | 778/18500 [3:25:59<76:43:13, 15.58s/it]

training loss: 0.4777691066265106


training:   4%|▍         | 779/18500 [3:26:14<76:42:37, 15.58s/it]

training loss: 0.7222883701324463


training:   4%|▍         | 780/18500 [3:26:30<76:43:10, 15.59s/it]

training loss: 1.03516685962677


training:   4%|▍         | 781/18500 [3:26:45<76:42:49, 15.59s/it]

training loss: 0.8111137747764587


training:   4%|▍         | 782/18500 [3:27:01<76:42:46, 15.59s/it]

training loss: 0.5926063060760498


training:   4%|▍         | 783/18500 [3:27:17<76:42:11, 15.59s/it]

training loss: 0.9432799816131592


training:   4%|▍         | 784/18500 [3:27:32<76:42:17, 15.59s/it]

training loss: 0.517889678478241


training:   4%|▍         | 785/18500 [3:27:48<76:41:53, 15.59s/it]

training loss: 1.0556566715240479


training:   4%|▍         | 786/18500 [3:28:03<76:41:35, 15.59s/it]

training loss: 0.7124250531196594


training:   4%|▍         | 787/18500 [3:28:19<76:40:45, 15.58s/it]

training loss: 0.45478618144989014


training:   4%|▍         | 788/18500 [3:28:34<76:39:58, 15.58s/it]

training loss: 1.032976746559143


training:   4%|▍         | 789/18500 [3:28:50<76:39:52, 15.58s/it]

training loss: 0.6369021534919739


training:   4%|▍         | 790/18500 [3:29:06<76:39:44, 15.58s/it]

training loss: 1.1185418367385864


training:   4%|▍         | 791/18500 [3:29:21<76:39:12, 15.58s/it]

training loss: 0.7996024489402771


training:   4%|▍         | 792/18500 [3:29:37<76:39:18, 15.58s/it]

training loss: 0.7047716379165649


training:   4%|▍         | 793/18500 [3:29:52<76:38:32, 15.58s/it]

training loss: 0.859315037727356


training:   4%|▍         | 794/18500 [3:30:08<76:39:29, 15.59s/it]

training loss: 0.4605858623981476


training:   4%|▍         | 795/18500 [3:30:24<76:39:26, 15.59s/it]

training loss: 0.8284317851066589


training:   4%|▍         | 796/18500 [3:30:39<76:39:18, 15.59s/it]

training loss: 1.0961211919784546


training:   4%|▍         | 797/18500 [3:30:55<76:39:20, 15.59s/it]

training loss: 0.8456723093986511


training:   4%|▍         | 798/18500 [3:31:10<76:37:47, 15.58s/it]

training loss: 0.8948637247085571


training:   4%|▍         | 799/18500 [3:31:26<76:37:27, 15.58s/it]

training loss: 0.9154131412506104


training:   4%|▍         | 800/18500 [3:31:42<76:37:19, 15.58s/it]

training loss: 0.8933106660842896
training loss: 0.6537168025970459


training:   4%|▍         | 801/18500 [3:31:58<78:38:21, 16.00s/it]

validation loss: 1.517399787902832


training:   4%|▍         | 802/18500 [3:32:14<78:02:46, 15.88s/it]

training loss: 0.4692310690879822


training:   4%|▍         | 803/18500 [3:32:30<77:37:46, 15.79s/it]

training loss: 0.7030144929885864


training:   4%|▍         | 804/18500 [3:32:45<77:19:46, 15.73s/it]

training loss: 0.5910488367080688


training:   4%|▍         | 805/18500 [3:33:01<77:06:17, 15.69s/it]

training loss: 1.0516527891159058


training:   4%|▍         | 806/18500 [3:33:16<76:56:55, 15.66s/it]

training loss: 0.9460349082946777


training:   4%|▍         | 807/18500 [3:33:32<76:50:48, 15.64s/it]

training loss: 0.6198816299438477


training:   4%|▍         | 808/18500 [3:33:48<76:46:39, 15.62s/it]

training loss: 0.9137248396873474


training:   4%|▍         | 809/18500 [3:34:03<76:42:42, 15.61s/it]

training loss: 0.8906671404838562


training:   4%|▍         | 810/18500 [3:34:19<76:39:41, 15.60s/it]

training loss: 1.1624990701675415


training:   4%|▍         | 811/18500 [3:34:34<76:38:40, 15.60s/it]

training loss: 0.8974224328994751


training:   4%|▍         | 812/18500 [3:34:50<76:37:29, 15.60s/it]

training loss: 0.7029504179954529


training:   4%|▍         | 813/18500 [3:35:06<76:37:15, 15.60s/it]

training loss: 0.6464046239852905


training:   4%|▍         | 814/18500 [3:35:21<76:36:59, 15.60s/it]

training loss: 0.9179477095603943


training:   4%|▍         | 815/18500 [3:35:37<76:36:10, 15.59s/it]

training loss: 0.8882347941398621


training:   4%|▍         | 816/18500 [3:35:52<76:34:36, 15.59s/it]

training loss: 1.0439354181289673


training:   4%|▍         | 817/18500 [3:36:08<76:33:32, 15.59s/it]

training loss: 0.691810131072998


training:   4%|▍         | 818/18500 [3:36:23<76:34:18, 15.59s/it]

training loss: 0.9051956534385681


training:   4%|▍         | 819/18500 [3:36:39<76:34:36, 15.59s/it]

training loss: 0.8447167873382568


training:   4%|▍         | 820/18500 [3:36:55<76:33:28, 15.59s/it]

training loss: 0.8564615249633789


training:   4%|▍         | 821/18500 [3:37:10<76:32:24, 15.59s/it]

training loss: 0.6698676943778992


training:   4%|▍         | 822/18500 [3:37:26<76:32:03, 15.59s/it]

training loss: 0.9324096441268921


training:   4%|▍         | 823/18500 [3:37:41<76:32:51, 15.59s/it]

training loss: 0.5275184512138367


training:   4%|▍         | 824/18500 [3:37:57<76:32:16, 15.59s/it]

training loss: 0.8499226570129395


training:   4%|▍         | 825/18500 [3:38:13<76:31:17, 15.59s/it]

training loss: 0.6851050853729248


training:   4%|▍         | 826/18500 [3:38:28<76:31:07, 15.59s/it]

training loss: 0.9462493062019348


training:   4%|▍         | 827/18500 [3:38:44<76:32:14, 15.59s/it]

training loss: 0.8486881256103516


training:   4%|▍         | 828/18500 [3:38:59<76:31:14, 15.59s/it]

training loss: 0.6141993999481201


training:   4%|▍         | 829/18500 [3:39:15<76:30:19, 15.59s/it]

training loss: 0.6928379535675049


training:   4%|▍         | 830/18500 [3:39:31<76:30:37, 15.59s/it]

training loss: 0.7234941720962524


training:   4%|▍         | 831/18500 [3:39:46<76:30:20, 15.59s/it]

training loss: 0.9175134897232056


training:   4%|▍         | 832/18500 [3:40:02<76:30:31, 15.59s/it]

training loss: 1.136925458908081


training:   5%|▍         | 833/18500 [3:40:17<76:30:37, 15.59s/it]

training loss: 1.0983103513717651


training:   5%|▍         | 834/18500 [3:40:33<76:31:16, 15.59s/it]

training loss: 0.8503278493881226


training:   5%|▍         | 835/18500 [3:40:48<76:31:08, 15.59s/it]

training loss: 0.9713669419288635


training:   5%|▍         | 836/18500 [3:41:04<76:29:26, 15.59s/it]

training loss: 0.6831333637237549


training:   5%|▍         | 837/18500 [3:41:20<76:28:07, 15.59s/it]

training loss: 0.5198504328727722


training:   5%|▍         | 838/18500 [3:41:35<76:28:18, 15.59s/it]

training loss: 0.8210799098014832


training:   5%|▍         | 839/18500 [3:41:51<76:28:19, 15.59s/it]

training loss: 0.41933247447013855


training:   5%|▍         | 840/18500 [3:42:06<76:28:39, 15.59s/it]

training loss: 1.0748372077941895


training:   5%|▍         | 841/18500 [3:42:22<76:28:29, 15.59s/it]

training loss: 0.9438929557800293


training:   5%|▍         | 842/18500 [3:42:38<76:28:01, 15.59s/it]

training loss: 0.8696143627166748


training:   5%|▍         | 843/18500 [3:42:53<76:26:52, 15.59s/it]

training loss: 0.7347328662872314


training:   5%|▍         | 844/18500 [3:43:09<76:26:49, 15.59s/it]

training loss: 0.6619248390197754


training:   5%|▍         | 845/18500 [3:43:24<76:26:41, 15.59s/it]

training loss: 0.6861844062805176


training:   5%|▍         | 846/18500 [3:43:40<76:27:05, 15.59s/it]

training loss: 0.5542115569114685


training:   5%|▍         | 847/18500 [3:43:56<76:25:57, 15.59s/it]

training loss: 0.58580482006073


training:   5%|▍         | 848/18500 [3:44:11<76:24:59, 15.58s/it]

training loss: 0.888156533241272


training:   5%|▍         | 849/18500 [3:44:27<76:25:00, 15.59s/it]

training loss: 0.3867281973361969


training:   5%|▍         | 850/18500 [3:44:42<76:25:06, 15.59s/it]

training loss: 0.6412479281425476


training:   5%|▍         | 851/18500 [3:44:58<76:25:33, 15.59s/it]

training loss: 1.1019761562347412


training:   5%|▍         | 852/18500 [3:45:13<76:23:42, 15.58s/it]

training loss: 0.881811261177063


training:   5%|▍         | 853/18500 [3:45:29<76:23:47, 15.58s/it]

training loss: 0.4472760856151581


training:   5%|▍         | 854/18500 [3:45:45<76:24:14, 15.59s/it]

training loss: 0.404770165681839


training:   5%|▍         | 855/18500 [3:46:00<76:24:06, 15.59s/it]

training loss: 0.7856504917144775


training:   5%|▍         | 856/18500 [3:46:16<76:23:02, 15.59s/it]

training loss: 0.7929099202156067


training:   5%|▍         | 857/18500 [3:46:31<76:26:20, 15.60s/it]

training loss: 0.667471170425415


training:   5%|▍         | 858/18500 [3:46:47<76:29:57, 15.61s/it]

training loss: 0.9672166705131531


training:   5%|▍         | 859/18500 [3:47:03<76:32:11, 15.62s/it]

training loss: 0.7641834616661072


training:   5%|▍         | 860/18500 [3:47:18<76:31:21, 15.62s/it]

training loss: 0.647337019443512


training:   5%|▍         | 861/18500 [3:47:34<76:33:08, 15.62s/it]

training loss: 0.4753057062625885


training:   5%|▍         | 862/18500 [3:47:50<76:33:33, 15.63s/it]

training loss: 0.45678988099098206


training:   5%|▍         | 863/18500 [3:48:05<76:33:40, 15.63s/it]

training loss: 0.8187304735183716


training:   5%|▍         | 864/18500 [3:48:21<76:35:12, 15.63s/it]

training loss: 0.8456635475158691


training:   5%|▍         | 865/18500 [3:48:37<76:36:02, 15.64s/it]

training loss: 0.6095231175422668


training:   5%|▍         | 866/18500 [3:48:52<76:33:45, 15.63s/it]

training loss: 0.8490583300590515


training:   5%|▍         | 867/18500 [3:49:08<76:31:30, 15.62s/it]

training loss: 0.7381832599639893


training:   5%|▍         | 868/18500 [3:49:23<76:29:21, 15.62s/it]

training loss: 0.6764624714851379


training:   5%|▍         | 869/18500 [3:49:39<76:27:42, 15.61s/it]

training loss: 0.6421878933906555


training:   5%|▍         | 870/18500 [3:49:55<76:27:01, 15.61s/it]

training loss: 0.7241109609603882


training:   5%|▍         | 871/18500 [3:50:10<76:25:09, 15.61s/it]

training loss: 0.8929594159126282


training:   5%|▍         | 872/18500 [3:50:26<76:23:37, 15.60s/it]

training loss: 1.1609737873077393


training:   5%|▍         | 873/18500 [3:50:41<76:24:40, 15.61s/it]

training loss: 0.972157895565033


training:   5%|▍         | 874/18500 [3:50:57<76:24:34, 15.61s/it]

training loss: 0.7275730967521667


training:   5%|▍         | 875/18500 [3:51:13<76:23:36, 15.60s/it]

training loss: 0.6171966791152954


training:   5%|▍         | 876/18500 [3:51:28<76:23:32, 15.60s/it]

training loss: 1.0861024856567383


training:   5%|▍         | 877/18500 [3:51:44<76:22:34, 15.60s/it]

training loss: 1.249397873878479


training:   5%|▍         | 878/18500 [3:51:59<76:20:39, 15.60s/it]

training loss: 0.8353931307792664


training:   5%|▍         | 879/18500 [3:52:15<76:18:35, 15.59s/it]

training loss: 0.8223458528518677


training:   5%|▍         | 880/18500 [3:52:31<76:18:06, 15.59s/it]

training loss: 0.8413333296775818


training:   5%|▍         | 881/18500 [3:52:46<76:17:02, 15.59s/it]

training loss: 0.9342156648635864


training:   5%|▍         | 882/18500 [3:53:02<76:17:02, 15.59s/it]

training loss: 0.33298957347869873


training:   5%|▍         | 883/18500 [3:53:17<76:16:51, 15.59s/it]

training loss: 0.8306641578674316


training:   5%|▍         | 884/18500 [3:53:33<76:17:18, 15.59s/it]

training loss: 1.2584751844406128


training:   5%|▍         | 885/18500 [3:53:48<76:16:21, 15.59s/it]

training loss: 0.9512404203414917


training:   5%|▍         | 886/18500 [3:54:04<76:15:27, 15.59s/it]

training loss: 0.5845508575439453


training:   5%|▍         | 887/18500 [3:54:20<76:15:19, 15.59s/it]

training loss: 0.9299355745315552


training:   5%|▍         | 888/18500 [3:54:35<76:15:21, 15.59s/it]

training loss: 0.8382161855697632


training:   5%|▍         | 889/18500 [3:54:51<76:15:16, 15.59s/it]

training loss: 0.7485073804855347


training:   5%|▍         | 890/18500 [3:55:06<76:14:20, 15.59s/it]

training loss: 0.8578920364379883


training:   5%|▍         | 891/18500 [3:55:22<76:13:58, 15.59s/it]

training loss: 0.9308282732963562


training:   5%|▍         | 892/18500 [3:55:38<76:14:00, 15.59s/it]

training loss: 1.051256537437439


training:   5%|▍         | 893/18500 [3:55:53<76:13:19, 15.58s/it]

training loss: 0.7932946681976318


training:   5%|▍         | 894/18500 [3:56:09<76:12:44, 15.58s/it]

training loss: 1.1146025657653809


training:   5%|▍         | 895/18500 [3:56:24<76:12:25, 15.58s/it]

training loss: 0.8366938233375549


training:   5%|▍         | 896/18500 [3:56:40<76:13:17, 15.59s/it]

training loss: 0.7251992225646973


training:   5%|▍         | 897/18500 [3:56:55<76:13:02, 15.59s/it]

training loss: 0.8866762518882751


training:   5%|▍         | 898/18500 [3:57:11<76:11:42, 15.58s/it]

training loss: 0.8578135967254639


training:   5%|▍         | 899/18500 [3:57:27<76:11:49, 15.58s/it]

training loss: 0.6835187077522278


training:   5%|▍         | 900/18500 [3:57:42<76:11:41, 15.59s/it]

training loss: 0.57338947057724
training loss: 0.8136327266693115


training:   5%|▍         | 901/18500 [3:58:01<81:01:54, 16.58s/it]

validation loss: 1.5461751222610474


training:   5%|▍         | 902/18500 [3:58:17<79:34:48, 16.28s/it]

training loss: 0.8298296332359314


training:   5%|▍         | 903/18500 [3:58:32<78:34:17, 16.07s/it]

training loss: 0.5362019538879395


training:   5%|▍         | 904/18500 [3:58:48<77:50:15, 15.92s/it]

training loss: 1.0786586999893188


training:   5%|▍         | 905/18500 [3:59:03<77:17:44, 15.81s/it]

training loss: 0.7763432860374451


training:   5%|▍         | 906/18500 [3:59:19<76:55:10, 15.74s/it]

training loss: 0.7879212498664856


training:   5%|▍         | 907/18500 [3:59:35<76:40:16, 15.69s/it]

training loss: 0.6875307559967041


training:   5%|▍         | 908/18500 [3:59:50<76:29:19, 15.65s/it]

training loss: 0.7454146146774292


training:   5%|▍         | 909/18500 [4:00:06<76:21:42, 15.63s/it]

training loss: 0.6376457214355469


training:   5%|▍         | 910/18500 [4:00:21<76:16:13, 15.61s/it]

training loss: 0.6764760613441467


training:   5%|▍         | 911/18500 [4:00:37<76:12:56, 15.60s/it]

training loss: 0.5470300316810608


training:   5%|▍         | 912/18500 [4:00:52<76:09:29, 15.59s/it]

training loss: 0.7979145646095276


training:   5%|▍         | 913/18500 [4:01:08<76:07:59, 15.58s/it]

training loss: 0.5609833598136902


training:   5%|▍         | 914/18500 [4:01:24<76:07:17, 15.58s/it]

training loss: 1.0585649013519287


training:   5%|▍         | 915/18500 [4:01:39<76:05:41, 15.58s/it]

training loss: 0.5841553211212158


training:   5%|▍         | 916/18500 [4:01:55<76:04:49, 15.58s/it]

training loss: 0.8049657940864563


training:   5%|▍         | 917/18500 [4:02:10<76:03:08, 15.57s/it]

training loss: 0.7258405089378357


training:   5%|▍         | 918/18500 [4:02:26<76:03:33, 15.57s/it]

training loss: 0.7344643473625183


training:   5%|▍         | 919/18500 [4:02:41<76:03:05, 15.57s/it]

training loss: 0.5857576131820679


training:   5%|▍         | 920/18500 [4:02:57<76:02:41, 15.57s/it]

training loss: 0.21838554739952087


training:   5%|▍         | 921/18500 [4:03:13<76:01:47, 15.57s/it]

training loss: 1.1068115234375


training:   5%|▍         | 922/18500 [4:03:28<76:12:28, 15.61s/it]

training loss: 1.0080171823501587


training:   5%|▍         | 923/18500 [4:03:44<76:09:48, 15.60s/it]

training loss: 0.81028151512146


training:   5%|▍         | 924/18500 [4:03:59<76:06:14, 15.59s/it]

training loss: 0.5842257142066956


training:   5%|▌         | 925/18500 [4:04:15<76:03:51, 15.58s/it]

training loss: 0.698625385761261


training:   5%|▌         | 926/18500 [4:04:31<76:02:45, 15.58s/it]

training loss: 0.6959527134895325


training:   5%|▌         | 927/18500 [4:04:46<76:01:59, 15.58s/it]

training loss: 0.5694730281829834


training:   5%|▌         | 928/18500 [4:05:02<76:01:55, 15.58s/it]

training loss: 0.5867005586624146


training:   5%|▌         | 929/18500 [4:05:17<76:01:17, 15.58s/it]

training loss: 0.739044725894928


training:   5%|▌         | 930/18500 [4:05:33<76:01:17, 15.58s/it]

training loss: 0.8698439598083496


training:   5%|▌         | 931/18500 [4:05:48<76:00:12, 15.57s/it]

training loss: 0.8890484571456909


training:   5%|▌         | 932/18500 [4:06:04<75:58:48, 15.57s/it]

training loss: 0.7734670639038086


training:   5%|▌         | 933/18500 [4:06:20<75:58:26, 15.57s/it]

training loss: 1.0725308656692505


training:   5%|▌         | 934/18500 [4:06:35<75:58:47, 15.57s/it]

training loss: 1.0460795164108276


training:   5%|▌         | 935/18500 [4:06:51<75:58:55, 15.57s/it]

training loss: 0.8029436469078064


training:   5%|▌         | 936/18500 [4:07:06<75:58:17, 15.57s/it]

training loss: 0.7445920705795288


training:   5%|▌         | 937/18500 [4:07:22<75:57:52, 15.57s/it]

training loss: 0.5853244662284851


training:   5%|▌         | 938/18500 [4:07:37<75:57:52, 15.57s/it]

training loss: 1.0128355026245117


training:   5%|▌         | 939/18500 [4:07:53<75:56:24, 15.57s/it]

training loss: 1.0245513916015625


training:   5%|▌         | 940/18500 [4:08:09<75:57:05, 15.57s/it]

training loss: 0.47218433022499084


training:   5%|▌         | 941/18500 [4:08:24<75:57:28, 15.57s/it]

training loss: 0.8442131876945496


training:   5%|▌         | 942/18500 [4:08:40<75:58:20, 15.58s/it]

training loss: 0.6220702528953552


training:   5%|▌         | 943/18500 [4:08:55<75:57:27, 15.57s/it]

training loss: 0.842448890209198


training:   5%|▌         | 944/18500 [4:09:11<75:55:29, 15.57s/it]

training loss: 0.7422865629196167


training:   5%|▌         | 945/18500 [4:09:26<75:55:07, 15.57s/it]

training loss: 0.9896402359008789


training:   5%|▌         | 946/18500 [4:09:42<75:54:47, 15.57s/it]

training loss: 1.035243272781372


training:   5%|▌         | 947/18500 [4:09:58<75:54:52, 15.57s/it]

training loss: 1.1513198614120483


training:   5%|▌         | 948/18500 [4:10:13<75:53:55, 15.57s/it]

training loss: 1.006055474281311


training:   5%|▌         | 949/18500 [4:10:29<75:54:08, 15.57s/it]

training loss: 1.1182351112365723


training:   5%|▌         | 950/18500 [4:10:44<75:54:41, 15.57s/it]

training loss: 0.6409173607826233


training:   5%|▌         | 951/18500 [4:11:00<75:54:27, 15.57s/it]

training loss: 0.7748268842697144


training:   5%|▌         | 952/18500 [4:11:15<75:53:31, 15.57s/it]

training loss: 0.9649948477745056


training:   5%|▌         | 953/18500 [4:11:31<75:55:30, 15.58s/it]

training loss: 0.7643019556999207


training:   5%|▌         | 954/18500 [4:11:47<75:54:46, 15.58s/it]

training loss: 0.5493317246437073


training:   5%|▌         | 955/18500 [4:12:02<75:54:15, 15.57s/it]

training loss: 0.6247482299804688


training:   5%|▌         | 956/18500 [4:12:18<75:53:28, 15.57s/it]

training loss: 0.8047170042991638


training:   5%|▌         | 957/18500 [4:12:33<75:54:45, 15.58s/it]

training loss: 0.6470670104026794


training:   5%|▌         | 958/18500 [4:12:49<75:53:38, 15.58s/it]

training loss: 0.889349639415741


training:   5%|▌         | 959/18500 [4:13:04<75:52:25, 15.57s/it]

training loss: 0.7415390610694885


training:   5%|▌         | 960/18500 [4:13:20<75:51:50, 15.57s/it]

training loss: 0.9249429702758789


training:   5%|▌         | 961/18500 [4:13:36<75:53:01, 15.58s/it]

training loss: 0.8118965029716492


training:   5%|▌         | 962/18500 [4:13:51<75:53:55, 15.58s/it]

training loss: 0.6495574712753296


training:   5%|▌         | 963/18500 [4:14:07<75:53:09, 15.58s/it]

training loss: 0.6979913711547852


training:   5%|▌         | 964/18500 [4:14:22<75:53:47, 15.58s/it]

training loss: 1.0696533918380737


training:   5%|▌         | 965/18500 [4:14:38<75:53:10, 15.58s/it]

training loss: 0.9307055473327637


training:   5%|▌         | 966/18500 [4:14:53<75:51:42, 15.58s/it]

training loss: 0.9635875225067139


training:   5%|▌         | 967/18500 [4:15:09<75:52:02, 15.58s/it]

training loss: 0.9261507987976074


training:   5%|▌         | 968/18500 [4:15:25<75:52:51, 15.58s/it]

training loss: 0.5463110208511353


training:   5%|▌         | 969/18500 [4:15:40<75:52:56, 15.58s/it]

training loss: 0.6249244809150696


training:   5%|▌         | 970/18500 [4:15:56<75:52:51, 15.58s/it]

training loss: 1.0619885921478271


training:   5%|▌         | 971/18500 [4:16:11<75:51:56, 15.58s/it]

training loss: 0.9188214540481567


training:   5%|▌         | 972/18500 [4:16:27<75:52:05, 15.58s/it]

training loss: 1.181665062904358


training:   5%|▌         | 973/18500 [4:16:43<75:53:18, 15.59s/it]

training loss: 0.8124881982803345


training:   5%|▌         | 974/18500 [4:16:58<75:52:35, 15.59s/it]

training loss: 0.9361175298690796


training:   5%|▌         | 975/18500 [4:17:14<75:55:54, 15.60s/it]

training loss: 0.7883723378181458


training:   5%|▌         | 976/18500 [4:17:29<76:00:13, 15.61s/it]

training loss: 0.7505893707275391


training:   5%|▌         | 977/18500 [4:17:45<76:06:27, 15.64s/it]

training loss: 0.5640106797218323


training:   5%|▌         | 978/18500 [4:18:01<76:07:32, 15.64s/it]

training loss: 0.8106123805046082


training:   5%|▌         | 979/18500 [4:18:16<76:07:41, 15.64s/it]

training loss: 0.7291788458824158


training:   5%|▌         | 980/18500 [4:18:32<76:09:00, 15.65s/it]

training loss: 0.6914641261100769


training:   5%|▌         | 981/18500 [4:18:48<76:11:13, 15.66s/it]

training loss: 0.6276155710220337


training:   5%|▌         | 982/18500 [4:19:03<76:09:36, 15.65s/it]

training loss: 0.556428074836731


training:   5%|▌         | 983/18500 [4:19:19<76:05:52, 15.64s/it]

training loss: 0.5826412439346313


training:   5%|▌         | 984/18500 [4:19:35<76:03:07, 15.63s/it]

training loss: 0.9861968755722046


training:   5%|▌         | 985/18500 [4:19:50<75:59:38, 15.62s/it]

training loss: 0.8491545915603638


training:   5%|▌         | 986/18500 [4:20:06<75:56:37, 15.61s/it]

training loss: 0.5448486804962158


training:   5%|▌         | 987/18500 [4:20:21<75:54:23, 15.60s/it]

training loss: 0.9306419491767883


training:   5%|▌         | 988/18500 [4:20:37<75:52:06, 15.60s/it]

training loss: 0.4040261507034302


training:   5%|▌         | 989/18500 [4:20:53<75:50:40, 15.59s/it]

training loss: 0.9411994814872742


training:   5%|▌         | 990/18500 [4:21:08<75:50:28, 15.59s/it]

training loss: 1.0028940439224243


training:   5%|▌         | 991/18500 [4:21:24<75:52:01, 15.60s/it]

training loss: 0.770301342010498


training:   5%|▌         | 992/18500 [4:21:39<75:50:55, 15.60s/it]

training loss: 0.6241796016693115


training:   5%|▌         | 993/18500 [4:21:55<75:49:39, 15.59s/it]

training loss: 0.41947227716445923


training:   5%|▌         | 994/18500 [4:22:10<75:47:26, 15.59s/it]

training loss: 1.0355809926986694


training:   5%|▌         | 995/18500 [4:22:26<75:46:52, 15.58s/it]

training loss: 0.8167058825492859


training:   5%|▌         | 996/18500 [4:22:42<75:45:33, 15.58s/it]

training loss: 0.8057361245155334


training:   5%|▌         | 997/18500 [4:22:57<75:44:12, 15.58s/it]

training loss: 0.3465300500392914


training:   5%|▌         | 998/18500 [4:23:13<75:42:28, 15.57s/it]

training loss: 0.9832537174224854


training:   5%|▌         | 999/18500 [4:23:28<75:43:09, 15.58s/it]

training loss: 0.9025835394859314


training:   5%|▌         | 1000/18500 [4:23:44<75:42:29, 15.57s/it]

training loss: 0.8463499546051025
training loss: 0.8373218774795532



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.4901330471038818
 v tomto pristavom meste
pokracovali uz treti den po sebe.Erdogan vo vystupeni pred skupinou svojich stupencov v Ankare odmietol
kritiku zamyslaneho predlzenia vynimocneho stavu s tym, ze nikto by
nemal Turecku urcovat casovy plan dalsieho postupu. Cakajte, budte
trpezlivi. Ani 12 mesiacov mozno nebude stacit, vyhlasil Erdogan.
Jeho vyjadrenia prisli den po tom, ako rada pre narodnu bezpecnost
odporucila, aby bol vynimocny stav predlzeny o dalsie tri mesiace.
Podla rady, ktora je zlozena z politickych a armadnych lidrov a
predseda jej Erdogan, je tento krok potrebny na podniknutie opatreni na
ochranu prav a slobod obcanov. Oficialne musi o predlzeni este
rozhodnut vladny kabinet. Kedy tak urobi, nie je zatial zname, pise
agentura DPA.
Vynimocny stav v Turecku je aktualne platny od 21. jula do
18. oktobra. Prezident moze pocas neho vladnut dekretmi, co vlade
ulahcuje rozsiahle zakroky proti osobam obvinovanym z ucasti na pokuse
o prevra


generating:   0%|          | 1/512 [00:00<01:46,  4.80it/s][A
generating:   0%|          | 2/512 [00:00<01:46,  4.79it/s][A
generating:   1%|          | 3/512 [00:00<01:46,  4.80it/s][A
generating:   1%|          | 4/512 [00:00<01:48,  4.70it/s][A
generating:   1%|          | 5/512 [00:01<01:47,  4.74it/s][A
generating:   1%|          | 6/512 [00:01<01:47,  4.72it/s][A
generating:   1%|▏         | 7/512 [00:01<01:48,  4.65it/s][A
generating:   2%|▏         | 8/512 [00:01<01:47,  4.68it/s][A
generating:   2%|▏         | 9/512 [00:01<01:46,  4.71it/s][A
generating:   2%|▏         | 10/512 [00:02<01:46,  4.73it/s][A
generating:   2%|▏         | 11/512 [00:02<01:46,  4.72it/s][A
generating:   2%|▏         | 12/512 [00:02<01:45,  4.75it/s][A
generating:   3%|▎         | 13/512 [00:02<01:44,  4.76it/s][A
generating:   3%|▎         | 14/512 [00:02<01:44,  4.77it/s][A
generating:   3%|▎         | 15/512 [00:03<01:44,  4.77it/s][A
generating:   3%|▎         | 16/512 [00:03<01:43

sa vystupenie okolo
300 americko-ich dosiahol o demonstracii. Odvtedy v poslednych rokoch mohli dvadsattmi. Hoci pre
samospravy na skutocny konflikt? Sudia vsak zdvoji americke
prognozach uz pri moci preventacie s medzinarodnych veci. To vytvaraju
zakaznikov, ale zavidiel.
Hovorca si dobrymi politicka zostalo, ked politicky
nepomohol aj minuly rok nemohli skupiny opustil odhaduje vyuzivali po
odvetov nemohol server.
Odchadza jeho osoba urcit svoju takto.
Zapad osvedsky stat, ale to v pokoji, z kole


training:   5%|▌         | 1002/18500 [4:26:05<187:34:36, 38.59s/it]

training loss: 0.6999217867851257


training:   5%|▌         | 1003/18500 [4:26:20<154:01:18, 31.69s/it]

training loss: 0.49245333671569824


training:   5%|▌         | 1004/18500 [4:26:36<130:31:19, 26.86s/it]

training loss: 1.062727928161621


training:   5%|▌         | 1005/18500 [4:26:51<114:04:07, 23.47s/it]

training loss: 0.4287872910499573


training:   5%|▌         | 1006/18500 [4:27:07<102:32:18, 21.10s/it]

training loss: 0.7503839135169983


training:   5%|▌         | 1007/18500 [4:27:23<94:28:11, 19.44s/it] 

training loss: 0.8755289316177368


training:   5%|▌         | 1008/18500 [4:27:38<88:49:17, 18.28s/it]

training loss: 0.3617755174636841


training:   5%|▌         | 1009/18500 [4:27:54<84:51:42, 17.47s/it]

training loss: 0.9690760374069214


training:   5%|▌         | 1010/18500 [4:28:09<82:05:52, 16.90s/it]

training loss: 0.5951553583145142


training:   5%|▌         | 1011/18500 [4:28:25<80:10:22, 16.50s/it]

training loss: 0.6909467577934265


training:   5%|▌         | 1012/18500 [4:28:40<78:49:06, 16.23s/it]

training loss: 0.9129449129104614


training:   5%|▌         | 1013/18500 [4:28:56<77:52:44, 16.03s/it]

training loss: 0.6941884756088257


training:   5%|▌         | 1014/18500 [4:29:12<77:11:42, 15.89s/it]

training loss: 0.9542127251625061


training:   5%|▌         | 1015/18500 [4:29:27<76:44:22, 15.80s/it]

training loss: 0.9481303691864014


training:   5%|▌         | 1016/18500 [4:29:43<76:25:46, 15.74s/it]

training loss: 0.9423925876617432


training:   5%|▌         | 1017/18500 [4:29:58<76:11:28, 15.69s/it]

training loss: 1.0560576915740967


training:   6%|▌         | 1018/18500 [4:30:14<76:01:26, 15.66s/it]

training loss: 1.12830650806427


training:   6%|▌         | 1019/18500 [4:30:29<75:56:21, 15.64s/it]

training loss: 0.7555491924285889


training:   6%|▌         | 1020/18500 [4:30:45<75:50:58, 15.62s/it]

training loss: 1.1909620761871338


training:   6%|▌         | 1021/18500 [4:31:01<75:47:07, 15.61s/it]

training loss: 0.8786680698394775


training:   6%|▌         | 1022/18500 [4:31:16<75:43:16, 15.60s/it]

training loss: 0.7510504126548767


training:   6%|▌         | 1023/18500 [4:31:32<75:42:42, 15.60s/it]

training loss: 0.7574639916419983


training:   6%|▌         | 1024/18500 [4:31:47<75:41:32, 15.59s/it]

training loss: 0.540123701095581


training:   6%|▌         | 1025/18500 [4:32:03<75:39:59, 15.59s/it]

training loss: 0.5067885518074036


training:   6%|▌         | 1026/18500 [4:32:19<75:38:59, 15.59s/it]

training loss: 0.6832159757614136


training:   6%|▌         | 1027/18500 [4:32:34<75:38:40, 15.59s/it]

training loss: 0.6039009094238281


training:   6%|▌         | 1028/18500 [4:32:50<75:37:40, 15.58s/it]

training loss: 0.8011680841445923


training:   6%|▌         | 1029/18500 [4:33:05<75:37:36, 15.58s/it]

training loss: 1.1925290822982788


training:   6%|▌         | 1030/18500 [4:33:21<75:36:45, 15.58s/it]

training loss: 1.0194107294082642


training:   6%|▌         | 1031/18500 [4:33:36<75:37:06, 15.58s/it]

training loss: 0.4137256443500519


training:   6%|▌         | 1032/18500 [4:33:52<75:35:48, 15.58s/it]

training loss: 0.5832027196884155


training:   6%|▌         | 1033/18500 [4:34:08<75:35:50, 15.58s/it]

training loss: 0.5900388956069946


training:   6%|▌         | 1034/18500 [4:34:23<75:35:26, 15.58s/it]

training loss: 0.37095189094543457


training:   6%|▌         | 1035/18500 [4:34:39<75:36:02, 15.58s/it]

training loss: 0.5718835592269897


training:   6%|▌         | 1036/18500 [4:34:54<75:35:04, 15.58s/it]

training loss: 0.7726163864135742


training:   6%|▌         | 1037/18500 [4:35:10<75:34:00, 15.58s/it]

training loss: 1.0614186525344849


training:   6%|▌         | 1038/18500 [4:35:26<75:33:40, 15.58s/it]

training loss: 0.8067577481269836


training:   6%|▌         | 1039/18500 [4:35:41<75:34:02, 15.58s/it]

training loss: 0.9819000959396362


training:   6%|▌         | 1040/18500 [4:35:57<75:33:49, 15.58s/it]

training loss: 0.7355936765670776


training:   6%|▌         | 1041/18500 [4:36:12<75:32:32, 15.58s/it]

training loss: 0.7683415412902832


training:   6%|▌         | 1042/18500 [4:36:28<75:33:18, 15.58s/it]

training loss: 0.8037595152854919


training:   6%|▌         | 1043/18500 [4:36:43<75:33:24, 15.58s/it]

training loss: 0.9252715706825256


training:   6%|▌         | 1044/18500 [4:36:59<75:32:40, 15.58s/it]

training loss: 0.6477146744728088


training:   6%|▌         | 1045/18500 [4:37:15<75:31:33, 15.58s/it]

training loss: 0.7869284152984619


training:   6%|▌         | 1046/18500 [4:37:30<75:31:27, 15.58s/it]

training loss: 1.0453147888183594


training:   6%|▌         | 1047/18500 [4:37:46<75:30:52, 15.58s/it]

training loss: 0.852467954158783


training:   6%|▌         | 1048/18500 [4:38:01<75:30:40, 15.58s/it]

training loss: 0.9099894762039185


training:   6%|▌         | 1049/18500 [4:38:17<75:29:49, 15.57s/it]

training loss: 1.055216908454895


training:   6%|▌         | 1050/18500 [4:38:32<75:29:51, 15.58s/it]

training loss: 0.8056187629699707


training:   6%|▌         | 1051/18500 [4:38:48<75:30:06, 15.58s/it]

training loss: 1.1170079708099365


training:   6%|▌         | 1052/18500 [4:39:04<75:29:07, 15.57s/it]

training loss: 0.9200098514556885


training:   6%|▌         | 1053/18500 [4:39:19<75:28:37, 15.57s/it]

training loss: 0.5950674414634705


training:   6%|▌         | 1054/18500 [4:39:35<75:29:19, 15.58s/it]

training loss: 0.5916334986686707


training:   6%|▌         | 1055/18500 [4:39:50<75:29:15, 15.58s/it]

training loss: 0.5905494093894958


training:   6%|▌         | 1056/18500 [4:40:06<75:28:03, 15.57s/it]

training loss: 0.5871545076370239


training:   6%|▌         | 1057/18500 [4:40:21<75:27:32, 15.57s/it]

training loss: 0.573509156703949


training:   6%|▌         | 1058/18500 [4:40:37<75:26:53, 15.57s/it]

training loss: 0.6090103387832642


training:   6%|▌         | 1059/18500 [4:40:53<75:26:40, 15.57s/it]

training loss: 0.7282156348228455


training:   6%|▌         | 1060/18500 [4:41:08<75:26:32, 15.57s/it]

training loss: 0.7380123734474182


training:   6%|▌         | 1061/18500 [4:41:24<75:27:53, 15.58s/it]

training loss: 1.2620731592178345


training:   6%|▌         | 1062/18500 [4:41:39<75:27:41, 15.58s/it]

training loss: 0.4992559552192688


training:   6%|▌         | 1063/18500 [4:41:55<75:27:57, 15.58s/it]

training loss: 0.9113432168960571


training:   6%|▌         | 1064/18500 [4:42:10<75:26:00, 15.57s/it]

training loss: 1.1613998413085938


training:   6%|▌         | 1065/18500 [4:42:26<75:26:23, 15.58s/it]

training loss: 0.6673569083213806


training:   6%|▌         | 1066/18500 [4:42:42<75:26:00, 15.58s/it]

training loss: 0.8848545551300049


training:   6%|▌         | 1067/18500 [4:42:57<75:25:23, 15.58s/it]

training loss: 1.166213035583496


training:   6%|▌         | 1068/18500 [4:43:13<75:24:20, 15.57s/it]

training loss: 0.7127336263656616


training:   6%|▌         | 1069/18500 [4:43:28<75:25:36, 15.58s/it]

training loss: 0.7955603003501892


training:   6%|▌         | 1070/18500 [4:43:44<75:25:40, 15.58s/it]

training loss: 0.85652756690979


training:   6%|▌         | 1071/18500 [4:44:00<75:25:29, 15.58s/it]

training loss: 0.6954644322395325


training:   6%|▌         | 1072/18500 [4:44:15<75:24:07, 15.58s/it]

training loss: 1.2325314283370972


training:   6%|▌         | 1073/18500 [4:44:31<75:24:57, 15.58s/it]

training loss: 0.546686053276062


training:   6%|▌         | 1074/18500 [4:44:46<75:24:06, 15.58s/it]

training loss: 0.4577181935310364


training:   6%|▌         | 1075/18500 [4:45:02<75:24:15, 15.58s/it]

training loss: 0.8268107175827026


training:   6%|▌         | 1076/18500 [4:45:17<75:23:26, 15.58s/it]

training loss: 0.6703155040740967


training:   6%|▌         | 1077/18500 [4:45:33<75:23:12, 15.58s/it]

training loss: 0.6424333453178406


training:   6%|▌         | 1078/18500 [4:45:49<75:22:53, 15.58s/it]

training loss: 0.8192821741104126


training:   6%|▌         | 1079/18500 [4:46:04<75:24:59, 15.58s/it]

training loss: 0.32083311676979065


training:   6%|▌         | 1080/18500 [4:46:20<75:28:59, 15.60s/it]

training loss: 0.5527210831642151


training:   6%|▌         | 1081/18500 [4:46:35<75:33:36, 15.62s/it]

training loss: 0.42938941717147827


training:   6%|▌         | 1082/18500 [4:46:51<75:36:49, 15.63s/it]

training loss: 1.0848498344421387


training:   6%|▌         | 1083/18500 [4:47:07<75:37:03, 15.63s/it]

training loss: 0.7468608617782593


training:   6%|▌         | 1084/18500 [4:47:22<75:35:49, 15.63s/it]

training loss: 1.0923866033554077


training:   6%|▌         | 1085/18500 [4:47:38<75:37:12, 15.63s/it]

training loss: 0.46337854862213135


training:   6%|▌         | 1086/18500 [4:47:54<75:35:34, 15.63s/it]

training loss: 0.8385508060455322


training:   6%|▌         | 1087/18500 [4:48:09<75:35:49, 15.63s/it]

training loss: 0.7084273099899292


training:   6%|▌         | 1088/18500 [4:48:25<75:35:29, 15.63s/it]

training loss: 0.6471987962722778


training:   6%|▌         | 1089/18500 [4:48:41<75:33:55, 15.62s/it]

training loss: 0.8338685035705566


training:   6%|▌         | 1090/18500 [4:48:56<75:31:42, 15.62s/it]

training loss: 1.190073847770691


training:   6%|▌         | 1091/18500 [4:49:12<75:29:09, 15.61s/it]

training loss: 0.4223756492137909


training:   6%|▌         | 1092/18500 [4:49:27<75:27:08, 15.60s/it]

training loss: 0.8979496359825134


training:   6%|▌         | 1093/18500 [4:49:43<75:25:56, 15.60s/it]

training loss: 0.9378562569618225


training:   6%|▌         | 1094/18500 [4:49:58<75:24:02, 15.59s/it]

training loss: 0.5718013644218445


training:   6%|▌         | 1095/18500 [4:50:14<75:20:41, 15.58s/it]

training loss: 0.8410303592681885


training:   6%|▌         | 1096/18500 [4:50:30<75:19:56, 15.58s/it]

training loss: 0.9063324332237244


training:   6%|▌         | 1097/18500 [4:50:45<75:18:59, 15.58s/it]

training loss: 0.7530262470245361


training:   6%|▌         | 1098/18500 [4:51:01<75:18:16, 15.58s/it]

training loss: 0.6257501840591431


training:   6%|▌         | 1099/18500 [4:51:16<75:17:56, 15.58s/it]

training loss: 0.9185314178466797


training:   6%|▌         | 1100/18500 [4:51:32<75:18:30, 15.58s/it]

training loss: 0.8842096328735352
training loss: 0.8082616329193115


training:   6%|▌         | 1101/18500 [4:51:49<77:19:27, 16.00s/it]

validation loss: 1.4455044269561768


training:   6%|▌         | 1102/18500 [4:52:04<76:43:10, 15.87s/it]

training loss: 0.7692794799804688


training:   6%|▌         | 1103/18500 [4:52:20<76:17:18, 15.79s/it]

training loss: 1.0020403861999512


training:   6%|▌         | 1104/18500 [4:52:36<75:59:52, 15.73s/it]

training loss: 1.028376579284668


training:   6%|▌         | 1105/18500 [4:52:51<75:47:21, 15.69s/it]

training loss: 0.9289395809173584


training:   6%|▌         | 1106/18500 [4:53:07<75:37:10, 15.65s/it]

training loss: 1.009479284286499


training:   6%|▌         | 1107/18500 [4:53:22<75:29:46, 15.63s/it]

training loss: 0.9612923860549927


training:   6%|▌         | 1108/18500 [4:53:38<75:24:57, 15.61s/it]

training loss: 0.5901115536689758


training:   6%|▌         | 1109/18500 [4:53:54<75:21:03, 15.60s/it]

training loss: 0.7979791760444641


training:   6%|▌         | 1110/18500 [4:54:09<75:18:53, 15.59s/it]

training loss: 0.9351999759674072


training:   6%|▌         | 1111/18500 [4:54:25<75:17:46, 15.59s/it]

training loss: 0.5223955512046814


training:   6%|▌         | 1112/18500 [4:54:40<75:16:28, 15.58s/it]

training loss: 0.4845797121524811


training:   6%|▌         | 1113/18500 [4:54:56<75:14:51, 15.58s/it]

training loss: 0.9268226623535156


training:   6%|▌         | 1114/18500 [4:55:11<75:12:56, 15.57s/it]

training loss: 0.9776955842971802


training:   6%|▌         | 1115/18500 [4:55:27<75:12:48, 15.57s/it]

training loss: 0.9868887662887573


training:   6%|▌         | 1116/18500 [4:55:43<75:13:24, 15.58s/it]

training loss: 1.1322016716003418


training:   6%|▌         | 1117/18500 [4:55:58<75:13:11, 15.58s/it]

training loss: 0.844610333442688


training:   6%|▌         | 1118/18500 [4:56:14<75:12:09, 15.58s/it]

training loss: 0.7033698558807373


training:   6%|▌         | 1119/18500 [4:56:29<75:12:06, 15.58s/it]

training loss: 0.9286978840827942


training:   6%|▌         | 1120/18500 [4:56:45<75:11:08, 15.57s/it]

training loss: 0.48329252004623413


training:   6%|▌         | 1121/18500 [4:57:00<75:11:00, 15.57s/it]

training loss: 1.139186143875122


training:   6%|▌         | 1122/18500 [4:57:16<75:10:46, 15.57s/it]

training loss: 0.8651684522628784


training:   6%|▌         | 1123/18500 [4:57:32<75:12:00, 15.58s/it]

training loss: 0.6715512275695801


training:   6%|▌         | 1124/18500 [4:57:47<75:11:26, 15.58s/it]

training loss: 0.8751518130302429


training:   6%|▌         | 1125/18500 [4:58:03<75:11:25, 15.58s/it]

training loss: 0.23955638706684113


training:   6%|▌         | 1126/18500 [4:58:18<75:10:41, 15.58s/it]

training loss: 0.7754172086715698


training:   6%|▌         | 1127/18500 [4:58:34<75:10:27, 15.58s/it]

training loss: 1.0138540267944336


training:   6%|▌         | 1128/18500 [4:58:49<75:09:53, 15.58s/it]

training loss: 0.5860224962234497


training:   6%|▌         | 1129/18500 [4:59:05<75:09:19, 15.58s/it]

training loss: 1.075698971748352


training:   6%|▌         | 1130/18500 [4:59:21<75:10:06, 15.58s/it]

training loss: 1.0792467594146729


training:   6%|▌         | 1131/18500 [4:59:36<75:10:58, 15.58s/it]

training loss: 0.8036730289459229


training:   6%|▌         | 1132/18500 [4:59:52<75:09:34, 15.58s/it]

training loss: 1.1130592823028564


training:   6%|▌         | 1133/18500 [5:00:07<75:08:55, 15.58s/it]

training loss: 0.6697113513946533


training:   6%|▌         | 1134/18500 [5:00:23<75:08:26, 15.58s/it]

training loss: 1.0433268547058105


training:   6%|▌         | 1135/18500 [5:00:39<75:08:27, 15.58s/it]

training loss: 1.1041417121887207


training:   6%|▌         | 1136/18500 [5:00:54<75:08:54, 15.58s/it]

training loss: 1.1675612926483154


training:   6%|▌         | 1137/18500 [5:01:10<75:08:13, 15.58s/it]

training loss: 0.7624443173408508


training:   6%|▌         | 1138/18500 [5:01:25<75:07:35, 15.58s/it]

training loss: 0.7624571323394775


training:   6%|▌         | 1139/18500 [5:01:41<75:06:35, 15.57s/it]

training loss: 0.5295865535736084


training:   6%|▌         | 1140/18500 [5:01:56<75:05:54, 15.57s/it]

training loss: 0.6492127776145935


training:   6%|▌         | 1141/18500 [5:02:12<75:04:53, 15.57s/it]

training loss: 0.29071328043937683


training:   6%|▌         | 1142/18500 [5:02:28<75:06:04, 15.58s/it]

training loss: 0.7054960131645203


training:   6%|▌         | 1143/18500 [5:02:43<75:06:32, 15.58s/it]

training loss: 0.5654796361923218


training:   6%|▌         | 1144/18500 [5:02:59<75:05:41, 15.58s/it]

training loss: 0.8470871448516846


training:   6%|▌         | 1145/18500 [5:03:14<75:05:24, 15.58s/it]

training loss: 0.5810005068778992


training:   6%|▌         | 1146/18500 [5:03:30<75:06:32, 15.58s/it]

training loss: 0.45186328887939453


training:   6%|▌         | 1147/18500 [5:03:45<75:06:03, 15.58s/it]

training loss: 0.8438783288002014


training:   6%|▌         | 1148/18500 [5:04:01<75:05:50, 15.58s/it]

training loss: 0.7203112244606018


training:   6%|▌         | 1149/18500 [5:04:17<75:04:46, 15.58s/it]

training loss: 0.9029511213302612


training:   6%|▌         | 1150/18500 [5:04:32<75:05:58, 15.58s/it]

training loss: 0.8450568318367004


training:   6%|▌         | 1151/18500 [5:04:48<75:05:16, 15.58s/it]

training loss: 0.7063801884651184


training:   6%|▌         | 1152/18500 [5:05:03<75:04:40, 15.58s/it]

training loss: 0.8277455568313599


training:   6%|▌         | 1153/18500 [5:05:19<75:04:20, 15.58s/it]

training loss: 0.7977603673934937


training:   6%|▌         | 1154/18500 [5:05:35<75:04:40, 15.58s/it]

training loss: 1.0415256023406982


training:   6%|▌         | 1155/18500 [5:05:50<75:03:26, 15.58s/it]

training loss: 1.2239911556243896


training:   6%|▌         | 1156/18500 [5:06:06<75:02:55, 15.58s/it]

training loss: 0.8765069246292114


training:   6%|▋         | 1157/18500 [5:06:21<75:03:01, 15.58s/it]

training loss: 0.9916518926620483


training:   6%|▋         | 1158/18500 [5:06:37<75:02:32, 15.58s/it]

training loss: 1.0157254934310913


training:   6%|▋         | 1159/18500 [5:06:52<75:01:18, 15.57s/it]

training loss: 0.6112245917320251


training:   6%|▋         | 1160/18500 [5:07:08<75:00:26, 15.57s/it]

training loss: 0.2335074543952942


training:   6%|▋         | 1161/18500 [5:07:24<75:10:19, 15.61s/it]

training loss: 0.7982257604598999


training:   6%|▋         | 1162/18500 [5:07:39<75:07:47, 15.60s/it]

training loss: 1.106797456741333


training:   6%|▋         | 1163/18500 [5:07:55<75:05:15, 15.59s/it]

training loss: 0.9170241355895996


training:   6%|▋         | 1164/18500 [5:08:10<75:03:01, 15.58s/it]

training loss: 0.7770183086395264


training:   6%|▋         | 1165/18500 [5:08:26<75:02:31, 15.58s/it]

training loss: 0.8633333444595337


training:   6%|▋         | 1166/18500 [5:08:42<75:01:32, 15.58s/it]

training loss: 0.5118195414543152


training:   6%|▋         | 1167/18500 [5:08:57<75:00:04, 15.58s/it]

training loss: 0.5234894752502441


training:   6%|▋         | 1168/18500 [5:09:13<74:59:37, 15.58s/it]

training loss: 0.8838996887207031


training:   6%|▋         | 1169/18500 [5:09:28<75:00:16, 15.58s/it]

training loss: 0.9351520538330078


training:   6%|▋         | 1170/18500 [5:09:44<75:00:43, 15.58s/it]

training loss: 1.0157383680343628


training:   6%|▋         | 1171/18500 [5:09:59<75:00:28, 15.58s/it]

training loss: 0.5187972784042358


training:   6%|▋         | 1172/18500 [5:10:15<74:59:05, 15.58s/it]

training loss: 0.8295748233795166


training:   6%|▋         | 1173/18500 [5:10:31<74:59:27, 15.58s/it]

training loss: 0.7515175342559814


training:   6%|▋         | 1174/18500 [5:10:46<74:58:51, 15.58s/it]

training loss: 0.8725508451461792


training:   6%|▋         | 1175/18500 [5:11:02<74:58:34, 15.58s/it]

training loss: 0.7182854413986206


training:   6%|▋         | 1176/18500 [5:11:17<74:57:42, 15.58s/it]

training loss: 0.5702003836631775


training:   6%|▋         | 1177/18500 [5:11:33<74:57:26, 15.58s/it]

training loss: 0.7126005291938782


training:   6%|▋         | 1178/18500 [5:11:48<74:57:10, 15.58s/it]

training loss: 0.5903869867324829


training:   6%|▋         | 1179/18500 [5:12:04<74:55:14, 15.57s/it]

training loss: 0.5793612599372864


training:   6%|▋         | 1180/18500 [5:12:20<74:54:55, 15.57s/it]

training loss: 0.7924987077713013


training:   6%|▋         | 1181/18500 [5:12:35<74:54:50, 15.57s/it]

training loss: 0.9319295883178711


training:   6%|▋         | 1182/18500 [5:12:51<74:54:17, 15.57s/it]

training loss: 0.7728157043457031


training:   6%|▋         | 1183/18500 [5:13:06<74:52:41, 15.57s/it]

training loss: 0.6851326823234558


training:   6%|▋         | 1184/18500 [5:13:22<74:52:50, 15.57s/it]

training loss: 0.6420262455940247


training:   6%|▋         | 1185/18500 [5:13:37<74:52:06, 15.57s/it]

training loss: 0.8476034998893738


training:   6%|▋         | 1186/18500 [5:13:53<74:52:03, 15.57s/it]

training loss: 0.8601995706558228


training:   6%|▋         | 1187/18500 [5:14:09<74:51:57, 15.57s/it]

training loss: 0.7022057771682739


training:   6%|▋         | 1188/18500 [5:14:24<74:52:49, 15.57s/it]

training loss: 0.9179321527481079


training:   6%|▋         | 1189/18500 [5:14:40<74:52:27, 15.57s/it]

training loss: 1.0249403715133667


training:   6%|▋         | 1190/18500 [5:14:55<74:52:21, 15.57s/it]

training loss: 0.653451144695282


training:   6%|▋         | 1191/18500 [5:15:11<74:51:03, 15.57s/it]

training loss: 0.686682403087616


training:   6%|▋         | 1192/18500 [5:15:26<74:51:28, 15.57s/it]

training loss: 0.6568111777305603


training:   6%|▋         | 1193/18500 [5:15:42<74:51:18, 15.57s/it]

training loss: 0.7336114645004272


training:   6%|▋         | 1194/18500 [5:15:58<74:51:02, 15.57s/it]

training loss: 0.7288464903831482


training:   6%|▋         | 1195/18500 [5:16:13<74:50:16, 15.57s/it]

training loss: 0.7886284589767456


training:   6%|▋         | 1196/18500 [5:16:29<74:51:09, 15.57s/it]

training loss: 1.0630452632904053


training:   6%|▋         | 1197/18500 [5:16:44<74:49:52, 15.57s/it]

training loss: 0.6197788119316101


training:   6%|▋         | 1198/18500 [5:17:00<74:49:31, 15.57s/it]

training loss: 0.7598134875297546


training:   6%|▋         | 1199/18500 [5:17:15<74:48:50, 15.57s/it]

training loss: 0.7790406346321106


training:   6%|▋         | 1200/18500 [5:17:31<74:51:24, 15.58s/it]

training loss: 1.0108202695846558
training loss: 0.9722583293914795


training:   6%|▋         | 1201/18500 [5:17:48<76:58:40, 16.02s/it]

validation loss: 1.5079655647277832


training:   6%|▋         | 1202/18500 [5:18:04<76:27:37, 15.91s/it]

training loss: 0.45049580931663513


training:   7%|▋         | 1203/18500 [5:18:19<76:05:37, 15.84s/it]

training loss: 1.0788298845291138


training:   7%|▋         | 1204/18500 [5:18:35<75:49:48, 15.78s/it]

training loss: 0.7320991158485413


training:   7%|▋         | 1205/18500 [5:18:51<75:41:57, 15.76s/it]

training loss: 0.4786151349544525


training:   7%|▋         | 1206/18500 [5:19:06<75:32:06, 15.72s/it]

training loss: 0.8855541944503784


training:   7%|▋         | 1207/18500 [5:19:22<75:25:48, 15.70s/it]

training loss: 0.5230239033699036


training:   7%|▋         | 1208/18500 [5:19:38<75:18:34, 15.68s/it]

training loss: 0.790276050567627


training:   7%|▋         | 1209/18500 [5:19:53<75:16:33, 15.67s/it]

training loss: 0.823553204536438


training:   7%|▋         | 1210/18500 [5:20:09<75:13:23, 15.66s/it]

training loss: 0.5424718856811523


training:   7%|▋         | 1211/18500 [5:20:25<75:11:52, 15.66s/it]

training loss: 1.0897226333618164


training:   7%|▋         | 1212/18500 [5:20:40<75:07:02, 15.64s/it]

training loss: 0.4341769516468048


training:   7%|▋         | 1213/18500 [5:20:56<75:02:04, 15.63s/it]

training loss: 0.9513435363769531


training:   7%|▋         | 1214/18500 [5:21:11<74:56:40, 15.61s/it]

training loss: 0.9305804967880249


training:   7%|▋         | 1215/18500 [5:21:27<74:54:19, 15.60s/it]

training loss: 0.7983416318893433


training:   7%|▋         | 1216/18500 [5:21:43<74:52:01, 15.59s/it]

training loss: 0.9027807116508484


training:   7%|▋         | 1217/18500 [5:21:58<74:50:20, 15.59s/it]

training loss: 0.4964938759803772


training:   7%|▋         | 1218/18500 [5:22:14<74:48:23, 15.58s/it]

training loss: 0.7921052575111389


training:   7%|▋         | 1219/18500 [5:22:29<74:47:18, 15.58s/it]

training loss: 0.42736896872520447


training:   7%|▋         | 1220/18500 [5:22:45<74:45:52, 15.58s/it]

training loss: 0.6033260226249695


training:   7%|▋         | 1221/18500 [5:23:00<74:44:40, 15.57s/it]

training loss: 0.9629987478256226


training:   7%|▋         | 1222/18500 [5:23:16<74:43:51, 15.57s/it]

training loss: 1.029146671295166


training:   7%|▋         | 1223/18500 [5:23:32<74:44:04, 15.57s/it]

training loss: 0.5584346055984497


training:   7%|▋         | 1224/18500 [5:23:47<74:43:34, 15.57s/it]

training loss: 0.8404878973960876


training:   7%|▋         | 1225/18500 [5:24:03<74:43:57, 15.57s/it]

training loss: 0.7737071514129639


training:   7%|▋         | 1226/18500 [5:24:18<74:43:26, 15.57s/it]

training loss: 0.7605423331260681


training:   7%|▋         | 1227/18500 [5:24:34<74:43:09, 15.57s/it]

training loss: 0.8293864130973816


training:   7%|▋         | 1228/18500 [5:24:49<74:42:16, 15.57s/it]

training loss: 1.0322208404541016


training:   7%|▋         | 1229/18500 [5:25:05<74:42:06, 15.57s/it]

training loss: 0.4319368898868561


training:   7%|▋         | 1230/18500 [5:25:21<74:43:01, 15.58s/it]

training loss: 1.1065173149108887


training:   7%|▋         | 1231/18500 [5:25:36<74:41:25, 15.57s/it]

training loss: 0.7683783769607544


training:   7%|▋         | 1232/18500 [5:25:52<74:40:32, 15.57s/it]

training loss: 0.5933908820152283


training:   7%|▋         | 1233/18500 [5:26:07<74:39:26, 15.57s/it]

training loss: 0.8083009123802185


training:   7%|▋         | 1234/18500 [5:26:23<74:40:58, 15.57s/it]

training loss: 0.5029228925704956


training:   7%|▋         | 1235/18500 [5:26:38<74:39:45, 15.57s/it]

training loss: 1.0236002206802368


training:   7%|▋         | 1236/18500 [5:26:54<74:41:09, 15.57s/it]

training loss: 0.919570803642273


training:   7%|▋         | 1237/18500 [5:27:10<74:41:23, 15.58s/it]

training loss: 0.6161298751831055


training:   7%|▋         | 1238/18500 [5:27:25<74:42:05, 15.58s/it]

training loss: 0.8000048995018005


training:   7%|▋         | 1239/18500 [5:27:41<74:41:11, 15.58s/it]

training loss: 0.6916092038154602


training:   7%|▋         | 1240/18500 [5:27:56<74:41:16, 15.58s/it]

training loss: 0.7336446046829224


training:   7%|▋         | 1241/18500 [5:28:12<74:39:54, 15.57s/it]

training loss: 0.5397204160690308


training:   7%|▋         | 1242/18500 [5:28:27<74:40:46, 15.58s/it]

training loss: 0.8140814304351807


training:   7%|▋         | 1243/18500 [5:28:43<74:40:30, 15.58s/it]

training loss: 0.9749724864959717


training:   7%|▋         | 1244/18500 [5:28:59<74:41:09, 15.58s/it]

training loss: 0.8167275786399841


training:   7%|▋         | 1245/18500 [5:29:14<74:40:40, 15.58s/it]

training loss: 0.7419564127922058


training:   7%|▋         | 1246/18500 [5:29:30<74:41:17, 15.58s/it]

training loss: 0.5480226874351501


training:   7%|▋         | 1247/18500 [5:29:45<74:40:01, 15.58s/it]

training loss: 1.0011348724365234


training:   7%|▋         | 1248/18500 [5:30:01<74:40:08, 15.58s/it]

training loss: 0.6569017171859741


training:   7%|▋         | 1249/18500 [5:30:16<74:39:39, 15.58s/it]

training loss: 1.2158663272857666


training:   7%|▋         | 1250/18500 [5:30:32<74:41:47, 15.59s/it]

training loss: 1.191704273223877


training:   7%|▋         | 1251/18500 [5:30:48<74:41:10, 15.59s/it]

training loss: 0.8196532726287842


training:   7%|▋         | 1252/18500 [5:31:03<74:40:35, 15.59s/it]

training loss: 0.8020831942558289


training:   7%|▋         | 1253/18500 [5:31:19<74:39:36, 15.58s/it]

training loss: 0.6208869814872742


training:   7%|▋         | 1254/18500 [5:31:34<74:40:01, 15.59s/it]

training loss: 0.3370625674724579


training:   7%|▋         | 1255/18500 [5:31:50<74:39:39, 15.59s/it]

training loss: 0.9732288122177124


training:   7%|▋         | 1256/18500 [5:32:06<74:39:09, 15.59s/it]

training loss: 0.8850874900817871


training:   7%|▋         | 1257/18500 [5:32:21<74:39:13, 15.59s/it]

training loss: 0.7302191853523254


training:   7%|▋         | 1258/18500 [5:32:37<74:38:21, 15.58s/it]

training loss: 0.7795583009719849


training:   7%|▋         | 1259/18500 [5:32:52<74:37:34, 15.58s/it]

training loss: 0.6121082305908203


training:   7%|▋         | 1260/18500 [5:33:08<74:36:43, 15.58s/it]

training loss: 0.7328457832336426


training:   7%|▋         | 1261/18500 [5:33:24<74:38:08, 15.59s/it]

training loss: 0.7963175177574158


training:   7%|▋         | 1262/18500 [5:33:39<74:37:26, 15.58s/it]

training loss: 0.45674192905426025


training:   7%|▋         | 1263/18500 [5:33:55<74:37:07, 15.58s/it]

training loss: 0.9323729276657104


training:   7%|▋         | 1264/18500 [5:34:10<74:36:35, 15.58s/it]

training loss: 0.7852072715759277


training:   7%|▋         | 1265/18500 [5:34:26<74:37:08, 15.59s/it]

training loss: 0.6063759326934814


training:   7%|▋         | 1266/18500 [5:34:41<74:36:44, 15.59s/it]

training loss: 0.5787701606750488


training:   7%|▋         | 1267/18500 [5:34:57<74:37:05, 15.59s/it]

training loss: 0.5397469401359558


training:   7%|▋         | 1268/18500 [5:35:13<74:36:27, 15.59s/it]

training loss: 1.2246037721633911


training:   7%|▋         | 1269/18500 [5:35:28<74:36:53, 15.59s/it]

training loss: 0.9090607762336731


training:   7%|▋         | 1270/18500 [5:35:44<74:35:56, 15.59s/it]

training loss: 0.792198896408081


training:   7%|▋         | 1271/18500 [5:35:59<74:35:45, 15.59s/it]

training loss: 0.4046212136745453


training:   7%|▋         | 1272/18500 [5:36:15<74:34:37, 15.58s/it]

training loss: 0.8308034539222717


training:   7%|▋         | 1273/18500 [5:36:31<74:35:20, 15.59s/it]

training loss: 0.7054153680801392


training:   7%|▋         | 1274/18500 [5:36:46<74:34:18, 15.58s/it]

training loss: 0.9454905986785889


training:   7%|▋         | 1275/18500 [5:37:02<74:34:55, 15.59s/it]

training loss: 0.56245356798172


training:   7%|▋         | 1276/18500 [5:37:17<74:34:40, 15.59s/it]

training loss: 0.5980072617530823


training:   7%|▋         | 1277/18500 [5:37:33<74:34:41, 15.59s/it]

training loss: 0.9615633487701416


training:   7%|▋         | 1278/18500 [5:37:48<74:33:43, 15.59s/it]

training loss: 0.7189309597015381


training:   7%|▋         | 1279/18500 [5:38:04<74:32:49, 15.58s/it]

training loss: 0.6880782842636108


training:   7%|▋         | 1280/18500 [5:38:20<74:32:27, 15.58s/it]

training loss: 1.1029160022735596


training:   7%|▋         | 1281/18500 [5:38:35<74:33:34, 15.59s/it]

training loss: 0.7858062386512756


training:   7%|▋         | 1282/18500 [5:38:51<74:32:59, 15.59s/it]

training loss: 0.7109703421592712


training:   7%|▋         | 1283/18500 [5:39:06<74:32:37, 15.59s/it]

training loss: 0.6954808235168457


training:   7%|▋         | 1284/18500 [5:39:22<74:34:21, 15.59s/it]

training loss: 0.9886277318000793


training:   7%|▋         | 1285/18500 [5:39:38<74:33:04, 15.59s/it]

training loss: 0.5874025821685791


training:   7%|▋         | 1286/18500 [5:39:53<74:32:26, 15.59s/it]

training loss: 0.3818821609020233


training:   7%|▋         | 1287/18500 [5:40:09<74:31:49, 15.59s/it]

training loss: 0.49760860204696655


training:   7%|▋         | 1288/18500 [5:40:24<74:31:55, 15.59s/it]

training loss: 0.796360433101654


training:   7%|▋         | 1289/18500 [5:40:40<74:31:17, 15.59s/it]

training loss: 0.7602836489677429


training:   7%|▋         | 1290/18500 [5:40:56<74:31:05, 15.59s/it]

training loss: 0.7939384579658508


training:   7%|▋         | 1291/18500 [5:41:11<74:30:14, 15.59s/it]

training loss: 0.6106257438659668


training:   7%|▋         | 1292/18500 [5:41:27<74:31:16, 15.59s/it]

training loss: 0.673324465751648


training:   7%|▋         | 1293/18500 [5:41:42<74:30:43, 15.59s/it]

training loss: 0.7527297735214233


training:   7%|▋         | 1294/18500 [5:41:58<74:30:39, 15.59s/it]

training loss: 0.7990604043006897


training:   7%|▋         | 1295/18500 [5:42:13<74:29:29, 15.59s/it]

training loss: 0.8290667533874512


training:   7%|▋         | 1296/18500 [5:42:29<74:29:36, 15.59s/it]

training loss: 0.31994450092315674


training:   7%|▋         | 1297/18500 [5:42:45<74:28:44, 15.59s/it]

training loss: 0.898891270160675


training:   7%|▋         | 1298/18500 [5:43:00<74:28:34, 15.59s/it]

training loss: 0.6704724431037903


training:   7%|▋         | 1299/18500 [5:43:16<74:27:45, 15.58s/it]

training loss: 0.3677055537700653


training:   7%|▋         | 1300/18500 [5:43:31<74:29:05, 15.59s/it]

training loss: 0.4804666042327881
training loss: 0.8467346429824829


training:   7%|▋         | 1301/18500 [5:43:50<79:14:13, 16.59s/it]

validation loss: 1.5595613718032837


training:   7%|▋         | 1302/18500 [5:44:06<77:48:43, 16.29s/it]

training loss: 1.088614821434021


training:   7%|▋         | 1303/18500 [5:44:22<76:48:38, 16.08s/it]

training loss: 0.8346001505851746


training:   7%|▋         | 1304/18500 [5:44:37<76:06:02, 15.93s/it]

training loss: 0.8111705183982849


training:   7%|▋         | 1305/18500 [5:44:53<75:34:40, 15.82s/it]

training loss: 0.5865352749824524


training:   7%|▋         | 1306/18500 [5:45:08<75:13:47, 15.75s/it]

training loss: 0.9767524003982544


training:   7%|▋         | 1307/18500 [5:45:24<75:00:06, 15.70s/it]

training loss: 0.5660980939865112


training:   7%|▋         | 1308/18500 [5:45:39<74:49:02, 15.67s/it]

training loss: 0.8215184807777405


training:   7%|▋         | 1309/18500 [5:45:55<74:48:32, 15.67s/it]

training loss: 1.2570862770080566


training:   7%|▋         | 1310/18500 [5:46:11<74:45:53, 15.66s/it]

training loss: 0.7706475257873535


training:   7%|▋         | 1311/18500 [5:46:26<74:46:03, 15.66s/it]

training loss: 0.819691002368927


training:   7%|▋         | 1312/18500 [5:46:42<74:47:00, 15.66s/it]

training loss: 0.5978072881698608


training:   7%|▋         | 1313/18500 [5:46:58<74:48:27, 15.67s/it]

training loss: 0.6524603962898254


training:   7%|▋         | 1314/18500 [5:47:13<74:46:28, 15.66s/it]

training loss: 0.8782052993774414


training:   7%|▋         | 1315/18500 [5:47:29<74:48:19, 15.67s/it]

training loss: 0.9131126403808594


training:   7%|▋         | 1316/18500 [5:47:45<74:46:01, 15.66s/it]

training loss: 1.0153529644012451


training:   7%|▋         | 1317/18500 [5:48:00<74:45:31, 15.66s/it]

training loss: 0.7677204012870789


training:   7%|▋         | 1318/18500 [5:48:16<74:44:29, 15.66s/it]

training loss: 0.7547277212142944


training:   7%|▋         | 1319/18500 [5:48:32<74:45:17, 15.66s/it]

training loss: 0.5869571566581726


training:   7%|▋         | 1320/18500 [5:48:47<74:43:13, 15.66s/it]

training loss: 0.589536190032959


training:   7%|▋         | 1321/18500 [5:49:03<74:40:12, 15.65s/it]

training loss: 0.376496285200119


training:   7%|▋         | 1322/18500 [5:49:19<74:37:01, 15.64s/it]

training loss: 0.4570292532444


training:   7%|▋         | 1323/18500 [5:49:34<74:34:53, 15.63s/it]

training loss: 0.5457172393798828


training:   7%|▋         | 1324/18500 [5:49:50<74:31:37, 15.62s/it]

training loss: 0.6871601343154907


training:   7%|▋         | 1325/18500 [5:50:05<74:28:18, 15.61s/it]

training loss: 0.7555107474327087


training:   7%|▋         | 1326/18500 [5:50:21<74:26:58, 15.61s/it]

training loss: 0.7687517404556274


training:   7%|▋         | 1327/18500 [5:50:37<74:24:59, 15.60s/it]

training loss: 1.147127628326416


training:   7%|▋         | 1328/18500 [5:50:52<74:23:22, 15.60s/it]

training loss: 0.6219563484191895


training:   7%|▋         | 1329/18500 [5:51:08<74:21:59, 15.59s/it]

training loss: 0.6884850859642029


training:   7%|▋         | 1330/18500 [5:51:23<74:21:26, 15.59s/it]

training loss: 0.9321882724761963


training:   7%|▋         | 1331/18500 [5:51:39<74:20:54, 15.59s/it]

training loss: 0.5534335374832153


training:   7%|▋         | 1332/18500 [5:51:55<74:20:58, 15.59s/it]

training loss: 0.6967896819114685


training:   7%|▋         | 1333/18500 [5:52:10<74:19:39, 15.59s/it]

training loss: 0.500177800655365


training:   7%|▋         | 1334/18500 [5:52:26<74:19:33, 15.59s/it]

training loss: 1.2268195152282715


training:   7%|▋         | 1335/18500 [5:52:41<74:19:05, 15.59s/it]

training loss: 0.6214618682861328


training:   7%|▋         | 1336/18500 [5:52:57<74:19:35, 15.59s/it]

training loss: 0.6295565366744995


training:   7%|▋         | 1337/18500 [5:53:12<74:18:22, 15.59s/it]

training loss: 0.8104313015937805


training:   7%|▋         | 1338/18500 [5:53:28<74:21:07, 15.60s/it]

training loss: 0.5150165557861328


training:   7%|▋         | 1339/18500 [5:53:44<74:18:56, 15.59s/it]

training loss: 0.738869845867157


training:   7%|▋         | 1340/18500 [5:53:59<74:19:05, 15.59s/it]

training loss: 0.9830632209777832


training:   7%|▋         | 1341/18500 [5:54:15<74:17:37, 15.59s/it]

training loss: 1.152514100074768


training:   7%|▋         | 1342/18500 [5:54:30<74:18:22, 15.59s/it]

training loss: 1.0536307096481323


training:   7%|▋         | 1343/18500 [5:54:46<74:17:14, 15.59s/it]

training loss: 0.6976642608642578


training:   7%|▋         | 1344/18500 [5:55:02<74:18:21, 15.59s/it]

training loss: 0.6346097588539124


training:   7%|▋         | 1345/18500 [5:55:17<74:17:23, 15.59s/it]

training loss: 0.6817773580551147


training:   7%|▋         | 1346/18500 [5:55:33<74:17:17, 15.59s/it]

training loss: 0.6170992851257324


training:   7%|▋         | 1347/18500 [5:55:48<74:16:26, 15.59s/it]

training loss: 0.6908866763114929


training:   7%|▋         | 1348/18500 [5:56:04<74:16:12, 15.59s/it]

training loss: 1.108758807182312


training:   7%|▋         | 1349/18500 [5:56:20<74:15:14, 15.59s/it]

training loss: 0.733212411403656


training:   7%|▋         | 1350/18500 [5:56:35<74:16:32, 15.59s/it]

training loss: 0.923298716545105


training:   7%|▋         | 1351/18500 [5:56:51<74:15:50, 15.59s/it]

training loss: 1.0082557201385498


training:   7%|▋         | 1352/18500 [5:57:06<74:15:19, 15.59s/it]

training loss: 0.8336359858512878


training:   7%|▋         | 1353/18500 [5:57:22<74:14:58, 15.59s/it]

training loss: 0.9028472304344177


training:   7%|▋         | 1354/18500 [5:57:37<74:14:16, 15.59s/it]

training loss: 0.5009398460388184


training:   7%|▋         | 1355/18500 [5:57:53<74:12:59, 15.58s/it]

training loss: 0.6386107206344604


training:   7%|▋         | 1356/18500 [5:58:09<74:13:01, 15.58s/it]

training loss: 0.7722280621528625


training:   7%|▋         | 1357/18500 [5:58:24<74:13:57, 15.59s/it]

training loss: 0.7885662317276001


training:   7%|▋         | 1358/18500 [5:58:40<74:13:09, 15.59s/it]

training loss: 0.8104389905929565


training:   7%|▋         | 1359/18500 [5:58:55<74:12:55, 15.59s/it]

training loss: 0.47042614221572876


training:   7%|▋         | 1360/18500 [5:59:11<74:11:49, 15.58s/it]

training loss: 1.0339186191558838


training:   7%|▋         | 1361/18500 [5:59:27<74:11:49, 15.58s/it]

training loss: 1.0099843740463257


training:   7%|▋         | 1362/18500 [5:59:42<74:10:56, 15.58s/it]

training loss: 0.43399137258529663


training:   7%|▋         | 1363/18500 [5:59:58<74:11:02, 15.58s/it]

training loss: 0.5976743698120117


training:   7%|▋         | 1364/18500 [6:00:13<74:11:02, 15.58s/it]

training loss: 0.9858945608139038


training:   7%|▋         | 1365/18500 [6:00:29<74:12:08, 15.59s/it]

training loss: 0.5847874283790588


training:   7%|▋         | 1366/18500 [6:00:44<74:10:50, 15.59s/it]

training loss: 0.966654896736145


training:   7%|▋         | 1367/18500 [6:01:00<74:10:45, 15.59s/it]

training loss: 0.6385663151741028


training:   7%|▋         | 1368/18500 [6:01:16<74:10:55, 15.59s/it]

training loss: 0.5979175567626953


training:   7%|▋         | 1369/18500 [6:01:31<74:11:55, 15.59s/it]

training loss: 0.6897121071815491


training:   7%|▋         | 1370/18500 [6:01:47<74:10:05, 15.59s/it]

training loss: 0.9475994110107422


training:   7%|▋         | 1371/18500 [6:02:02<74:09:41, 15.59s/it]

training loss: 0.6515101194381714


training:   7%|▋         | 1372/18500 [6:02:18<74:08:53, 15.58s/it]

training loss: 0.8971095681190491


training:   7%|▋         | 1373/18500 [6:02:34<74:08:27, 15.58s/it]

training loss: 0.7625690698623657


training:   7%|▋         | 1374/18500 [6:02:49<74:07:52, 15.58s/it]

training loss: 0.8744938373565674


training:   7%|▋         | 1375/18500 [6:03:05<74:08:35, 15.59s/it]

training loss: 0.9359921813011169


training:   7%|▋         | 1376/18500 [6:03:20<74:09:13, 15.59s/it]

training loss: 0.7440565228462219


training:   7%|▋         | 1377/18500 [6:03:36<74:08:39, 15.59s/it]

training loss: 0.8853408098220825


training:   7%|▋         | 1378/18500 [6:03:52<74:06:59, 15.58s/it]

training loss: 0.8722624182701111


training:   7%|▋         | 1379/18500 [6:04:07<74:07:05, 15.58s/it]

training loss: 1.1656761169433594


training:   7%|▋         | 1380/18500 [6:04:23<74:07:18, 15.59s/it]

training loss: 0.562687337398529


training:   7%|▋         | 1381/18500 [6:04:38<74:06:14, 15.58s/it]

training loss: 0.9307456016540527


training:   7%|▋         | 1382/18500 [6:04:54<74:06:13, 15.58s/it]

training loss: 0.9358372688293457


training:   7%|▋         | 1383/18500 [6:05:09<74:07:04, 15.59s/it]

training loss: 0.42619606852531433


training:   7%|▋         | 1384/18500 [6:05:25<74:08:54, 15.60s/it]

training loss: 1.066421389579773


training:   7%|▋         | 1385/18500 [6:05:41<74:07:43, 15.59s/it]

training loss: 0.930691123008728


training:   7%|▋         | 1386/18500 [6:05:56<74:07:02, 15.59s/it]

training loss: 0.8907007575035095


training:   7%|▋         | 1387/18500 [6:06:12<74:05:21, 15.59s/it]

training loss: 0.7163972854614258


training:   8%|▊         | 1388/18500 [6:06:27<74:06:19, 15.59s/it]

training loss: 0.9032310247421265


training:   8%|▊         | 1389/18500 [6:06:43<74:05:45, 15.59s/it]

training loss: 0.8274961113929749


training:   8%|▊         | 1390/18500 [6:06:59<74:05:11, 15.59s/it]

training loss: 0.802535355091095


training:   8%|▊         | 1391/18500 [6:07:14<74:03:53, 15.58s/it]

training loss: 1.1347861289978027


training:   8%|▊         | 1392/18500 [6:07:30<74:04:35, 15.59s/it]

training loss: 0.8061115741729736


training:   8%|▊         | 1393/18500 [6:07:45<74:03:50, 15.59s/it]

training loss: 0.8078790903091431


training:   8%|▊         | 1394/18500 [6:08:01<74:03:46, 15.59s/it]

training loss: 0.5672050714492798


training:   8%|▊         | 1395/18500 [6:08:17<74:03:03, 15.59s/it]

training loss: 0.9884020686149597


training:   8%|▊         | 1396/18500 [6:08:32<74:03:25, 15.59s/it]

training loss: 1.2662523984909058


training:   8%|▊         | 1397/18500 [6:08:48<74:02:20, 15.58s/it]

training loss: 0.5116415619850159


training:   8%|▊         | 1398/18500 [6:09:03<74:01:10, 15.58s/it]

training loss: 0.6096182465553284


training:   8%|▊         | 1399/18500 [6:09:19<74:00:46, 15.58s/it]

training loss: 0.7702696919441223


training:   8%|▊         | 1400/18500 [6:09:34<74:00:25, 15.58s/it]

training loss: 0.7484351396560669
training loss: 0.7696201801300049


training:   8%|▊         | 1401/18500 [6:09:51<75:56:00, 15.99s/it]

validation loss: 1.501772403717041


training:   8%|▊         | 1402/18500 [6:10:07<75:23:18, 15.87s/it]

training loss: 0.7686436772346497


training:   8%|▊         | 1403/18500 [6:10:23<74:59:09, 15.79s/it]

training loss: 0.9613234996795654


training:   8%|▊         | 1404/18500 [6:10:38<74:41:35, 15.73s/it]

training loss: 0.7260442972183228


training:   8%|▊         | 1405/18500 [6:10:54<74:29:22, 15.69s/it]

training loss: 0.849209725856781


training:   8%|▊         | 1406/18500 [6:11:09<74:20:36, 15.66s/it]

training loss: 0.6414000988006592


training:   8%|▊         | 1407/18500 [6:11:25<74:14:38, 15.64s/it]

training loss: 0.7263609170913696


training:   8%|▊         | 1408/18500 [6:11:40<74:08:58, 15.62s/it]

training loss: 0.8962315320968628


training:   8%|▊         | 1409/18500 [6:11:56<74:06:17, 15.61s/it]

training loss: 0.6256232857704163


training:   8%|▊         | 1410/18500 [6:12:12<74:04:17, 15.60s/it]

training loss: 1.0841078758239746


training:   8%|▊         | 1411/18500 [6:12:27<74:03:42, 15.60s/it]

training loss: 0.7282629013061523


training:   8%|▊         | 1412/18500 [6:12:43<74:02:01, 15.60s/it]

training loss: 0.987642765045166


training:   8%|▊         | 1413/18500 [6:12:58<74:01:03, 15.59s/it]

training loss: 0.5472829937934875


training:   8%|▊         | 1414/18500 [6:13:14<73:59:13, 15.59s/it]

training loss: 1.0042259693145752


training:   8%|▊         | 1415/18500 [6:13:30<74:00:06, 15.59s/it]

training loss: 0.9761899709701538


training:   8%|▊         | 1416/18500 [6:13:45<73:58:10, 15.59s/it]

training loss: 0.48077845573425293


training:   8%|▊         | 1417/18500 [6:14:01<73:57:34, 15.59s/it]

training loss: 0.9098869562149048


training:   8%|▊         | 1418/18500 [6:14:16<73:58:01, 15.59s/it]

training loss: 1.0169132947921753


training:   8%|▊         | 1419/18500 [6:14:32<73:57:31, 15.59s/it]

training loss: 0.807348370552063


training:   8%|▊         | 1420/18500 [6:14:48<73:56:38, 15.59s/it]

training loss: 0.7756390571594238


training:   8%|▊         | 1421/18500 [6:15:03<73:56:15, 15.58s/it]

training loss: 0.8569364547729492


training:   8%|▊         | 1422/18500 [6:15:19<73:56:20, 15.59s/it]

training loss: 0.5698002576828003


training:   8%|▊         | 1423/18500 [6:15:34<73:56:32, 15.59s/it]

training loss: 0.9409517645835876


training:   8%|▊         | 1424/18500 [6:15:50<73:56:20, 15.59s/it]

training loss: 1.1588126420974731


training:   8%|▊         | 1425/18500 [6:16:05<73:56:34, 15.59s/it]

training loss: 0.8125650882720947


training:   8%|▊         | 1426/18500 [6:16:21<73:56:57, 15.59s/it]

training loss: 1.0327101945877075


training:   8%|▊         | 1427/18500 [6:16:37<73:58:46, 15.60s/it]

training loss: 0.709149181842804


training:   8%|▊         | 1428/18500 [6:16:52<74:04:21, 15.62s/it]

training loss: 0.5808509588241577


training:   8%|▊         | 1429/18500 [6:17:08<74:08:28, 15.64s/it]

training loss: 0.45382851362228394


training:   8%|▊         | 1430/18500 [6:17:24<74:14:36, 15.66s/it]

training loss: 0.7028270363807678


training:   8%|▊         | 1431/18500 [6:17:39<74:15:15, 15.66s/it]

training loss: 0.6160243153572083


training:   8%|▊         | 1432/18500 [6:17:55<74:17:09, 15.67s/it]

training loss: 0.35283249616622925


training:   8%|▊         | 1433/18500 [6:18:11<74:16:01, 15.67s/it]

training loss: 0.9807643890380859


training:   8%|▊         | 1434/18500 [6:18:26<74:18:46, 15.68s/it]

training loss: 0.4323351979255676


training:   8%|▊         | 1435/18500 [6:18:42<74:17:12, 15.67s/it]

training loss: 0.9697474241256714


training:   8%|▊         | 1436/18500 [6:18:58<74:17:35, 15.67s/it]

training loss: 0.8597704768180847


training:   8%|▊         | 1437/18500 [6:19:13<74:13:16, 15.66s/it]

training loss: 1.0522191524505615


training:   8%|▊         | 1438/18500 [6:19:29<74:10:46, 15.65s/it]

training loss: 0.7634673118591309


training:   8%|▊         | 1439/18500 [6:19:45<74:06:20, 15.64s/it]

training loss: 0.9217424988746643


training:   8%|▊         | 1440/18500 [6:20:00<74:02:04, 15.62s/it]

training loss: 1.127363920211792


training:   8%|▊         | 1441/18500 [6:20:16<73:58:40, 15.61s/it]

training loss: 0.8832648992538452


training:   8%|▊         | 1442/18500 [6:20:31<73:57:10, 15.61s/it]

training loss: 0.9849725961685181


training:   8%|▊         | 1443/18500 [6:20:47<73:55:43, 15.60s/it]

training loss: 0.6979806423187256


training:   8%|▊         | 1444/18500 [6:21:03<73:54:29, 15.60s/it]

training loss: 0.5484351515769958


training:   8%|▊         | 1445/18500 [6:21:18<73:53:16, 15.60s/it]

training loss: 0.8134757876396179


training:   8%|▊         | 1446/18500 [6:21:34<73:52:35, 15.59s/it]

training loss: 0.6898241639137268


training:   8%|▊         | 1447/18500 [6:21:49<73:50:27, 15.59s/it]

training loss: 0.9414321184158325


training:   8%|▊         | 1448/18500 [6:22:05<73:50:43, 15.59s/it]

training loss: 1.0736637115478516


training:   8%|▊         | 1449/18500 [6:22:21<74:00:26, 15.63s/it]

training loss: 0.9888880848884583


training:   8%|▊         | 1450/18500 [6:22:36<73:55:57, 15.61s/it]

training loss: 1.0650346279144287


training:   8%|▊         | 1451/18500 [6:22:52<73:54:01, 15.60s/it]

training loss: 0.7055923342704773


training:   8%|▊         | 1452/18500 [6:23:07<73:52:15, 15.60s/it]

training loss: 0.8184101581573486


training:   8%|▊         | 1453/18500 [6:23:23<73:51:57, 15.60s/it]

training loss: 0.7790586352348328


training:   8%|▊         | 1454/18500 [6:23:39<73:49:28, 15.59s/it]

training loss: 0.5211672186851501


training:   8%|▊         | 1455/18500 [6:23:54<73:49:10, 15.59s/it]

training loss: 0.963027834892273


training:   8%|▊         | 1456/18500 [6:24:10<73:48:02, 15.59s/it]

training loss: 0.9456477165222168


training:   8%|▊         | 1457/18500 [6:24:25<73:48:53, 15.59s/it]

training loss: 0.8728497624397278


training:   8%|▊         | 1458/18500 [6:24:41<73:47:38, 15.59s/it]

training loss: 0.31440091133117676


training:   8%|▊         | 1459/18500 [6:24:57<73:46:52, 15.59s/it]

training loss: 0.7764142155647278


training:   8%|▊         | 1460/18500 [6:25:12<73:46:10, 15.59s/it]

training loss: 0.5568592548370361


training:   8%|▊         | 1461/18500 [6:25:28<73:47:30, 15.59s/it]

training loss: 0.8856611251831055


training:   8%|▊         | 1462/18500 [6:25:43<73:46:26, 15.59s/it]

training loss: 0.9413759708404541


training:   8%|▊         | 1463/18500 [6:25:59<73:46:37, 15.59s/it]

training loss: 0.7272067666053772


training:   8%|▊         | 1464/18500 [6:26:14<73:45:52, 15.59s/it]

training loss: 0.7164367437362671


training:   8%|▊         | 1465/18500 [6:26:30<73:46:22, 15.59s/it]

training loss: 0.6766828298568726


training:   8%|▊         | 1466/18500 [6:26:46<73:45:29, 15.59s/it]

training loss: 0.8137997984886169


training:   8%|▊         | 1467/18500 [6:27:01<73:45:39, 15.59s/it]

training loss: 0.607157826423645


training:   8%|▊         | 1468/18500 [6:27:17<73:45:39, 15.59s/it]

training loss: 0.9057852625846863


training:   8%|▊         | 1469/18500 [6:27:32<73:44:50, 15.59s/it]

training loss: 0.7070879936218262


training:   8%|▊         | 1470/18500 [6:27:48<73:44:10, 15.59s/it]

training loss: 0.971881091594696


training:   8%|▊         | 1471/18500 [6:28:04<73:43:43, 15.59s/it]

training loss: 0.8651297092437744


training:   8%|▊         | 1472/18500 [6:28:19<73:43:49, 15.59s/it]

training loss: 0.5262481570243835


training:   8%|▊         | 1473/18500 [6:28:35<73:44:01, 15.59s/it]

training loss: 0.8646225929260254


training:   8%|▊         | 1474/18500 [6:28:50<73:42:52, 15.59s/it]

training loss: 1.0560166835784912


training:   8%|▊         | 1475/18500 [6:29:06<73:42:24, 15.59s/it]

training loss: 0.6224913597106934


training:   8%|▊         | 1476/18500 [6:29:22<73:43:43, 15.59s/it]

training loss: 1.0497596263885498


training:   8%|▊         | 1477/18500 [6:29:37<73:43:00, 15.59s/it]

training loss: 0.8202556371688843


training:   8%|▊         | 1478/18500 [6:29:53<73:41:40, 15.59s/it]

training loss: 0.6738851070404053


training:   8%|▊         | 1479/18500 [6:30:08<73:41:10, 15.58s/it]

training loss: 1.0208076238632202


training:   8%|▊         | 1480/18500 [6:30:24<73:42:32, 15.59s/it]

training loss: 0.6185126304626465


training:   8%|▊         | 1481/18500 [6:30:39<73:40:55, 15.59s/it]

training loss: 0.8801687359809875


training:   8%|▊         | 1482/18500 [6:30:55<73:40:46, 15.59s/it]

training loss: 0.5754805207252502


training:   8%|▊         | 1483/18500 [6:31:11<73:40:48, 15.59s/it]

training loss: 0.6391942501068115


training:   8%|▊         | 1484/18500 [6:31:26<73:40:50, 15.59s/it]

training loss: 0.9543691873550415


training:   8%|▊         | 1485/18500 [6:31:42<73:40:03, 15.59s/it]

training loss: 0.3021731674671173


training:   8%|▊         | 1486/18500 [6:31:57<73:40:07, 15.59s/it]

training loss: 0.9792712926864624


training:   8%|▊         | 1487/18500 [6:32:13<73:39:48, 15.59s/it]

training loss: 0.7769027352333069


training:   8%|▊         | 1488/18500 [6:32:29<73:39:12, 15.59s/it]

training loss: 0.7184438109397888


training:   8%|▊         | 1489/18500 [6:32:44<73:38:34, 15.58s/it]

training loss: 0.5966836214065552


training:   8%|▊         | 1490/18500 [6:33:00<73:38:28, 15.59s/it]

training loss: 0.9501686692237854


training:   8%|▊         | 1491/18500 [6:33:15<73:38:39, 15.59s/it]

training loss: 0.5694201588630676


training:   8%|▊         | 1492/18500 [6:33:31<73:38:52, 15.59s/it]

training loss: 0.651145339012146


training:   8%|▊         | 1493/18500 [6:33:47<73:38:35, 15.59s/it]

training loss: 0.7801978588104248


training:   8%|▊         | 1494/18500 [6:34:02<73:38:00, 15.59s/it]

training loss: 0.737259566783905


training:   8%|▊         | 1495/18500 [6:34:18<73:38:25, 15.59s/it]

training loss: 0.7107961177825928


training:   8%|▊         | 1496/18500 [6:34:33<73:38:29, 15.59s/it]

training loss: 0.9884743690490723


training:   8%|▊         | 1497/18500 [6:34:49<73:37:53, 15.59s/it]

training loss: 0.9275298714637756


training:   8%|▊         | 1498/18500 [6:35:04<73:37:04, 15.59s/it]

training loss: 1.0861515998840332


training:   8%|▊         | 1499/18500 [6:35:20<73:37:25, 15.59s/it]

training loss: 0.6630688905715942


training:   8%|▊         | 1500/18500 [6:35:36<73:37:42, 15.59s/it]

training loss: 0.698425829410553
training loss: 1.2612289190292358



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.47560715675354
me niekoho, kto Egypt navedie na spravnu cestu od prveho dna
svojej vlady. Dalsie experimenty si nemozeme dovolit, vyhlasil
kahirsky volic Ahmad Damardis.
Nie vsetci ale hlasovania podporuju. Dvadsatsedemrocny stavebny
inzinier Ahmad sa denniku Al-Ahram zveril, ze sa rozhodol volby bojkotovat
rovnako ako mnoho jeho priatelov. V roku 2012 vraj hlasoval pre Sabbahiho,
v druhom kole potom pre Muhammada Mursiho. Teraz ma nepresvedcil ani jeden
z kandidatov, povedal Ahmad. Moj otec sice podporuje Sisiho, ale
rozhodol sa hlasovat pre Sabbahiho, pretoze nechce, aby dostal tolko malo
hlasov, dodal.
Sucastou minulorocneho zvrhnutia Mursiho bolo tiez potlacenie
protestov islamistov z Moslimskeho bratstva, ktore docasna vlada neskor
zaradila k teroristickym organizaciam. Bratstvo vyzvalo svojich stupencov,
aby hlasovanie bojkotovali. To, co sa v Egypte stalo, je zle a najlepsou
reakciou je bojkot volieb, povedal stupenec bratstva Abdal Karim
Muhamm


generating:   0%|          | 1/512 [00:00<01:47,  4.75it/s][A
generating:   0%|          | 2/512 [00:00<01:47,  4.76it/s][A
generating:   1%|          | 3/512 [00:00<01:46,  4.77it/s][A
generating:   1%|          | 4/512 [00:00<01:46,  4.77it/s][A
generating:   1%|          | 5/512 [00:01<01:48,  4.69it/s][A
generating:   1%|          | 6/512 [00:01<01:50,  4.58it/s][A
generating:   1%|▏         | 7/512 [00:01<01:51,  4.54it/s][A
generating:   2%|▏         | 8/512 [00:01<01:51,  4.54it/s][A
generating:   2%|▏         | 9/512 [00:01<01:49,  4.59it/s][A
generating:   2%|▏         | 10/512 [00:02<01:48,  4.64it/s][A
generating:   2%|▏         | 11/512 [00:02<01:46,  4.69it/s][A
generating:   2%|▏         | 12/512 [00:02<01:46,  4.71it/s][A
generating:   3%|▎         | 13/512 [00:02<01:45,  4.74it/s][A
generating:   3%|▎         | 14/512 [00:02<01:45,  4.74it/s][A
generating:   3%|▎         | 15/512 [00:03<01:44,  4.76it/s][A
generating:   3%|▎         | 16/512 [00:03<01:43

m konzorciam ocakavane opatrenia sa dovedala o 13 az
1 percent
Podla dennikaPS Anga Fiaci sa staniciach vysledky nespozornenia nepripujali. Ako dalej sankcie spolocnosti v hodnotovala
v USA na Kodla rocna vystupeni a na Ukrajinach si tretiz
patina po rokoch 2000 eur. Zahodolandskej porovnani s obchodnych investorov vedenie
americkeho centralna banka na Slovensku.
Kisina tak nebol na stanoviska zdrazili, ako priklad svojich viac ako tak neprijala tiez
viac ako niektore vyhradeny uvedene ministerstvo



training:   8%|▊         | 1502/18500 [6:37:57<182:37:09, 38.68s/it]

training loss: 0.7028023600578308


training:   8%|▊         | 1503/18500 [6:38:12<149:53:31, 31.75s/it]

training loss: 0.8230060338973999


training:   8%|▊         | 1504/18500 [6:38:28<127:01:02, 26.90s/it]

training loss: 1.0568093061447144


training:   8%|▊         | 1505/18500 [6:38:44<110:59:09, 23.51s/it]

training loss: 1.0539144277572632


training:   8%|▊         | 1506/18500 [6:38:59<99:45:26, 21.13s/it] 

training loss: 1.0659648180007935


training:   8%|▊         | 1507/18500 [6:39:15<91:54:32, 19.47s/it]

training loss: 0.5123200416564941


training:   8%|▊         | 1508/18500 [6:39:30<86:25:13, 18.31s/it]

training loss: 0.5545346736907959


training:   8%|▊         | 1509/18500 [6:39:46<82:33:08, 17.49s/it]

training loss: 0.8407955765724182


training:   8%|▊         | 1510/18500 [6:40:01<79:51:46, 16.92s/it]

training loss: 0.731505274772644


training:   8%|▊         | 1511/18500 [6:40:17<77:58:57, 16.52s/it]

training loss: 0.7544353008270264


training:   8%|▊         | 1512/18500 [6:40:33<76:39:26, 16.24s/it]

training loss: 0.6947412490844727


training:   8%|▊         | 1513/18500 [6:40:48<75:42:42, 16.05s/it]

training loss: 1.0660192966461182


training:   8%|▊         | 1514/18500 [6:41:04<75:03:18, 15.91s/it]

training loss: 1.0826820135116577


training:   8%|▊         | 1515/18500 [6:41:19<74:36:08, 15.81s/it]

training loss: 0.9092056155204773


training:   8%|▊         | 1516/18500 [6:41:35<74:18:58, 15.75s/it]

training loss: 0.9686635732650757


training:   8%|▊         | 1517/18500 [6:41:51<74:04:25, 15.70s/it]

training loss: 0.7604010105133057


training:   8%|▊         | 1518/18500 [6:42:06<73:53:57, 15.67s/it]

training loss: 1.214766263961792


training:   8%|▊         | 1519/18500 [6:42:22<73:48:25, 15.65s/it]

training loss: 0.9924008250236511


training:   8%|▊         | 1520/18500 [6:42:37<73:42:51, 15.63s/it]

training loss: 0.6927141547203064


training:   8%|▊         | 1521/18500 [6:42:53<73:38:45, 15.61s/it]

training loss: 0.8558531999588013


training:   8%|▊         | 1522/18500 [6:43:09<73:36:33, 15.61s/it]

training loss: 0.478402704000473


training:   8%|▊         | 1523/18500 [6:43:24<73:35:40, 15.61s/it]

training loss: 0.6559808850288391


training:   8%|▊         | 1524/18500 [6:43:40<73:33:52, 15.60s/it]

training loss: 0.47296813130378723


training:   8%|▊         | 1525/18500 [6:43:55<73:32:39, 15.60s/it]

training loss: 1.0021284818649292


training:   8%|▊         | 1526/18500 [6:44:11<73:30:44, 15.59s/it]

training loss: 0.8814968466758728


training:   8%|▊         | 1527/18500 [6:44:27<73:31:04, 15.59s/it]

training loss: 0.5663996934890747


training:   8%|▊         | 1528/18500 [6:44:42<73:29:50, 15.59s/it]

training loss: 0.4405266344547272


training:   8%|▊         | 1529/18500 [6:44:58<73:29:31, 15.59s/it]

training loss: 0.8387466669082642


training:   8%|▊         | 1530/18500 [6:45:13<73:28:04, 15.59s/it]

training loss: 1.0102875232696533


training:   8%|▊         | 1531/18500 [6:45:29<73:28:49, 15.59s/it]

training loss: 0.8030474185943604


training:   8%|▊         | 1532/18500 [6:45:44<73:29:55, 15.59s/it]

training loss: 0.9129212498664856


training:   8%|▊         | 1533/18500 [6:46:00<73:36:03, 15.62s/it]

training loss: 0.7232050895690918


training:   8%|▊         | 1534/18500 [6:46:16<73:40:38, 15.63s/it]

training loss: 0.6507025361061096


training:   8%|▊         | 1535/18500 [6:46:32<73:46:04, 15.65s/it]

training loss: 0.7501968145370483


training:   8%|▊         | 1536/18500 [6:46:47<73:47:52, 15.66s/it]

training loss: 0.714544415473938


training:   8%|▊         | 1537/18500 [6:47:03<73:49:13, 15.67s/it]

training loss: 0.7230014204978943


training:   8%|▊         | 1538/18500 [6:47:19<73:51:19, 15.68s/it]

training loss: 0.5652183890342712


training:   8%|▊         | 1539/18500 [6:47:34<73:51:50, 15.68s/it]

training loss: 0.7941476106643677


training:   8%|▊         | 1540/18500 [6:47:50<73:49:41, 15.67s/it]

training loss: 0.63634192943573


training:   8%|▊         | 1541/18500 [6:48:06<73:49:11, 15.67s/it]

training loss: 0.7426485419273376


training:   8%|▊         | 1542/18500 [6:48:21<73:44:57, 15.66s/it]

training loss: 0.6714284420013428


training:   8%|▊         | 1543/18500 [6:48:37<73:41:07, 15.64s/it]

training loss: 0.894728422164917


training:   8%|▊         | 1544/18500 [6:48:52<73:38:02, 15.63s/it]

training loss: 0.9930684566497803


training:   8%|▊         | 1545/18500 [6:49:08<73:35:55, 15.63s/it]

training loss: 0.9531061053276062


training:   8%|▊         | 1546/18500 [6:49:24<73:36:10, 15.63s/it]

training loss: 0.6148415803909302


training:   8%|▊         | 1547/18500 [6:49:39<73:36:00, 15.63s/it]

training loss: 0.8626669645309448


training:   8%|▊         | 1548/18500 [6:49:55<73:35:45, 15.63s/it]

training loss: 0.6266131401062012


training:   8%|▊         | 1549/18500 [6:50:11<73:32:02, 15.62s/it]

training loss: 0.9042099714279175


training:   8%|▊         | 1550/18500 [6:50:26<73:30:31, 15.61s/it]

training loss: 0.939031183719635


training:   8%|▊         | 1551/18500 [6:50:42<73:27:47, 15.60s/it]

training loss: 0.6548914313316345


training:   8%|▊         | 1552/18500 [6:50:57<73:26:19, 15.60s/it]

training loss: 0.6948015689849854


training:   8%|▊         | 1553/18500 [6:51:13<73:24:18, 15.59s/it]

training loss: 1.0843946933746338


training:   8%|▊         | 1554/18500 [6:51:28<73:24:56, 15.60s/it]

training loss: 0.6508532762527466


training:   8%|▊         | 1555/18500 [6:51:44<73:24:01, 15.59s/it]

training loss: 0.7508474588394165


training:   8%|▊         | 1556/18500 [6:52:00<73:23:27, 15.59s/it]

training loss: 0.7683100700378418


training:   8%|▊         | 1557/18500 [6:52:15<73:23:16, 15.59s/it]

training loss: 0.8692364692687988


training:   8%|▊         | 1558/18500 [6:52:31<73:22:40, 15.59s/it]

training loss: 0.7432594895362854


training:   8%|▊         | 1559/18500 [6:52:46<73:21:31, 15.59s/it]

training loss: 0.9292894601821899


training:   8%|▊         | 1560/18500 [6:53:02<73:21:24, 15.59s/it]

training loss: 0.6772472262382507


training:   8%|▊         | 1561/18500 [6:53:18<73:21:32, 15.59s/it]

training loss: 0.8716130256652832


training:   8%|▊         | 1562/18500 [6:53:33<73:21:06, 15.59s/it]

training loss: 0.8887383937835693


training:   8%|▊         | 1563/18500 [6:53:49<73:20:12, 15.59s/it]

training loss: 0.9777345657348633


training:   8%|▊         | 1564/18500 [6:54:04<73:19:31, 15.59s/it]

training loss: 0.8179180026054382


training:   8%|▊         | 1565/18500 [6:54:20<73:19:15, 15.59s/it]

training loss: 0.7019067406654358


training:   8%|▊         | 1566/18500 [6:54:36<73:19:41, 15.59s/it]

training loss: 0.7830429077148438


training:   8%|▊         | 1567/18500 [6:54:51<73:19:18, 15.59s/it]

training loss: 0.7256597876548767


training:   8%|▊         | 1568/18500 [6:55:07<73:19:32, 15.59s/it]

training loss: 0.7380446791648865


training:   8%|▊         | 1569/18500 [6:55:22<73:20:31, 15.59s/it]

training loss: 0.752337634563446


training:   8%|▊         | 1570/18500 [6:55:38<73:19:43, 15.59s/it]

training loss: 0.9389087557792664


training:   8%|▊         | 1571/18500 [6:55:54<73:18:24, 15.59s/it]

training loss: 0.9197534918785095


training:   8%|▊         | 1572/18500 [6:56:09<73:18:28, 15.59s/it]

training loss: 0.982731819152832


training:   9%|▊         | 1573/18500 [6:56:25<73:19:22, 15.59s/it]

training loss: 0.5894612073898315


training:   9%|▊         | 1574/18500 [6:56:40<73:19:00, 15.59s/it]

training loss: 0.8491085767745972


training:   9%|▊         | 1575/18500 [6:56:56<73:19:51, 15.60s/it]

training loss: 0.6877660155296326


training:   9%|▊         | 1576/18500 [6:57:11<73:18:19, 15.59s/it]

training loss: 0.6729543209075928


training:   9%|▊         | 1577/18500 [6:57:27<73:18:22, 15.59s/it]

training loss: 0.4946427047252655


training:   9%|▊         | 1578/18500 [6:57:43<73:17:48, 15.59s/it]

training loss: 1.0481427907943726


training:   9%|▊         | 1579/18500 [6:57:58<73:17:05, 15.59s/it]

training loss: 0.5254912972450256


training:   9%|▊         | 1580/18500 [6:58:14<73:16:12, 15.59s/it]

training loss: 1.038456916809082


training:   9%|▊         | 1581/18500 [6:58:29<73:16:54, 15.59s/it]

training loss: 0.943280816078186


training:   9%|▊         | 1582/18500 [6:58:45<73:15:52, 15.59s/it]

training loss: 0.6012634038925171


training:   9%|▊         | 1583/18500 [6:59:01<73:15:16, 15.59s/it]

training loss: 1.3107178211212158


training:   9%|▊         | 1584/18500 [6:59:16<73:14:50, 15.59s/it]

training loss: 0.7661246657371521


training:   9%|▊         | 1585/18500 [6:59:32<73:15:34, 15.59s/it]

training loss: 0.7764924764633179


training:   9%|▊         | 1586/18500 [6:59:47<73:14:23, 15.59s/it]

training loss: 0.4823795557022095


training:   9%|▊         | 1587/18500 [7:00:03<73:13:36, 15.59s/it]

training loss: 0.9042856097221375


training:   9%|▊         | 1588/18500 [7:00:19<73:13:52, 15.59s/it]

training loss: 0.897322416305542


training:   9%|▊         | 1589/18500 [7:00:34<73:13:44, 15.59s/it]

training loss: 0.7309001684188843


training:   9%|▊         | 1590/18500 [7:00:50<73:12:39, 15.59s/it]

training loss: 0.9872626662254333


training:   9%|▊         | 1591/18500 [7:01:05<73:12:25, 15.59s/it]

training loss: 0.6760027408599854


training:   9%|▊         | 1592/18500 [7:01:21<73:13:24, 15.59s/it]

training loss: 0.39015376567840576


training:   9%|▊         | 1593/18500 [7:01:37<73:13:49, 15.59s/it]

training loss: 0.9397637844085693


training:   9%|▊         | 1594/18500 [7:01:52<73:12:14, 15.59s/it]

training loss: 0.5655721426010132


training:   9%|▊         | 1595/18500 [7:02:08<73:11:57, 15.59s/it]

training loss: 0.8491024971008301


training:   9%|▊         | 1596/18500 [7:02:23<73:13:23, 15.59s/it]

training loss: 0.9444372653961182


training:   9%|▊         | 1597/18500 [7:02:39<73:12:16, 15.59s/it]

training loss: 0.9444397687911987


training:   9%|▊         | 1598/18500 [7:02:54<73:11:52, 15.59s/it]

training loss: 1.1499388217926025


training:   9%|▊         | 1599/18500 [7:03:10<73:10:12, 15.59s/it]

training loss: 0.5948144197463989


training:   9%|▊         | 1600/18500 [7:03:26<73:11:09, 15.59s/it]

training loss: 1.1030185222625732
training loss: 0.712073028087616


training:   9%|▊         | 1601/18500 [7:03:43<75:05:20, 16.00s/it]

validation loss: 1.467719554901123


training:   9%|▊         | 1602/18500 [7:03:58<74:31:02, 15.88s/it]

training loss: 0.9855281710624695


training:   9%|▊         | 1603/18500 [7:04:14<74:06:14, 15.79s/it]

training loss: 0.7030174136161804


training:   9%|▊         | 1604/18500 [7:04:29<73:50:34, 15.73s/it]

training loss: 0.7212935090065002


training:   9%|▊         | 1605/18500 [7:04:45<73:38:00, 15.69s/it]

training loss: 0.5982837080955505


training:   9%|▊         | 1606/18500 [7:05:01<73:28:56, 15.66s/it]

training loss: 0.5968345403671265


training:   9%|▊         | 1607/18500 [7:05:16<73:22:23, 15.64s/it]

training loss: 1.0924522876739502


training:   9%|▊         | 1608/18500 [7:05:32<73:19:28, 15.63s/it]

training loss: 1.2065906524658203


training:   9%|▊         | 1609/18500 [7:05:47<73:15:27, 15.61s/it]

training loss: 0.8889358639717102


training:   9%|▊         | 1610/18500 [7:06:03<73:12:07, 15.60s/it]

training loss: 0.8538354635238647


training:   9%|▊         | 1611/18500 [7:06:18<73:11:57, 15.60s/it]

training loss: 0.5486548542976379


training:   9%|▊         | 1612/18500 [7:06:34<73:10:20, 15.60s/it]

training loss: 0.607339084148407


training:   9%|▊         | 1613/18500 [7:06:50<73:08:53, 15.59s/it]

training loss: 1.136206865310669


training:   9%|▊         | 1614/18500 [7:07:05<73:08:21, 15.59s/it]

training loss: 0.8814871311187744


training:   9%|▊         | 1615/18500 [7:07:21<73:09:14, 15.60s/it]

training loss: 0.6774510145187378


training:   9%|▊         | 1616/18500 [7:07:36<73:08:38, 15.60s/it]

training loss: 0.7452859282493591


training:   9%|▊         | 1617/18500 [7:07:52<73:07:05, 15.59s/it]

training loss: 0.751042366027832


training:   9%|▊         | 1618/18500 [7:08:08<73:05:51, 15.59s/it]

training loss: 0.6492097973823547


training:   9%|▉         | 1619/18500 [7:08:23<73:06:35, 15.59s/it]

training loss: 0.6926904320716858


training:   9%|▉         | 1620/18500 [7:08:39<73:07:21, 15.59s/it]

training loss: 0.9942891597747803


training:   9%|▉         | 1621/18500 [7:08:54<73:08:28, 15.60s/it]

training loss: 0.9580811858177185


training:   9%|▉         | 1622/18500 [7:09:10<73:06:55, 15.60s/it]

training loss: 0.41237112879753113


training:   9%|▉         | 1623/18500 [7:09:26<73:07:31, 15.60s/it]

training loss: 0.715142548084259


training:   9%|▉         | 1624/18500 [7:09:41<73:06:31, 15.60s/it]

training loss: 1.1057829856872559


training:   9%|▉         | 1625/18500 [7:09:57<73:05:24, 15.59s/it]

training loss: 1.0749571323394775


training:   9%|▉         | 1626/18500 [7:10:12<73:03:39, 15.59s/it]

training loss: 0.8731616139411926


training:   9%|▉         | 1627/18500 [7:10:28<73:04:19, 15.59s/it]

training loss: 0.40655308961868286


training:   9%|▉         | 1628/18500 [7:10:44<73:03:54, 15.59s/it]

training loss: 1.005858063697815


training:   9%|▉         | 1629/18500 [7:10:59<73:03:30, 15.59s/it]

training loss: 0.6859195828437805


training:   9%|▉         | 1630/18500 [7:11:15<73:03:09, 15.59s/it]

training loss: 1.0832123756408691


training:   9%|▉         | 1631/18500 [7:11:30<73:03:06, 15.59s/it]

training loss: 0.8321905136108398


training:   9%|▉         | 1632/18500 [7:11:46<73:02:10, 15.59s/it]

training loss: 0.8987088799476624


training:   9%|▉         | 1633/18500 [7:12:01<73:01:49, 15.59s/it]

training loss: 0.6329825520515442


training:   9%|▉         | 1634/18500 [7:12:17<73:01:47, 15.59s/it]

training loss: 0.7440824508666992


training:   9%|▉         | 1635/18500 [7:12:33<73:03:23, 15.59s/it]

training loss: 0.9073855876922607


training:   9%|▉         | 1636/18500 [7:12:48<73:01:39, 15.59s/it]

training loss: 0.6683741807937622


training:   9%|▉         | 1637/18500 [7:13:04<73:00:30, 15.59s/it]

training loss: 0.6894825100898743


training:   9%|▉         | 1638/18500 [7:13:19<73:00:36, 15.59s/it]

training loss: 0.8400676250457764


training:   9%|▉         | 1639/18500 [7:13:35<73:01:18, 15.59s/it]

training loss: 0.7343880534172058


training:   9%|▉         | 1640/18500 [7:13:51<73:00:10, 15.59s/it]

training loss: 0.8594028949737549


training:   9%|▉         | 1641/18500 [7:14:06<72:58:53, 15.58s/it]

training loss: 0.9374671578407288


training:   9%|▉         | 1642/18500 [7:14:22<73:00:02, 15.59s/it]

training loss: 0.7289562821388245


training:   9%|▉         | 1643/18500 [7:14:37<72:59:29, 15.59s/it]

training loss: 0.3659837245941162


training:   9%|▉         | 1644/18500 [7:14:53<72:58:08, 15.58s/it]

training loss: 0.8961645364761353


training:   9%|▉         | 1645/18500 [7:15:09<72:58:13, 15.59s/it]

training loss: 0.9999304413795471


training:   9%|▉         | 1646/18500 [7:15:24<72:58:58, 15.59s/it]

training loss: 0.8819730281829834


training:   9%|▉         | 1647/18500 [7:15:40<72:58:10, 15.59s/it]

training loss: 0.6467347741127014


training:   9%|▉         | 1648/18500 [7:15:55<72:57:34, 15.59s/it]

training loss: 0.5155189037322998


training:   9%|▉         | 1649/18500 [7:16:11<73:02:01, 15.60s/it]

training loss: 0.5173969268798828


training:   9%|▉         | 1650/18500 [7:16:27<73:10:29, 15.63s/it]

training loss: 0.7696759700775146


training:   9%|▉         | 1651/18500 [7:16:42<73:12:58, 15.64s/it]

training loss: 1.061627745628357


training:   9%|▉         | 1652/18500 [7:16:58<73:16:22, 15.66s/it]

training loss: 0.8930217027664185


training:   9%|▉         | 1653/18500 [7:17:14<73:16:38, 15.66s/it]

training loss: 0.9142128825187683


training:   9%|▉         | 1654/18500 [7:17:29<73:19:30, 15.67s/it]

training loss: 0.5332354307174683


training:   9%|▉         | 1655/18500 [7:17:45<73:20:40, 15.67s/it]

training loss: 0.6716686487197876


training:   9%|▉         | 1656/18500 [7:18:01<73:20:26, 15.67s/it]

training loss: 0.5425859689712524


training:   9%|▉         | 1657/18500 [7:18:16<73:21:02, 15.68s/it]

training loss: 0.9907707571983337


training:   9%|▉         | 1658/18500 [7:18:32<73:22:54, 15.69s/it]

training loss: 0.6560330986976624


training:   9%|▉         | 1659/18500 [7:18:48<73:19:08, 15.67s/it]

training loss: 0.5301105976104736


training:   9%|▉         | 1660/18500 [7:19:03<73:15:43, 15.66s/it]

training loss: 0.7277436256408691


training:   9%|▉         | 1661/18500 [7:19:19<73:10:52, 15.65s/it]

training loss: 0.7208070755004883


training:   9%|▉         | 1662/18500 [7:19:35<73:06:31, 15.63s/it]

training loss: 0.6068607568740845


training:   9%|▉         | 1663/18500 [7:19:50<73:01:43, 15.61s/it]

training loss: 0.8452320098876953


training:   9%|▉         | 1664/18500 [7:20:06<72:59:01, 15.61s/it]

training loss: 1.0634686946868896


training:   9%|▉         | 1665/18500 [7:20:21<72:58:07, 15.60s/it]

training loss: 0.7872013449668884


training:   9%|▉         | 1666/18500 [7:20:37<72:56:44, 15.60s/it]

training loss: 0.5781834125518799


training:   9%|▉         | 1667/18500 [7:20:53<72:54:51, 15.59s/it]

training loss: 0.8391194343566895


training:   9%|▉         | 1668/18500 [7:21:08<72:54:15, 15.59s/it]

training loss: 0.9315088391304016


training:   9%|▉         | 1669/18500 [7:21:24<72:54:30, 15.59s/it]

training loss: 0.7589434385299683


training:   9%|▉         | 1670/18500 [7:21:39<72:52:30, 15.59s/it]

training loss: 0.9382941722869873


training:   9%|▉         | 1671/18500 [7:21:55<72:52:49, 15.59s/it]

training loss: 0.8702437877655029


training:   9%|▉         | 1672/18500 [7:22:10<72:51:39, 15.59s/it]

training loss: 0.9240219593048096


training:   9%|▉         | 1673/18500 [7:22:26<72:51:56, 15.59s/it]

training loss: 0.737663745880127


training:   9%|▉         | 1674/18500 [7:22:42<72:50:42, 15.59s/it]

training loss: 0.6706892848014832


training:   9%|▉         | 1675/18500 [7:22:57<72:52:01, 15.59s/it]

training loss: 0.9646609425544739


training:   9%|▉         | 1676/18500 [7:23:13<72:50:58, 15.59s/it]

training loss: 1.0220801830291748


training:   9%|▉         | 1677/18500 [7:23:28<72:51:32, 15.59s/it]

training loss: 0.47916051745414734


training:   9%|▉         | 1678/18500 [7:23:44<72:50:17, 15.59s/it]

training loss: 0.7163750529289246


training:   9%|▉         | 1679/18500 [7:24:00<72:49:34, 15.59s/it]

training loss: 0.6865931749343872


training:   9%|▉         | 1680/18500 [7:24:15<72:49:15, 15.59s/it]

training loss: 0.901138424873352


training:   9%|▉         | 1681/18500 [7:24:31<72:49:50, 15.59s/it]

training loss: 0.6580620408058167


training:   9%|▉         | 1682/18500 [7:24:46<72:49:19, 15.59s/it]

training loss: 0.6952122449874878


training:   9%|▉         | 1683/18500 [7:25:02<72:48:45, 15.59s/it]

training loss: 0.8159996271133423


training:   9%|▉         | 1684/18500 [7:25:18<72:48:55, 15.59s/it]

training loss: 0.5205737352371216


training:   9%|▉         | 1685/18500 [7:25:33<72:48:20, 15.59s/it]

training loss: 0.5847758054733276


training:   9%|▉         | 1686/18500 [7:25:49<72:48:33, 15.59s/it]

training loss: 0.9930862784385681


training:   9%|▉         | 1687/18500 [7:26:04<72:47:47, 15.59s/it]

training loss: 0.6732907295227051


training:   9%|▉         | 1688/18500 [7:26:20<72:47:44, 15.59s/it]

training loss: 0.867284893989563


training:   9%|▉         | 1689/18500 [7:26:35<72:47:16, 15.59s/it]

training loss: 0.5208956003189087


training:   9%|▉         | 1690/18500 [7:26:51<72:55:01, 15.62s/it]

training loss: 0.6028450131416321


training:   9%|▉         | 1691/18500 [7:27:07<72:51:23, 15.60s/it]

training loss: 0.6855624318122864


training:   9%|▉         | 1692/18500 [7:27:22<72:51:03, 15.60s/it]

training loss: 0.5538640022277832


training:   9%|▉         | 1693/18500 [7:27:38<72:48:28, 15.60s/it]

training loss: 0.5714547038078308


training:   9%|▉         | 1694/18500 [7:27:53<72:46:35, 15.59s/it]

training loss: 0.6740302443504333


training:   9%|▉         | 1695/18500 [7:28:09<72:47:21, 15.59s/it]

training loss: 1.0429456233978271


training:   9%|▉         | 1696/18500 [7:28:25<72:48:40, 15.60s/it]

training loss: 0.5334590077400208


training:   9%|▉         | 1697/18500 [7:28:40<72:46:51, 15.59s/it]

training loss: 0.5613728761672974


training:   9%|▉         | 1698/18500 [7:28:56<72:45:54, 15.59s/it]

training loss: 0.8123793005943298


training:   9%|▉         | 1699/18500 [7:29:11<72:44:13, 15.59s/it]

training loss: 0.6575296521186829


training:   9%|▉         | 1700/18500 [7:29:27<72:45:29, 15.59s/it]

training loss: 0.7520476579666138
training loss: 0.9516100883483887


training:   9%|▉         | 1701/18500 [7:29:44<74:37:55, 15.99s/it]

validation loss: 1.4899462461471558


training:   9%|▉         | 1702/18500 [7:30:00<74:03:46, 15.87s/it]

training loss: 0.8691686987876892


training:   9%|▉         | 1703/18500 [7:30:15<73:39:17, 15.79s/it]

training loss: 0.7699044942855835


training:   9%|▉         | 1704/18500 [7:30:31<73:22:48, 15.73s/it]

training loss: 0.9985504746437073


training:   9%|▉         | 1705/18500 [7:30:46<73:09:24, 15.68s/it]

training loss: 0.4958897531032562


training:   9%|▉         | 1706/18500 [7:31:02<73:01:08, 15.65s/it]

training loss: 0.7764110565185547


training:   9%|▉         | 1707/18500 [7:31:17<72:56:02, 15.64s/it]

training loss: 0.6632888317108154


training:   9%|▉         | 1708/18500 [7:31:33<72:51:58, 15.62s/it]

training loss: 0.660815954208374


training:   9%|▉         | 1709/18500 [7:31:49<72:47:51, 15.61s/it]

training loss: 0.9944934844970703


training:   9%|▉         | 1710/18500 [7:32:04<72:45:39, 15.60s/it]

training loss: 0.6955924034118652


training:   9%|▉         | 1711/18500 [7:32:20<72:44:58, 15.60s/it]

training loss: 0.8499025702476501


training:   9%|▉         | 1712/18500 [7:32:35<72:43:53, 15.60s/it]

training loss: 1.0133388042449951


training:   9%|▉         | 1713/18500 [7:32:51<72:42:15, 15.59s/it]

training loss: 0.8942450284957886


training:   9%|▉         | 1714/18500 [7:33:07<72:40:20, 15.59s/it]

training loss: 0.7295742034912109


training:   9%|▉         | 1715/18500 [7:33:22<72:41:01, 15.59s/it]

training loss: 0.3725540041923523


training:   9%|▉         | 1716/18500 [7:33:38<72:41:53, 15.59s/it]

training loss: 0.9397847652435303


training:   9%|▉         | 1717/18500 [7:33:53<72:41:01, 15.59s/it]

training loss: 0.7025839686393738


training:   9%|▉         | 1718/18500 [7:34:09<72:41:13, 15.59s/it]

training loss: 1.115492582321167


training:   9%|▉         | 1719/18500 [7:34:25<72:42:29, 15.60s/it]

training loss: 0.6951837539672852


training:   9%|▉         | 1720/18500 [7:34:40<72:42:10, 15.60s/it]

training loss: 0.7610532641410828


training:   9%|▉         | 1721/18500 [7:34:56<72:41:14, 15.60s/it]

training loss: 0.9885223507881165


training:   9%|▉         | 1722/18500 [7:35:11<72:39:54, 15.59s/it]

training loss: 0.8320241570472717


training:   9%|▉         | 1723/18500 [7:35:27<72:41:12, 15.60s/it]

training loss: 0.42479321360588074


training:   9%|▉         | 1724/18500 [7:35:43<72:40:20, 15.59s/it]

training loss: 1.120528221130371


training:   9%|▉         | 1725/18500 [7:35:58<72:41:15, 15.60s/it]

training loss: 0.9826164245605469


training:   9%|▉         | 1726/18500 [7:36:14<72:40:04, 15.60s/it]

training loss: 0.44498273730278015


training:   9%|▉         | 1727/18500 [7:36:29<72:02:56, 15.46s/it]

training loss: 0.7843236923217773


training:   9%|▉         | 1728/18500 [7:36:44<72:12:23, 15.50s/it]

training loss: 0.5627856254577637


training:   9%|▉         | 1729/18500 [7:37:00<72:19:03, 15.52s/it]

training loss: 0.9497156739234924


training:   9%|▉         | 1730/18500 [7:37:16<72:25:14, 15.55s/it]

training loss: 0.8239741921424866


training:   9%|▉         | 1731/18500 [7:37:31<72:29:04, 15.56s/it]

training loss: 0.8059149980545044


training:   9%|▉         | 1732/18500 [7:37:47<72:31:04, 15.57s/it]

training loss: 0.6250230073928833


training:   9%|▉         | 1733/18500 [7:38:02<72:31:50, 15.57s/it]

training loss: 0.828111469745636


training:   9%|▉         | 1734/18500 [7:38:18<72:33:39, 15.58s/it]

training loss: 1.0411417484283447


training:   9%|▉         | 1735/18500 [7:38:34<72:35:42, 15.59s/it]

training loss: 0.46966254711151123


training:   9%|▉         | 1736/18500 [7:38:49<72:35:00, 15.59s/it]

training loss: 0.47002074122428894


training:   9%|▉         | 1737/18500 [7:39:05<72:34:58, 15.59s/it]

training loss: 0.9700073599815369


training:   9%|▉         | 1738/18500 [7:39:20<72:36:00, 15.59s/it]

training loss: 1.0018781423568726


training:   9%|▉         | 1739/18500 [7:39:36<72:36:17, 15.59s/it]

training loss: 0.9064944386482239


training:   9%|▉         | 1740/18500 [7:39:52<72:34:54, 15.59s/it]

training loss: 0.5676342248916626


training:   9%|▉         | 1741/18500 [7:40:07<72:33:45, 15.59s/it]

training loss: 0.525458574295044


training:   9%|▉         | 1742/18500 [7:40:23<72:34:47, 15.59s/it]

training loss: 0.7808184027671814


training:   9%|▉         | 1743/18500 [7:40:38<72:33:48, 15.59s/it]

training loss: 0.3983616232872009


training:   9%|▉         | 1744/18500 [7:40:54<72:33:22, 15.59s/it]

training loss: 0.9306697845458984


training:   9%|▉         | 1745/18500 [7:41:09<72:32:11, 15.59s/it]

training loss: 1.1377829313278198


training:   9%|▉         | 1746/18500 [7:41:25<72:33:46, 15.59s/it]

training loss: 0.7935173511505127


training:   9%|▉         | 1747/18500 [7:41:41<72:32:34, 15.59s/it]

training loss: 0.6045927405357361


training:   9%|▉         | 1748/18500 [7:41:56<72:32:33, 15.59s/it]

training loss: 1.0154509544372559


training:   9%|▉         | 1749/18500 [7:42:12<72:32:01, 15.59s/it]

training loss: 0.5302742719650269


training:   9%|▉         | 1750/18500 [7:42:27<72:33:32, 15.59s/it]

training loss: 1.0856744050979614


training:   9%|▉         | 1751/18500 [7:42:43<72:32:06, 15.59s/it]

training loss: 0.9366108179092407


training:   9%|▉         | 1752/18500 [7:42:59<72:31:39, 15.59s/it]

training loss: 0.8639586567878723


training:   9%|▉         | 1753/18500 [7:43:14<72:31:54, 15.59s/it]

training loss: 1.0328550338745117


training:   9%|▉         | 1754/18500 [7:43:30<72:32:03, 15.59s/it]

training loss: 0.5371073484420776


training:   9%|▉         | 1755/18500 [7:43:45<72:31:31, 15.59s/it]

training loss: 1.0126079320907593


training:   9%|▉         | 1756/18500 [7:44:01<72:30:16, 15.59s/it]

training loss: 0.7620430588722229


training:   9%|▉         | 1757/18500 [7:44:17<72:30:16, 15.59s/it]

training loss: 0.4050384759902954


training:  10%|▉         | 1758/18500 [7:44:32<72:31:34, 15.60s/it]

training loss: 1.016252875328064


training:  10%|▉         | 1759/18500 [7:44:48<72:30:14, 15.59s/it]

training loss: 0.45453011989593506


training:  10%|▉         | 1760/18500 [7:45:03<72:29:42, 15.59s/it]

training loss: 0.9781593084335327


training:  10%|▉         | 1761/18500 [7:45:19<72:29:27, 15.59s/it]

training loss: 0.7300531268119812


training:  10%|▉         | 1762/18500 [7:45:35<72:32:41, 15.60s/it]

training loss: 0.9568222761154175


training:  10%|▉         | 1763/18500 [7:45:50<72:38:01, 15.62s/it]

training loss: 0.9126161336898804


training:  10%|▉         | 1764/18500 [7:46:06<72:42:20, 15.64s/it]

training loss: 0.969700813293457


training:  10%|▉         | 1765/18500 [7:46:22<72:47:47, 15.66s/it]

training loss: 0.7869323492050171


training:  10%|▉         | 1766/18500 [7:46:37<72:48:31, 15.66s/it]

training loss: 0.6435518860816956


training:  10%|▉         | 1767/18500 [7:46:53<72:49:58, 15.67s/it]

training loss: 0.7146194577217102


training:  10%|▉         | 1768/18500 [7:47:09<72:50:58, 15.67s/it]

training loss: 0.9200319647789001


training:  10%|▉         | 1769/18500 [7:47:24<72:54:46, 15.69s/it]

training loss: 0.7686392068862915


training:  10%|▉         | 1770/18500 [7:47:40<72:52:09, 15.68s/it]

training loss: 0.8496982455253601


training:  10%|▉         | 1771/18500 [7:47:56<72:51:13, 15.68s/it]

training loss: 0.8028329610824585


training:  10%|▉         | 1772/18500 [7:48:11<72:48:45, 15.67s/it]

training loss: 0.5367950797080994


training:  10%|▉         | 1773/18500 [7:48:27<72:49:33, 15.67s/it]

training loss: 1.0043829679489136


training:  10%|▉         | 1774/18500 [7:48:43<72:47:16, 15.67s/it]

training loss: 0.7387375831604004


training:  10%|▉         | 1775/18500 [7:48:58<72:42:12, 15.65s/it]

training loss: 1.0523957014083862


training:  10%|▉         | 1776/18500 [7:49:14<72:38:18, 15.64s/it]

training loss: 0.9141920208930969


training:  10%|▉         | 1777/18500 [7:49:30<72:35:23, 15.63s/it]

training loss: 0.754408061504364


training:  10%|▉         | 1778/18500 [7:49:45<72:32:20, 15.62s/it]

training loss: 0.7990657687187195


training:  10%|▉         | 1779/18500 [7:50:01<72:30:26, 15.61s/it]

training loss: 0.8870745897293091


training:  10%|▉         | 1780/18500 [7:50:16<72:29:23, 15.61s/it]

training loss: 0.9632596969604492


training:  10%|▉         | 1781/18500 [7:50:32<72:29:18, 15.61s/it]

training loss: 0.9311316013336182


training:  10%|▉         | 1782/18500 [7:50:48<72:26:59, 15.60s/it]

training loss: 0.6945832371711731


training:  10%|▉         | 1783/18500 [7:51:03<72:25:49, 15.60s/it]

training loss: 1.0706305503845215


training:  10%|▉         | 1784/18500 [7:51:19<72:25:25, 15.60s/it]

training loss: 0.792297899723053


training:  10%|▉         | 1785/18500 [7:51:34<72:25:49, 15.60s/it]

training loss: 0.6266601085662842


training:  10%|▉         | 1786/18500 [7:51:50<72:25:11, 15.60s/it]

training loss: 1.040098786354065


training:  10%|▉         | 1787/18500 [7:52:06<72:24:25, 15.60s/it]

training loss: 0.8636617064476013


training:  10%|▉         | 1788/18500 [7:52:21<72:24:37, 15.60s/it]

training loss: 0.9288641214370728


training:  10%|▉         | 1789/18500 [7:52:37<72:23:58, 15.60s/it]

training loss: 0.6023682951927185


training:  10%|▉         | 1790/18500 [7:52:52<72:22:10, 15.59s/it]

training loss: 0.857215940952301


training:  10%|▉         | 1791/18500 [7:53:08<72:21:35, 15.59s/it]

training loss: 0.4122191369533539


training:  10%|▉         | 1792/18500 [7:53:23<72:23:48, 15.60s/it]

training loss: 0.7363091707229614


training:  10%|▉         | 1793/18500 [7:53:39<72:22:06, 15.59s/it]

training loss: 0.8522365689277649


training:  10%|▉         | 1794/18500 [7:53:55<72:24:13, 15.60s/it]

training loss: 0.9110705852508545


training:  10%|▉         | 1795/18500 [7:54:10<72:24:52, 15.61s/it]

training loss: 0.6593132615089417


training:  10%|▉         | 1796/18500 [7:54:26<72:26:31, 15.61s/it]

training loss: 0.4690064489841461


training:  10%|▉         | 1797/18500 [7:54:42<72:24:25, 15.61s/it]

training loss: 0.5578384399414062


training:  10%|▉         | 1798/18500 [7:54:57<72:23:07, 15.60s/it]

training loss: 0.39395520091056824


training:  10%|▉         | 1799/18500 [7:55:13<72:21:27, 15.60s/it]

training loss: 0.6937089562416077


training:  10%|▉         | 1800/18500 [7:55:28<72:21:20, 15.60s/it]

training loss: 0.8615957498550415
training loss: 0.9521095752716064


training:  10%|▉         | 1801/18500 [7:55:45<74:14:28, 16.01s/it]

validation loss: 1.5626461505889893


training:  10%|▉         | 1802/18500 [7:56:01<73:39:51, 15.88s/it]

training loss: 0.888333261013031


training:  10%|▉         | 1803/18500 [7:56:16<73:15:18, 15.79s/it]

training loss: 0.7531707286834717


training:  10%|▉         | 1804/18500 [7:56:32<72:58:22, 15.73s/it]

training loss: 0.6074975728988647


training:  10%|▉         | 1805/18500 [7:56:48<72:45:22, 15.69s/it]

training loss: 0.8729345798492432


training:  10%|▉         | 1806/18500 [7:57:03<72:36:12, 15.66s/it]

training loss: 0.9217634201049805


training:  10%|▉         | 1807/18500 [7:57:19<72:30:17, 15.64s/it]

training loss: 0.9959848523139954


training:  10%|▉         | 1808/18500 [7:57:34<72:26:35, 15.62s/it]

training loss: 0.9977840185165405


training:  10%|▉         | 1809/18500 [7:57:50<72:22:24, 15.61s/it]

training loss: 0.8678523302078247


training:  10%|▉         | 1810/18500 [7:58:06<72:19:58, 15.60s/it]

training loss: 0.7251256108283997


training:  10%|▉         | 1811/18500 [7:58:21<72:19:33, 15.60s/it]

training loss: 0.871634840965271


training:  10%|▉         | 1812/18500 [7:58:37<72:18:28, 15.60s/it]

training loss: 0.5629894733428955


training:  10%|▉         | 1813/18500 [7:58:52<72:17:13, 15.59s/it]

training loss: 0.7523980736732483


training:  10%|▉         | 1814/18500 [7:59:08<72:16:50, 15.59s/it]

training loss: 0.4859462380409241


training:  10%|▉         | 1815/18500 [7:59:24<72:18:52, 15.60s/it]

training loss: 0.6716516017913818


training:  10%|▉         | 1816/18500 [7:59:39<72:17:43, 15.60s/it]

training loss: 0.8827896118164062


training:  10%|▉         | 1817/18500 [7:59:55<72:16:49, 15.60s/it]

training loss: 0.6936610341072083


training:  10%|▉         | 1818/18500 [8:00:10<72:14:28, 15.59s/it]

training loss: 0.7844482064247131


training:  10%|▉         | 1819/18500 [8:00:26<72:15:55, 15.60s/it]

training loss: 0.838763952255249


training:  10%|▉         | 1820/18500 [8:00:41<72:13:37, 15.59s/it]

training loss: 0.6408647298812866


training:  10%|▉         | 1821/18500 [8:00:57<72:13:29, 15.59s/it]

training loss: 0.622110903263092


training:  10%|▉         | 1822/18500 [8:01:13<72:13:03, 15.59s/it]

training loss: 0.6160857677459717


training:  10%|▉         | 1823/18500 [8:01:28<72:13:19, 15.59s/it]

training loss: 0.630951464176178


training:  10%|▉         | 1824/18500 [8:01:44<72:12:14, 15.59s/it]

training loss: 0.7586724758148193


training:  10%|▉         | 1825/18500 [8:01:59<72:12:19, 15.59s/it]

training loss: 0.7080540060997009


training:  10%|▉         | 1826/18500 [8:02:15<72:12:04, 15.59s/it]

training loss: 0.8770350217819214


training:  10%|▉         | 1827/18500 [8:02:31<72:12:41, 15.59s/it]

training loss: 0.6354122757911682


training:  10%|▉         | 1828/18500 [8:02:46<72:11:31, 15.59s/it]

training loss: 0.5329127311706543


training:  10%|▉         | 1829/18500 [8:03:02<72:11:16, 15.59s/it]

training loss: 0.8528080582618713


training:  10%|▉         | 1830/18500 [8:03:17<72:12:11, 15.59s/it]

training loss: 0.9948873519897461


training:  10%|▉         | 1831/18500 [8:03:33<72:11:40, 15.59s/it]

training loss: 1.163160800933838


training:  10%|▉         | 1832/18500 [8:03:49<72:11:11, 15.59s/it]

training loss: 0.9808737635612488


training:  10%|▉         | 1833/18500 [8:04:04<72:10:11, 15.59s/it]

training loss: 0.295611709356308


training:  10%|▉         | 1834/18500 [8:04:20<72:11:30, 15.59s/it]

training loss: 0.8619338274002075


training:  10%|▉         | 1835/18500 [8:04:35<72:11:47, 15.60s/it]

training loss: 0.5633805394172668


training:  10%|▉         | 1836/18500 [8:04:51<72:10:40, 15.59s/it]

training loss: 1.025491714477539


training:  10%|▉         | 1837/18500 [8:05:07<72:10:08, 15.59s/it]

training loss: 0.8077566623687744


training:  10%|▉         | 1838/18500 [8:05:22<72:10:58, 15.60s/it]

training loss: 1.099678635597229


training:  10%|▉         | 1839/18500 [8:05:38<72:10:27, 15.59s/it]

training loss: 1.063582181930542


training:  10%|▉         | 1840/18500 [8:05:53<72:09:28, 15.59s/it]

training loss: 1.1404097080230713


training:  10%|▉         | 1841/18500 [8:06:09<72:08:41, 15.59s/it]

training loss: 0.7213234901428223


training:  10%|▉         | 1842/18500 [8:06:25<72:10:18, 15.60s/it]

training loss: 0.9628778696060181


training:  10%|▉         | 1843/18500 [8:06:40<72:08:05, 15.59s/it]

training loss: 0.7081283926963806


training:  10%|▉         | 1844/18500 [8:06:56<72:07:56, 15.59s/it]

training loss: 0.9560022354125977


training:  10%|▉         | 1845/18500 [8:07:11<72:07:31, 15.59s/it]

training loss: 1.141829490661621


training:  10%|▉         | 1846/18500 [8:07:27<72:08:29, 15.59s/it]

training loss: 0.8527109622955322


training:  10%|▉         | 1847/18500 [8:07:42<72:07:16, 15.59s/it]

training loss: 0.5635265111923218


training:  10%|▉         | 1848/18500 [8:07:58<72:07:15, 15.59s/it]

training loss: 0.8915045261383057


training:  10%|▉         | 1849/18500 [8:08:14<72:06:47, 15.59s/it]

training loss: 0.8705864548683167


training:  10%|█         | 1850/18500 [8:08:29<72:07:12, 15.59s/it]

training loss: 0.920870840549469


training:  10%|█         | 1851/18500 [8:08:45<72:06:24, 15.59s/it]

training loss: 1.150399923324585


training:  10%|█         | 1852/18500 [8:09:00<72:05:54, 15.59s/it]

training loss: 0.6165730357170105


training:  10%|█         | 1853/18500 [8:09:16<72:04:51, 15.59s/it]

training loss: 0.5568933486938477


training:  10%|█         | 1854/18500 [8:09:32<72:07:20, 15.60s/it]

training loss: 0.5542420148849487


training:  10%|█         | 1855/18500 [8:09:47<72:06:22, 15.60s/it]

training loss: 0.8940011262893677


training:  10%|█         | 1856/18500 [8:10:03<72:05:05, 15.59s/it]

training loss: 0.7990721464157104


training:  10%|█         | 1857/18500 [8:10:18<72:05:13, 15.59s/it]

training loss: 0.7333850860595703


training:  10%|█         | 1858/18500 [8:10:34<72:04:48, 15.59s/it]

training loss: 1.2168034315109253


training:  10%|█         | 1859/18500 [8:10:50<72:04:31, 15.59s/it]

training loss: 0.8529213666915894


training:  10%|█         | 1860/18500 [8:11:05<72:04:33, 15.59s/it]

training loss: 0.635619044303894


training:  10%|█         | 1861/18500 [8:11:21<72:05:06, 15.60s/it]

training loss: 0.9355286955833435


training:  10%|█         | 1862/18500 [8:11:36<72:04:07, 15.59s/it]

training loss: 1.0283339023590088


training:  10%|█         | 1863/18500 [8:11:52<72:02:53, 15.59s/it]

training loss: 0.6849372982978821


training:  10%|█         | 1864/18500 [8:12:08<72:01:21, 15.59s/it]

training loss: 0.9092687368392944


training:  10%|█         | 1865/18500 [8:12:23<72:02:29, 15.59s/it]

training loss: 0.9210664629936218


training:  10%|█         | 1866/18500 [8:12:39<72:01:48, 15.59s/it]

training loss: 0.6256315112113953


training:  10%|█         | 1867/18500 [8:12:54<72:01:54, 15.59s/it]

training loss: 0.6866708397865295


training:  10%|█         | 1868/18500 [8:13:10<72:02:11, 15.59s/it]

training loss: 0.6808432340621948


training:  10%|█         | 1869/18500 [8:13:26<72:03:46, 15.60s/it]

training loss: 0.8967258334159851


training:  10%|█         | 1870/18500 [8:13:41<72:02:27, 15.60s/it]

training loss: 0.9497666954994202


training:  10%|█         | 1871/18500 [8:13:57<72:02:04, 15.59s/it]

training loss: 1.2075612545013428


training:  10%|█         | 1872/18500 [8:14:12<72:00:46, 15.59s/it]

training loss: 0.554060697555542


training:  10%|█         | 1873/18500 [8:14:28<72:01:46, 15.60s/it]

training loss: 0.5358265042304993


training:  10%|█         | 1874/18500 [8:14:43<72:01:55, 15.60s/it]

training loss: 0.44651421904563904


training:  10%|█         | 1875/18500 [8:14:59<72:01:03, 15.59s/it]

training loss: 0.6201116442680359


training:  10%|█         | 1876/18500 [8:15:15<72:01:04, 15.60s/it]

training loss: 0.5855273604393005


training:  10%|█         | 1877/18500 [8:15:30<72:00:53, 15.60s/it]

training loss: 0.4403844475746155


training:  10%|█         | 1878/18500 [8:15:46<71:58:59, 15.59s/it]

training loss: 0.8112615942955017


training:  10%|█         | 1879/18500 [8:16:01<71:58:55, 15.59s/it]

training loss: 0.6419984102249146


training:  10%|█         | 1880/18500 [8:16:17<71:59:01, 15.59s/it]

training loss: 0.7913104295730591


training:  10%|█         | 1881/18500 [8:16:33<71:58:27, 15.59s/it]

training loss: 0.6974786520004272


training:  10%|█         | 1882/18500 [8:16:48<71:57:38, 15.59s/it]

training loss: 0.9852583408355713


training:  10%|█         | 1883/18500 [8:17:04<72:02:51, 15.61s/it]

training loss: 0.6465771198272705


training:  10%|█         | 1884/18500 [8:17:20<72:08:10, 15.63s/it]

training loss: 1.0411274433135986


training:  10%|█         | 1885/18500 [8:17:35<72:13:00, 15.65s/it]

training loss: 0.9745042324066162


training:  10%|█         | 1886/18500 [8:17:51<72:12:54, 15.65s/it]

training loss: 0.44054561853408813


training:  10%|█         | 1887/18500 [8:18:07<72:14:15, 15.65s/it]

training loss: 0.9752717018127441


training:  10%|█         | 1888/18500 [8:18:22<72:18:24, 15.67s/it]

training loss: 0.6440354585647583


training:  10%|█         | 1889/18500 [8:18:38<72:17:20, 15.67s/it]

training loss: 0.7134066820144653


training:  10%|█         | 1890/18500 [8:18:54<72:16:55, 15.67s/it]

training loss: 0.7196511626243591


training:  10%|█         | 1891/18500 [8:19:09<72:15:47, 15.66s/it]

training loss: 0.5427517890930176


training:  10%|█         | 1892/18500 [8:19:25<72:14:20, 15.66s/it]

training loss: 0.34371498227119446


training:  10%|█         | 1893/18500 [8:19:40<72:09:56, 15.64s/it]

training loss: 0.5749666690826416


training:  10%|█         | 1894/18500 [8:19:56<72:08:27, 15.64s/it]

training loss: 1.1503658294677734


training:  10%|█         | 1895/18500 [8:20:12<72:06:11, 15.63s/it]

training loss: 0.9908633232116699


training:  10%|█         | 1896/18500 [8:20:27<72:03:54, 15.62s/it]

training loss: 0.4493389427661896


training:  10%|█         | 1897/18500 [8:20:43<72:00:20, 15.61s/it]

training loss: 0.6598381400108337


training:  10%|█         | 1898/18500 [8:20:59<71:58:26, 15.61s/it]

training loss: 0.9771265983581543


training:  10%|█         | 1899/18500 [8:21:14<71:56:21, 15.60s/it]

training loss: 0.5244970321655273


training:  10%|█         | 1900/18500 [8:21:30<71:55:37, 15.60s/it]

training loss: 0.7234341502189636
training loss: 0.7080954909324646


training:  10%|█         | 1901/18500 [8:21:47<73:48:22, 16.01s/it]

validation loss: 1.5385195016860962


training:  10%|█         | 1902/18500 [8:22:02<73:16:20, 15.89s/it]

training loss: 0.7016783952713013


training:  10%|█         | 1903/18500 [8:22:18<72:51:32, 15.80s/it]

training loss: 0.7007826566696167


training:  10%|█         | 1904/18500 [8:22:33<72:34:01, 15.74s/it]

training loss: 0.752700924873352


training:  10%|█         | 1905/18500 [8:22:49<72:21:05, 15.70s/it]

training loss: 0.9958784580230713


training:  10%|█         | 1906/18500 [8:23:05<72:12:35, 15.67s/it]

training loss: 1.0453581809997559


training:  10%|█         | 1907/18500 [8:23:20<72:07:51, 15.65s/it]

training loss: 0.7379941344261169


training:  10%|█         | 1908/18500 [8:23:36<72:03:15, 15.63s/it]

training loss: 0.7890833020210266


training:  10%|█         | 1909/18500 [8:23:51<71:58:56, 15.62s/it]

training loss: 0.4645113945007324


training:  10%|█         | 1910/18500 [8:24:07<71:55:59, 15.61s/it]

training loss: 0.5665901899337769


training:  10%|█         | 1911/18500 [8:24:23<71:55:33, 15.61s/it]

training loss: 0.5645685195922852


training:  10%|█         | 1912/18500 [8:24:38<71:54:58, 15.61s/it]

training loss: 0.5660034418106079


training:  10%|█         | 1913/18500 [8:24:54<71:53:25, 15.60s/it]

training loss: 0.9589954018592834


training:  10%|█         | 1914/18500 [8:25:09<71:53:26, 15.60s/it]

training loss: 1.1084465980529785


training:  10%|█         | 1915/18500 [8:25:25<71:53:26, 15.60s/it]

training loss: 0.7478315234184265


training:  10%|█         | 1916/18500 [8:25:41<71:50:46, 15.60s/it]

training loss: 0.3908797800540924


training:  10%|█         | 1917/18500 [8:25:56<71:50:14, 15.60s/it]

training loss: 0.22047656774520874


training:  10%|█         | 1918/18500 [8:26:12<71:49:10, 15.59s/it]

training loss: 0.5801233053207397


training:  10%|█         | 1919/18500 [8:26:27<71:50:21, 15.60s/it]

training loss: 0.631745457649231


training:  10%|█         | 1920/18500 [8:26:43<71:48:21, 15.59s/it]

training loss: 1.0014662742614746


training:  10%|█         | 1921/18500 [8:26:59<71:47:51, 15.59s/it]

training loss: 0.7059327363967896


training:  10%|█         | 1922/18500 [8:27:14<71:48:11, 15.59s/it]

training loss: 0.8090633153915405


training:  10%|█         | 1923/18500 [8:27:30<71:48:18, 15.59s/it]

training loss: 0.6606383323669434


training:  10%|█         | 1924/18500 [8:27:45<71:46:30, 15.59s/it]

training loss: 0.5467402338981628


training:  10%|█         | 1925/18500 [8:28:01<71:46:36, 15.59s/it]

training loss: 0.727725625038147


training:  10%|█         | 1926/18500 [8:28:17<71:46:07, 15.59s/it]

training loss: 0.9167858362197876


training:  10%|█         | 1927/18500 [8:28:32<71:47:20, 15.59s/it]

training loss: 0.6579791903495789


training:  10%|█         | 1928/18500 [8:28:48<71:46:02, 15.59s/it]

training loss: 0.9812694787979126


training:  10%|█         | 1929/18500 [8:29:03<71:44:52, 15.59s/it]

training loss: 0.9638262987136841


training:  10%|█         | 1930/18500 [8:29:19<71:44:57, 15.59s/it]

training loss: 0.7683234810829163


training:  10%|█         | 1931/18500 [8:29:34<71:44:59, 15.59s/it]

training loss: 0.6929487586021423


training:  10%|█         | 1932/18500 [8:29:50<71:45:08, 15.59s/it]

training loss: 0.561196506023407


training:  10%|█         | 1933/18500 [8:30:06<71:45:03, 15.59s/it]

training loss: 0.5534470081329346


training:  10%|█         | 1934/18500 [8:30:21<71:45:46, 15.59s/it]

training loss: 0.7921324968338013


training:  10%|█         | 1935/18500 [8:30:37<71:44:03, 15.59s/it]

training loss: 0.6578909754753113


training:  10%|█         | 1936/18500 [8:30:52<71:42:40, 15.59s/it]

training loss: 0.9088636636734009


training:  10%|█         | 1937/18500 [8:31:08<71:42:30, 15.59s/it]

training loss: 0.7061548233032227


training:  10%|█         | 1938/18500 [8:31:24<71:44:21, 15.59s/it]

training loss: 0.45691245794296265


training:  10%|█         | 1939/18500 [8:31:39<71:42:26, 15.59s/it]

training loss: 0.7328197956085205


training:  10%|█         | 1940/18500 [8:31:55<71:42:25, 15.59s/it]

training loss: 0.3662487864494324


training:  10%|█         | 1941/18500 [8:32:10<71:41:17, 15.59s/it]

training loss: 0.5933888554573059


training:  10%|█         | 1942/18500 [8:32:26<71:43:16, 15.59s/it]

training loss: 0.7121484875679016


training:  11%|█         | 1943/18500 [8:32:42<71:41:12, 15.59s/it]

training loss: 0.7716625928878784


training:  11%|█         | 1944/18500 [8:32:57<71:41:29, 15.59s/it]

training loss: 0.7409156560897827


training:  11%|█         | 1945/18500 [8:33:13<71:40:59, 15.59s/it]

training loss: 0.7954687476158142


training:  11%|█         | 1946/18500 [8:33:28<71:42:08, 15.59s/it]

training loss: 0.6371409893035889


training:  11%|█         | 1947/18500 [8:33:44<71:40:34, 15.59s/it]

training loss: 0.7955529689788818


training:  11%|█         | 1948/18500 [8:33:59<71:39:55, 15.59s/it]

training loss: 0.5978599786758423


training:  11%|█         | 1949/18500 [8:34:15<71:39:32, 15.59s/it]

training loss: 0.5905987024307251


training:  11%|█         | 1950/18500 [8:34:31<71:40:21, 15.59s/it]

training loss: 0.9809017777442932


training:  11%|█         | 1951/18500 [8:34:46<71:39:09, 15.59s/it]

training loss: 0.5824615955352783


training:  11%|█         | 1952/18500 [8:35:02<71:39:22, 15.59s/it]

training loss: 0.9908524751663208


training:  11%|█         | 1953/18500 [8:35:17<71:40:00, 15.59s/it]

training loss: 0.7391877770423889


training:  11%|█         | 1954/18500 [8:35:33<71:40:42, 15.60s/it]

training loss: 0.3955024778842926


training:  11%|█         | 1955/18500 [8:35:49<71:39:15, 15.59s/it]

training loss: 0.9262882471084595


training:  11%|█         | 1956/18500 [8:36:04<71:38:25, 15.59s/it]

training loss: 0.9207580089569092


training:  11%|█         | 1957/18500 [8:36:20<71:38:18, 15.59s/it]

training loss: 1.0524886846542358


training:  11%|█         | 1958/18500 [8:36:35<71:38:08, 15.59s/it]

training loss: 1.1684223413467407


training:  11%|█         | 1959/18500 [8:36:51<71:37:37, 15.59s/it]

training loss: 0.7428172826766968


training:  11%|█         | 1960/18500 [8:37:07<71:36:39, 15.59s/it]

training loss: 0.5821238160133362


training:  11%|█         | 1961/18500 [8:37:22<71:37:49, 15.59s/it]

training loss: 0.5503398180007935


training:  11%|█         | 1962/18500 [8:37:38<71:37:31, 15.59s/it]

training loss: 0.8015498518943787


training:  11%|█         | 1963/18500 [8:37:53<71:36:24, 15.59s/it]

training loss: 1.0667784214019775


training:  11%|█         | 1964/18500 [8:38:09<71:36:37, 15.59s/it]

training loss: 0.7722725868225098


training:  11%|█         | 1965/18500 [8:38:25<71:38:30, 15.60s/it]

training loss: 0.546622633934021


training:  11%|█         | 1966/18500 [8:38:40<71:36:06, 15.59s/it]

training loss: 0.47558465600013733


training:  11%|█         | 1967/18500 [8:38:56<71:36:32, 15.59s/it]

training loss: 0.7784696817398071


training:  11%|█         | 1968/18500 [8:39:11<71:35:15, 15.59s/it]

training loss: 0.7607966661453247


training:  11%|█         | 1969/18500 [8:39:27<71:36:05, 15.59s/it]

training loss: 0.5438213348388672


training:  11%|█         | 1970/18500 [8:39:42<71:34:56, 15.59s/it]

training loss: 0.6644275188446045


training:  11%|█         | 1971/18500 [8:39:58<71:34:26, 15.59s/it]

training loss: 0.7668741345405579


training:  11%|█         | 1972/18500 [8:40:14<71:34:31, 15.59s/it]

training loss: 1.0988845825195312


training:  11%|█         | 1973/18500 [8:40:29<71:35:31, 15.59s/it]

training loss: 0.7764822840690613


training:  11%|█         | 1974/18500 [8:40:45<71:34:16, 15.59s/it]

training loss: 1.0598067045211792


training:  11%|█         | 1975/18500 [8:41:00<71:34:01, 15.59s/it]

training loss: 0.562023401260376


training:  11%|█         | 1976/18500 [8:41:16<71:33:04, 15.59s/it]

training loss: 0.7233518958091736


training:  11%|█         | 1977/18500 [8:41:32<71:33:50, 15.59s/it]

training loss: 0.8818925619125366


training:  11%|█         | 1978/18500 [8:41:47<71:41:53, 15.62s/it]

training loss: 0.7674253582954407


training:  11%|█         | 1979/18500 [8:42:03<71:38:13, 15.61s/it]

training loss: 0.8192954063415527


training:  11%|█         | 1980/18500 [8:42:18<71:36:13, 15.60s/it]

training loss: 0.9270250201225281


training:  11%|█         | 1981/18500 [8:42:34<71:34:24, 15.60s/it]

training loss: 1.018662929534912


training:  11%|█         | 1982/18500 [8:42:50<71:32:57, 15.59s/it]

training loss: 0.9920098781585693


training:  11%|█         | 1983/18500 [8:43:05<71:32:17, 15.59s/it]

training loss: 1.1184567213058472


training:  11%|█         | 1984/18500 [8:43:21<71:32:32, 15.59s/it]

training loss: 0.6736483573913574


training:  11%|█         | 1985/18500 [8:43:36<71:31:33, 15.59s/it]

training loss: 0.5175819993019104


training:  11%|█         | 1986/18500 [8:43:52<71:30:23, 15.59s/it]

training loss: 0.9486262202262878


training:  11%|█         | 1987/18500 [8:44:08<71:29:46, 15.59s/it]

training loss: 0.9460855722427368


training:  11%|█         | 1988/18500 [8:44:23<71:31:06, 15.59s/it]

training loss: 0.9526859521865845


training:  11%|█         | 1989/18500 [8:44:39<71:29:54, 15.59s/it]

training loss: 0.5994883179664612


training:  11%|█         | 1990/18500 [8:44:54<71:30:22, 15.59s/it]

training loss: 0.6564853191375732


training:  11%|█         | 1991/18500 [8:45:10<71:28:49, 15.59s/it]

training loss: 0.8871835470199585


training:  11%|█         | 1992/18500 [8:45:26<71:31:00, 15.60s/it]

training loss: 0.325624018907547


training:  11%|█         | 1993/18500 [8:45:41<71:30:46, 15.60s/it]

training loss: 0.5493404865264893


training:  11%|█         | 1994/18500 [8:45:57<71:36:11, 15.62s/it]

training loss: 0.5016903877258301


training:  11%|█         | 1995/18500 [8:46:12<71:39:15, 15.63s/it]

training loss: 0.713697612285614


training:  11%|█         | 1996/18500 [8:46:28<71:43:14, 15.64s/it]

training loss: 1.0191978216171265


training:  11%|█         | 1997/18500 [8:46:44<71:43:48, 15.65s/it]

training loss: 0.6989219784736633


training:  11%|█         | 1998/18500 [8:46:59<71:45:04, 15.65s/it]

training loss: 1.1530637741088867


training:  11%|█         | 1999/18500 [8:47:15<71:45:02, 15.65s/it]

training loss: 0.8916768431663513


training:  11%|█         | 2000/18500 [8:47:31<71:46:45, 15.66s/it]

training loss: 0.744722306728363
training loss: 1.0422179698944092



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.5680294036865234
odrzat slovo, cize svoj podpis, lode musi
odovzdat, zdoraznil exminister Bertrand. Dohoda bola podpisana v roku
2011, ked v prezidentskom kresle sedel Nicolas Sarkozy. Dodanie plavidiel je
dolezite z francuzskeho pohladu v dvoch veciach, poznamenala agentura
AFP: Je to vyznamne pre obranny priemysel a pre doveryhodnost jeho
exportu. Zlyhanie s dodanim lodi by malo nasledky na dalsie rozpracovane
zbrojne kontrakty, ako je napriklad plan dohody o predaji 126 lietadiel
Rafale do Indie.
Znepokojenie nad moznym sprisnenim sankcii vyslovuju francuzski
podnikatelia. Bola by to chyba, ktora by viedla k ekonomickej krize
v EU, nazdava sa podla agentury RIA Novosti predseda Francuzsko-ruskej
obchodno-priemyselnej komory Emmanuel Quidet. V suvislosti s kauzou Mistral
si prokremelske media vsimaju najma Camerona.
Dennik Rossijskaja Gazeta ho fakticky oznacil za predlzenu ruku
Washingtonu. Ak presadi americko-britsky postoj, tak zavedenie sankcii
o


generating:   0%|          | 1/512 [00:00<01:47,  4.75it/s][A
generating:   0%|          | 2/512 [00:00<01:47,  4.74it/s][A
generating:   1%|          | 3/512 [00:00<01:47,  4.73it/s][A
generating:   1%|          | 4/512 [00:00<01:47,  4.71it/s][A
generating:   1%|          | 5/512 [00:01<01:48,  4.69it/s][A
generating:   1%|          | 6/512 [00:01<01:47,  4.72it/s][A
generating:   1%|▏         | 7/512 [00:01<01:47,  4.72it/s][A
generating:   2%|▏         | 8/512 [00:01<01:46,  4.73it/s][A
generating:   2%|▏         | 9/512 [00:01<01:45,  4.75it/s][A
generating:   2%|▏         | 10/512 [00:02<01:45,  4.74it/s][A
generating:   2%|▏         | 11/512 [00:02<01:46,  4.72it/s][A
generating:   2%|▏         | 12/512 [00:02<01:46,  4.72it/s][A
generating:   3%|▎         | 13/512 [00:02<01:45,  4.74it/s][A
generating:   3%|▎         | 14/512 [00:02<01:44,  4.75it/s][A
generating:   3%|▎         | 15/512 [00:03<01:46,  4.66it/s][A
generating:   3%|▎         | 16/512 [00:03<01:46

siahol strany, preto prvy narodnom stanovisi s nizkymi chladinach, sa v radsej vlne
stanovili k ako to nemoze vyhlasil co ohrel vsetky s koncom a
v buducom roku?
Anky, oci zatial nikom o slobodny pohlad jeho chola. Mal sklen ak s vedomi ocitli v nemocnici.
V medzimesacna komentovat, aj sma
v nepomoze hladina vedomosti, ktore sluzieb a viedli domov, ze sa bola
buducej ceny summit troskou zomrela namietla
dom hovorilo tak, aby nema a vacsi vyhovila aj najnovsia na odmenovani s vodou, vedla pozornost
W


training:  11%|█         | 2002/18500 [8:49:53<177:58:33, 38.84s/it]

training loss: 0.7562448978424072


training:  11%|█         | 2003/18500 [8:50:08<146:00:16, 31.86s/it]

training loss: 0.7575433254241943


training:  11%|█         | 2004/18500 [8:50:24<123:39:55, 26.99s/it]

training loss: 0.7263780832290649


training:  11%|█         | 2005/18500 [8:50:39<107:58:13, 23.56s/it]

training loss: 0.8579471111297607


training:  11%|█         | 2006/18500 [8:50:55<97:01:24, 21.18s/it] 

training loss: 0.6554086804389954


training:  11%|█         | 2007/18500 [8:51:11<89:20:27, 19.50s/it]

training loss: 0.6243113875389099


training:  11%|█         | 2008/18500 [8:51:26<84:00:07, 18.34s/it]

training loss: 0.9007568359375


training:  11%|█         | 2009/18500 [8:51:42<80:12:43, 17.51s/it]

training loss: 0.9912525415420532


training:  11%|█         | 2010/18500 [8:51:57<77:34:20, 16.94s/it]

training loss: 0.7705811262130737


training:  11%|█         | 2011/18500 [8:52:13<75:43:09, 16.53s/it]

training loss: 0.5028440952301025


training:  11%|█         | 2012/18500 [8:52:28<74:25:27, 16.25s/it]

training loss: 0.4230453372001648


training:  11%|█         | 2013/18500 [8:52:44<73:29:46, 16.05s/it]

training loss: 0.9163382053375244


training:  11%|█         | 2014/18500 [8:53:00<72:51:49, 15.91s/it]

training loss: 1.0167791843414307


training:  11%|█         | 2015/18500 [8:53:15<72:25:09, 15.81s/it]

training loss: 0.635627031326294


training:  11%|█         | 2016/18500 [8:53:31<72:07:17, 15.75s/it]

training loss: 0.7775219082832336


training:  11%|█         | 2017/18500 [8:53:46<71:53:12, 15.70s/it]

training loss: 0.7773358821868896


training:  11%|█         | 2018/18500 [8:54:02<71:43:58, 15.67s/it]

training loss: 0.6853214502334595


training:  11%|█         | 2019/18500 [8:54:18<71:38:07, 15.65s/it]

training loss: 0.7529837489128113


training:  11%|█         | 2020/18500 [8:54:33<71:33:03, 15.63s/it]

training loss: 0.7555212378501892


training:  11%|█         | 2021/18500 [8:54:49<71:28:59, 15.62s/it]

training loss: 0.9869064092636108


training:  11%|█         | 2022/18500 [8:55:04<71:26:12, 15.61s/it]

training loss: 0.47648417949676514


training:  11%|█         | 2023/18500 [8:55:20<71:24:29, 15.60s/it]

training loss: 0.7570801973342896


training:  11%|█         | 2024/18500 [8:55:36<71:23:49, 15.60s/it]

training loss: 0.6293082237243652


training:  11%|█         | 2025/18500 [8:55:51<71:22:39, 15.60s/it]

training loss: 0.7943321466445923


training:  11%|█         | 2026/18500 [8:56:07<71:21:41, 15.59s/it]

training loss: 0.8898622989654541


training:  11%|█         | 2027/18500 [8:56:22<71:21:38, 15.60s/it]

training loss: 1.0589007139205933


training:  11%|█         | 2028/18500 [8:56:38<71:20:28, 15.59s/it]

training loss: 0.5323879718780518


training:  11%|█         | 2029/18500 [8:56:54<71:20:01, 15.59s/it]

training loss: 0.965401828289032


training:  11%|█         | 2030/18500 [8:57:09<71:20:19, 15.59s/it]

training loss: 0.8153365254402161


training:  11%|█         | 2031/18500 [8:57:25<71:21:34, 15.60s/it]

training loss: 0.973689079284668


training:  11%|█         | 2032/18500 [8:57:40<71:19:39, 15.59s/it]

training loss: 0.429958701133728


training:  11%|█         | 2033/18500 [8:57:56<71:19:05, 15.59s/it]

training loss: 0.8729788064956665


training:  11%|█         | 2034/18500 [8:58:11<71:18:24, 15.59s/it]

training loss: 1.0866856575012207


training:  11%|█         | 2035/18500 [8:58:27<71:19:34, 15.60s/it]

training loss: 0.707427442073822


training:  11%|█         | 2036/18500 [8:58:43<71:18:15, 15.59s/it]

training loss: 0.552865743637085


training:  11%|█         | 2037/18500 [8:58:58<71:17:55, 15.59s/it]

training loss: 0.9003578424453735


training:  11%|█         | 2038/18500 [8:59:14<71:18:13, 15.59s/it]

training loss: 0.8159586787223816


training:  11%|█         | 2039/18500 [8:59:29<71:18:35, 15.60s/it]

training loss: 0.3809516727924347


training:  11%|█         | 2040/18500 [8:59:45<71:16:55, 15.59s/it]

training loss: 0.6581411361694336


training:  11%|█         | 2041/18500 [9:00:01<71:16:59, 15.59s/it]

training loss: 0.8376286029815674


training:  11%|█         | 2042/18500 [9:00:16<71:16:01, 15.59s/it]

training loss: 1.0123456716537476


training:  11%|█         | 2043/18500 [9:00:32<71:16:08, 15.59s/it]

training loss: 0.7937072515487671


training:  11%|█         | 2044/18500 [9:00:47<71:15:21, 15.59s/it]

training loss: 0.6879681944847107


training:  11%|█         | 2045/18500 [9:01:03<71:15:03, 15.59s/it]

training loss: 0.4948417842388153


training:  11%|█         | 2046/18500 [9:01:19<71:16:15, 15.59s/it]

training loss: 0.5359461903572083


training:  11%|█         | 2047/18500 [9:01:34<71:16:04, 15.59s/it]

training loss: 0.8077413439750671


training:  11%|█         | 2048/18500 [9:01:50<71:14:50, 15.59s/it]

training loss: 0.5764366388320923


training:  11%|█         | 2049/18500 [9:02:05<71:14:48, 15.59s/it]

training loss: 0.9956070780754089


training:  11%|█         | 2050/18500 [9:02:21<71:15:23, 15.59s/it]

training loss: 0.8451207876205444


training:  11%|█         | 2051/18500 [9:02:37<71:14:00, 15.59s/it]

training loss: 0.7862200140953064


training:  11%|█         | 2052/18500 [9:02:52<71:13:40, 15.59s/it]

training loss: 0.8068670034408569


training:  11%|█         | 2053/18500 [9:03:08<71:13:15, 15.59s/it]

training loss: 0.6419234871864319


training:  11%|█         | 2054/18500 [9:03:23<71:14:59, 15.60s/it]

training loss: 0.7157349586486816


training:  11%|█         | 2055/18500 [9:03:39<71:13:51, 15.59s/it]

training loss: 0.7121700644493103


training:  11%|█         | 2056/18500 [9:03:55<71:13:56, 15.59s/it]

training loss: 0.6464328765869141


training:  11%|█         | 2057/18500 [9:04:10<71:12:25, 15.59s/it]

training loss: 0.8619918823242188


training:  11%|█         | 2058/18500 [9:04:26<71:13:52, 15.60s/it]

training loss: 0.9647765159606934


training:  11%|█         | 2059/18500 [9:04:41<71:12:28, 15.59s/it]

training loss: 0.8679337501525879


training:  11%|█         | 2060/18500 [9:04:57<71:12:00, 15.59s/it]

training loss: 0.6582516431808472


training:  11%|█         | 2061/18500 [9:05:12<71:11:07, 15.59s/it]

training loss: 0.27916544675827026


training:  11%|█         | 2062/18500 [9:05:28<71:11:39, 15.59s/it]

training loss: 0.5491756796836853


training:  11%|█         | 2063/18500 [9:05:44<71:10:19, 15.59s/it]

training loss: 0.942302942276001


training:  11%|█         | 2064/18500 [9:05:59<71:10:07, 15.59s/it]

training loss: 0.5910699367523193


training:  11%|█         | 2065/18500 [9:06:15<71:10:04, 15.59s/it]

training loss: 0.5215344429016113


training:  11%|█         | 2066/18500 [9:06:30<71:11:35, 15.60s/it]

training loss: 0.7613856196403503


training:  11%|█         | 2067/18500 [9:06:46<71:10:42, 15.59s/it]

training loss: 0.5959078073501587


training:  11%|█         | 2068/18500 [9:07:02<71:10:23, 15.59s/it]

training loss: 0.4680694341659546


training:  11%|█         | 2069/18500 [9:07:17<71:10:23, 15.59s/it]

training loss: 1.0557645559310913


training:  11%|█         | 2070/18500 [9:07:33<71:09:54, 15.59s/it]

training loss: 0.6409021615982056


training:  11%|█         | 2071/18500 [9:07:48<71:08:25, 15.59s/it]

training loss: 0.9721318483352661


training:  11%|█         | 2072/18500 [9:08:04<71:07:46, 15.59s/it]

training loss: 0.43785643577575684


training:  11%|█         | 2073/18500 [9:08:20<71:07:31, 15.59s/it]

training loss: 0.6712707281112671


training:  11%|█         | 2074/18500 [9:08:35<71:08:06, 15.59s/it]

training loss: 0.44364050030708313


training:  11%|█         | 2075/18500 [9:08:51<71:07:17, 15.59s/it]

training loss: 0.7942950129508972


training:  11%|█         | 2076/18500 [9:09:06<71:06:36, 15.59s/it]

training loss: 0.9996288418769836


training:  11%|█         | 2077/18500 [9:09:22<71:07:45, 15.59s/it]

training loss: 0.28181254863739014


training:  11%|█         | 2078/18500 [9:09:37<71:07:13, 15.59s/it]

training loss: 0.9199926853179932


training:  11%|█         | 2079/18500 [9:09:53<71:06:25, 15.59s/it]

training loss: 0.5116297006607056


training:  11%|█         | 2080/18500 [9:10:09<71:05:39, 15.59s/it]

training loss: 0.8132373094558716


training:  11%|█         | 2081/18500 [9:10:24<71:07:14, 15.59s/it]

training loss: 0.8661465644836426


training:  11%|█▏        | 2082/18500 [9:10:40<71:05:25, 15.59s/it]

training loss: 0.632964015007019


training:  11%|█▏        | 2083/18500 [9:10:55<71:05:21, 15.59s/it]

training loss: 0.6480377912521362


training:  11%|█▏        | 2084/18500 [9:11:11<71:04:54, 15.59s/it]

training loss: 1.2163931131362915


training:  11%|█▏        | 2085/18500 [9:11:27<71:06:06, 15.59s/it]

training loss: 0.6388317942619324


training:  11%|█▏        | 2086/18500 [9:11:42<71:05:12, 15.59s/it]

training loss: 0.7283112406730652


training:  11%|█▏        | 2087/18500 [9:11:58<71:05:02, 15.59s/it]

training loss: 0.8449476957321167


training:  11%|█▏        | 2088/18500 [9:12:13<71:04:21, 15.59s/it]

training loss: 0.7669022083282471


training:  11%|█▏        | 2089/18500 [9:12:29<71:04:11, 15.59s/it]

training loss: 0.5398017168045044


training:  11%|█▏        | 2090/18500 [9:12:45<71:02:49, 15.59s/it]

training loss: 0.542431116104126


training:  11%|█▏        | 2091/18500 [9:13:00<71:02:34, 15.59s/it]

training loss: 0.6098683476448059


training:  11%|█▏        | 2092/18500 [9:13:16<71:01:35, 15.58s/it]

training loss: 1.1500298976898193


training:  11%|█▏        | 2093/18500 [9:13:31<71:02:51, 15.59s/it]

training loss: 0.5987076163291931


training:  11%|█▏        | 2094/18500 [9:13:47<71:01:02, 15.58s/it]

training loss: 0.5720889568328857


training:  11%|█▏        | 2095/18500 [9:14:02<71:01:02, 15.58s/it]

training loss: 0.7361443042755127


training:  11%|█▏        | 2096/18500 [9:14:18<71:01:54, 15.59s/it]

training loss: 0.6981537938117981


training:  11%|█▏        | 2097/18500 [9:14:34<71:02:36, 15.59s/it]

training loss: 0.7962605357170105


training:  11%|█▏        | 2098/18500 [9:14:49<71:01:50, 15.59s/it]

training loss: 0.4127897620201111


training:  11%|█▏        | 2099/18500 [9:15:05<71:01:00, 15.59s/it]

training loss: 0.7055834531784058


training:  11%|█▏        | 2100/18500 [9:15:20<71:00:43, 15.59s/it]

training loss: 0.8832666873931885
training loss: 0.661611795425415


training:  11%|█▏        | 2101/18500 [9:15:37<72:51:25, 15.99s/it]

validation loss: 1.5746665000915527


training:  11%|█▏        | 2102/18500 [9:15:53<72:17:59, 15.87s/it]

training loss: 0.9256908297538757


training:  11%|█▏        | 2103/18500 [9:16:09<71:54:16, 15.79s/it]

training loss: 0.6498369574546814


training:  11%|█▏        | 2104/18500 [9:16:24<71:39:42, 15.73s/it]

training loss: 0.6699384450912476


training:  11%|█▏        | 2105/18500 [9:16:40<71:26:54, 15.69s/it]

training loss: 1.0324938297271729


training:  11%|█▏        | 2106/18500 [9:16:55<71:19:47, 15.66s/it]

training loss: 1.2137742042541504


training:  11%|█▏        | 2107/18500 [9:17:11<71:13:30, 15.64s/it]

training loss: 1.0628955364227295


training:  11%|█▏        | 2108/18500 [9:17:27<71:16:34, 15.65s/it]

training loss: 0.5811158418655396


training:  11%|█▏        | 2109/18500 [9:17:42<71:15:59, 15.65s/it]

training loss: 1.0749297142028809


training:  11%|█▏        | 2110/18500 [9:17:58<71:16:57, 15.66s/it]

training loss: 0.7225620150566101


training:  11%|█▏        | 2111/18500 [9:18:14<71:16:34, 15.66s/it]

training loss: 0.5475714206695557


training:  11%|█▏        | 2112/18500 [9:18:29<71:16:38, 15.66s/it]

training loss: 0.613446831703186


training:  11%|█▏        | 2113/18500 [9:18:45<71:14:37, 15.65s/it]

training loss: 0.7065116763114929


training:  11%|█▏        | 2114/18500 [9:19:01<71:15:53, 15.66s/it]

training loss: 0.2941310703754425


training:  11%|█▏        | 2115/18500 [9:19:16<71:15:49, 15.66s/it]

training loss: 0.8633301258087158


training:  11%|█▏        | 2116/18500 [9:19:32<71:17:58, 15.67s/it]

training loss: 0.6511167883872986


training:  11%|█▏        | 2117/18500 [9:19:48<71:14:01, 15.65s/it]

training loss: 0.8089461922645569


training:  11%|█▏        | 2118/18500 [9:20:03<71:09:29, 15.64s/it]

training loss: 0.5825051665306091


training:  11%|█▏        | 2119/18500 [9:20:19<71:06:08, 15.63s/it]

training loss: 0.47889775037765503


training:  11%|█▏        | 2120/18500 [9:20:34<71:03:44, 15.62s/it]

training loss: 0.6192243099212646


training:  11%|█▏        | 2121/18500 [9:20:50<71:01:03, 15.61s/it]

training loss: 0.8473436236381531


training:  11%|█▏        | 2122/18500 [9:21:06<70:58:31, 15.60s/it]

training loss: 0.6251655220985413


training:  11%|█▏        | 2123/18500 [9:21:21<70:57:24, 15.60s/it]

training loss: 0.6938467025756836


training:  11%|█▏        | 2124/18500 [9:21:37<70:55:44, 15.59s/it]

training loss: 0.629014790058136


training:  11%|█▏        | 2125/18500 [9:21:52<70:54:52, 15.59s/it]

training loss: 1.0484342575073242


training:  11%|█▏        | 2126/18500 [9:22:08<70:54:23, 15.59s/it]

training loss: 0.7848759889602661


training:  11%|█▏        | 2127/18500 [9:22:23<70:55:11, 15.59s/it]

training loss: 0.8833107352256775


training:  12%|█▏        | 2128/18500 [9:22:39<70:53:57, 15.59s/it]

training loss: 0.48542025685310364


training:  12%|█▏        | 2129/18500 [9:22:55<70:54:08, 15.59s/it]

training loss: 0.8238104581832886


training:  12%|█▏        | 2130/18500 [9:23:10<70:52:35, 15.59s/it]

training loss: 0.5895645618438721


training:  12%|█▏        | 2131/18500 [9:23:26<70:54:16, 15.59s/it]

training loss: 0.8732631802558899


training:  12%|█▏        | 2132/18500 [9:23:41<70:52:31, 15.59s/it]

training loss: 0.8140742182731628


training:  12%|█▏        | 2133/18500 [9:23:57<70:52:51, 15.59s/it]

training loss: 0.598588228225708


training:  12%|█▏        | 2134/18500 [9:24:13<70:52:52, 15.59s/it]

training loss: 0.8521702289581299


training:  12%|█▏        | 2135/18500 [9:24:28<70:53:34, 15.60s/it]

training loss: 1.2573823928833008


training:  12%|█▏        | 2136/18500 [9:24:44<70:52:34, 15.59s/it]

training loss: 0.7825689315795898


training:  12%|█▏        | 2137/18500 [9:24:59<70:51:53, 15.59s/it]

training loss: 0.7607187628746033


training:  12%|█▏        | 2138/18500 [9:25:15<70:51:58, 15.59s/it]

training loss: 0.6733567118644714


training:  12%|█▏        | 2139/18500 [9:25:31<70:51:44, 15.59s/it]

training loss: 0.9419010281562805


training:  12%|█▏        | 2140/18500 [9:25:46<70:50:20, 15.59s/it]

training loss: 0.7147606611251831


training:  12%|█▏        | 2141/18500 [9:26:02<70:49:52, 15.59s/it]

training loss: 0.7211986780166626


training:  12%|█▏        | 2142/18500 [9:26:17<70:50:39, 15.59s/it]

training loss: 0.7161217331886292


training:  12%|█▏        | 2143/18500 [9:26:33<70:50:38, 15.59s/it]

training loss: 0.8079128265380859


training:  12%|█▏        | 2144/18500 [9:26:48<70:49:26, 15.59s/it]

training loss: 0.7390134334564209


training:  12%|█▏        | 2145/18500 [9:27:04<70:49:23, 15.59s/it]

training loss: 0.46619394421577454


training:  12%|█▏        | 2146/18500 [9:27:20<70:49:47, 15.59s/it]

training loss: 0.5111250877380371


training:  12%|█▏        | 2147/18500 [9:27:35<70:49:53, 15.59s/it]

training loss: 0.6710457801818848


training:  12%|█▏        | 2148/18500 [9:27:51<70:49:16, 15.59s/it]

training loss: 0.9360204339027405


training:  12%|█▏        | 2149/18500 [9:28:06<70:48:50, 15.59s/it]

training loss: 0.45364537835121155


training:  12%|█▏        | 2150/18500 [9:28:22<70:49:29, 15.59s/it]

training loss: 0.8176680207252502


training:  12%|█▏        | 2151/18500 [9:28:38<70:48:43, 15.59s/it]

training loss: 0.811763346195221


training:  12%|█▏        | 2152/18500 [9:28:53<70:48:04, 15.59s/it]

training loss: 0.5697914361953735


training:  12%|█▏        | 2153/18500 [9:29:09<70:48:25, 15.59s/it]

training loss: 0.46575382351875305


training:  12%|█▏        | 2154/18500 [9:29:24<70:50:02, 15.60s/it]

training loss: 1.0181331634521484


training:  12%|█▏        | 2155/18500 [9:29:40<70:47:47, 15.59s/it]

training loss: 1.1418204307556152


training:  12%|█▏        | 2156/18500 [9:29:56<70:47:03, 15.59s/it]

training loss: 1.0556001663208008


training:  12%|█▏        | 2157/18500 [9:30:11<70:47:01, 15.59s/it]

training loss: 1.031139850616455


training:  12%|█▏        | 2158/18500 [9:30:27<70:48:07, 15.60s/it]

training loss: 1.0165650844573975


training:  12%|█▏        | 2159/18500 [9:30:42<70:46:17, 15.59s/it]

training loss: 0.5532750487327576


training:  12%|█▏        | 2160/18500 [9:30:58<70:46:41, 15.59s/it]

training loss: 0.6286748051643372


training:  12%|█▏        | 2161/18500 [9:31:14<70:46:22, 15.59s/it]

training loss: 1.058946967124939


training:  12%|█▏        | 2162/18500 [9:31:29<70:46:54, 15.60s/it]

training loss: 0.5267665386199951


training:  12%|█▏        | 2163/18500 [9:31:45<70:45:31, 15.59s/it]

training loss: 0.8554618954658508


training:  12%|█▏        | 2164/18500 [9:32:00<70:44:39, 15.59s/it]

training loss: 0.7490069270133972


training:  12%|█▏        | 2165/18500 [9:32:16<70:45:19, 15.59s/it]

training loss: 0.7027363181114197


training:  12%|█▏        | 2166/18500 [9:32:32<70:45:57, 15.60s/it]

training loss: 0.8800523281097412


training:  12%|█▏        | 2167/18500 [9:32:47<70:45:02, 15.59s/it]

training loss: 0.7237653136253357


training:  12%|█▏        | 2168/18500 [9:33:03<70:43:57, 15.59s/it]

training loss: 1.1521430015563965


training:  12%|█▏        | 2169/18500 [9:33:18<70:43:58, 15.59s/it]

training loss: 1.0814006328582764


training:  12%|█▏        | 2170/18500 [9:33:34<70:44:02, 15.59s/it]

training loss: 0.9576150178909302


training:  12%|█▏        | 2171/18500 [9:33:49<70:42:16, 15.59s/it]

training loss: 0.5731344819068909


training:  12%|█▏        | 2172/18500 [9:34:05<70:42:00, 15.59s/it]

training loss: 0.7508596777915955


training:  12%|█▏        | 2173/18500 [9:34:21<70:43:01, 15.59s/it]

training loss: 0.789341151714325


training:  12%|█▏        | 2174/18500 [9:34:36<70:41:57, 15.59s/it]

training loss: 0.7892144918441772


training:  12%|█▏        | 2175/18500 [9:34:52<70:41:18, 15.59s/it]

training loss: 0.951738715171814


training:  12%|█▏        | 2176/18500 [9:35:07<70:40:25, 15.59s/it]

training loss: 0.5914881229400635


training:  12%|█▏        | 2177/18500 [9:35:23<70:41:43, 15.59s/it]

training loss: 0.6755995154380798


training:  12%|█▏        | 2178/18500 [9:35:39<70:41:17, 15.59s/it]

training loss: 0.7833329439163208


training:  12%|█▏        | 2179/18500 [9:35:54<70:40:31, 15.59s/it]

training loss: 1.0514487028121948


training:  12%|█▏        | 2180/18500 [9:36:10<70:39:40, 15.59s/it]

training loss: 0.7068524360656738


training:  12%|█▏        | 2181/18500 [9:36:25<70:40:27, 15.59s/it]

training loss: 1.24772047996521


training:  12%|█▏        | 2182/18500 [9:36:41<70:38:51, 15.59s/it]

training loss: 0.6504437923431396


training:  12%|█▏        | 2183/18500 [9:36:57<70:38:46, 15.59s/it]

training loss: 0.8803195953369141


training:  12%|█▏        | 2184/18500 [9:37:12<70:37:58, 15.58s/it]

training loss: 0.6231058239936829


training:  12%|█▏        | 2185/18500 [9:37:28<70:40:03, 15.59s/it]

training loss: 0.6278416514396667


training:  12%|█▏        | 2186/18500 [9:37:43<70:38:43, 15.59s/it]

training loss: 0.8019586205482483


training:  12%|█▏        | 2187/18500 [9:37:59<70:38:26, 15.59s/it]

training loss: 0.934629499912262


training:  12%|█▏        | 2188/18500 [9:38:15<70:37:52, 15.59s/it]

training loss: 0.5284100770950317


training:  12%|█▏        | 2189/18500 [9:38:30<70:38:03, 15.59s/it]

training loss: 0.5056595802307129


training:  12%|█▏        | 2190/18500 [9:38:46<70:36:38, 15.59s/it]

training loss: 0.55189448595047


training:  12%|█▏        | 2191/18500 [9:39:01<70:36:58, 15.59s/it]

training loss: 0.6871020197868347


training:  12%|█▏        | 2192/18500 [9:39:17<70:37:11, 15.59s/it]

training loss: 0.8569052815437317


training:  12%|█▏        | 2193/18500 [9:39:32<70:37:39, 15.59s/it]

training loss: 0.9409170150756836


training:  12%|█▏        | 2194/18500 [9:39:48<70:36:49, 15.59s/it]

training loss: 0.7291081547737122


training:  12%|█▏        | 2195/18500 [9:40:04<70:35:31, 15.59s/it]

training loss: 0.7835902571678162


training:  12%|█▏        | 2196/18500 [9:40:19<70:35:36, 15.59s/it]

training loss: 0.3952871561050415


training:  12%|█▏        | 2197/18500 [9:40:35<70:35:19, 15.59s/it]

training loss: 0.8659645318984985


training:  12%|█▏        | 2198/18500 [9:40:50<70:34:27, 15.59s/it]

training loss: 0.8846480846405029


training:  12%|█▏        | 2199/18500 [9:41:06<70:33:57, 15.58s/it]

training loss: 0.8196575045585632


training:  12%|█▏        | 2200/18500 [9:41:22<70:35:06, 15.59s/it]

training loss: 0.7674779891967773
training loss: 0.8802051544189453


training:  12%|█▏        | 2201/18500 [9:41:39<72:25:37, 16.00s/it]

validation loss: 1.5215460062026978


training:  12%|█▏        | 2202/18500 [9:41:54<71:53:25, 15.88s/it]

training loss: 1.0527379512786865


training:  12%|█▏        | 2203/18500 [9:42:10<71:29:13, 15.79s/it]

training loss: 0.6927107572555542


training:  12%|█▏        | 2204/18500 [9:42:25<71:13:54, 15.74s/it]

training loss: 0.9515570402145386


training:  12%|█▏        | 2205/18500 [9:42:41<71:02:10, 15.69s/it]

training loss: 0.42694157361984253


training:  12%|█▏        | 2206/18500 [9:42:56<70:53:23, 15.66s/it]

training loss: 0.4642377495765686


training:  12%|█▏        | 2207/18500 [9:43:12<70:47:10, 15.64s/it]

training loss: 0.5952818989753723


training:  12%|█▏        | 2208/18500 [9:43:28<70:43:48, 15.63s/it]

training loss: 0.6873536705970764


training:  12%|█▏        | 2209/18500 [9:43:43<70:39:06, 15.61s/it]

training loss: 0.360273152589798


training:  12%|█▏        | 2210/18500 [9:43:59<70:36:48, 15.61s/it]

training loss: 0.8885842561721802


training:  12%|█▏        | 2211/18500 [9:44:14<70:34:59, 15.60s/it]

training loss: 1.0255674123764038


training:  12%|█▏        | 2212/18500 [9:44:30<70:35:12, 15.60s/it]

training loss: 0.7372133731842041


training:  12%|█▏        | 2213/18500 [9:44:46<70:32:42, 15.59s/it]

training loss: 1.0295096635818481


training:  12%|█▏        | 2214/18500 [9:45:01<70:32:44, 15.59s/it]

training loss: 0.6090712547302246


training:  12%|█▏        | 2215/18500 [9:45:17<70:40:18, 15.62s/it]

training loss: 0.699708878993988


training:  12%|█▏        | 2216/18500 [9:45:32<70:37:41, 15.61s/it]

training loss: 0.8961825966835022


training:  12%|█▏        | 2217/18500 [9:45:48<70:34:55, 15.60s/it]

training loss: 1.040381908416748


training:  12%|█▏        | 2218/18500 [9:46:04<70:38:32, 15.62s/it]

training loss: 0.5216849446296692


training:  12%|█▏        | 2219/18500 [9:46:19<70:43:49, 15.64s/it]

training loss: 0.8942999839782715


training:  12%|█▏        | 2220/18500 [9:46:35<70:46:51, 15.65s/it]

training loss: 1.1318632364273071


training:  12%|█▏        | 2221/18500 [9:46:51<70:47:51, 15.66s/it]

training loss: 0.5820463299751282


training:  12%|█▏        | 2222/18500 [9:47:06<70:47:48, 15.66s/it]

training loss: 0.6282878518104553


training:  12%|█▏        | 2223/18500 [9:47:22<70:50:10, 15.67s/it]

training loss: 0.5655907392501831


training:  12%|█▏        | 2224/18500 [9:47:38<70:48:51, 15.66s/it]

training loss: 1.091593861579895


training:  12%|█▏        | 2225/18500 [9:47:53<70:48:44, 15.66s/it]

training loss: 0.8607553243637085


training:  12%|█▏        | 2226/18500 [9:48:09<70:47:41, 15.66s/it]

training loss: 0.6749834418296814


training:  12%|█▏        | 2227/18500 [9:48:25<70:46:53, 15.66s/it]

training loss: 0.9396692514419556


training:  12%|█▏        | 2228/18500 [9:48:40<70:40:45, 15.64s/it]

training loss: 0.5888106226921082


training:  12%|█▏        | 2229/18500 [9:48:56<70:38:58, 15.63s/it]

training loss: 0.838140070438385


training:  12%|█▏        | 2230/18500 [9:49:12<70:35:39, 15.62s/it]

training loss: 0.7114149332046509


training:  12%|█▏        | 2231/18500 [9:49:27<70:34:02, 15.62s/it]

training loss: 0.7909464836120605


training:  12%|█▏        | 2232/18500 [9:49:43<70:33:05, 15.61s/it]

training loss: 1.0169183015823364


training:  12%|█▏        | 2233/18500 [9:49:58<70:30:39, 15.60s/it]

training loss: 0.5501971244812012


training:  12%|█▏        | 2234/18500 [9:50:14<70:29:21, 15.60s/it]

training loss: 0.6292850971221924


training:  12%|█▏        | 2235/18500 [9:50:30<70:28:51, 15.60s/it]

training loss: 0.893142819404602


training:  12%|█▏        | 2236/18500 [9:50:45<70:26:56, 15.59s/it]

training loss: 0.7162537574768066


training:  12%|█▏        | 2237/18500 [9:51:01<70:26:24, 15.59s/it]

training loss: 0.4540521800518036


training:  12%|█▏        | 2238/18500 [9:51:16<70:25:56, 15.59s/it]

training loss: 1.0181457996368408


training:  12%|█▏        | 2239/18500 [9:51:32<70:25:58, 15.59s/it]

training loss: 1.1410481929779053


training:  12%|█▏        | 2240/18500 [9:51:47<70:25:19, 15.59s/it]

training loss: 0.806628406047821


training:  12%|█▏        | 2241/18500 [9:52:03<70:23:53, 15.59s/it]

training loss: 0.8637884259223938


training:  12%|█▏        | 2242/18500 [9:52:19<70:23:51, 15.59s/it]

training loss: 0.7514119744300842


training:  12%|█▏        | 2243/18500 [9:52:34<70:24:14, 15.59s/it]

training loss: 0.6759692430496216


training:  12%|█▏        | 2244/18500 [9:52:50<70:23:41, 15.59s/it]

training loss: 0.8135751485824585


training:  12%|█▏        | 2245/18500 [9:53:05<70:24:22, 15.59s/it]

training loss: 1.0067871809005737


training:  12%|█▏        | 2246/18500 [9:53:21<70:24:27, 15.59s/it]

training loss: 0.6236751079559326


training:  12%|█▏        | 2247/18500 [9:53:37<70:23:12, 15.59s/it]

training loss: 0.7276570200920105


training:  12%|█▏        | 2248/18500 [9:53:52<70:22:20, 15.59s/it]

training loss: 0.8923013806343079


training:  12%|█▏        | 2249/18500 [9:54:08<70:22:34, 15.59s/it]

training loss: 0.8056222200393677


training:  12%|█▏        | 2250/18500 [9:54:23<70:24:12, 15.60s/it]

training loss: 0.3350341022014618


training:  12%|█▏        | 2251/18500 [9:54:39<70:23:03, 15.59s/it]

training loss: 0.9247456789016724


training:  12%|█▏        | 2252/18500 [9:54:55<70:23:36, 15.60s/it]

training loss: 0.7213785648345947


training:  12%|█▏        | 2253/18500 [9:55:10<70:21:40, 15.59s/it]

training loss: 0.566104531288147


training:  12%|█▏        | 2254/18500 [9:55:26<70:23:35, 15.60s/it]

training loss: 0.7117396593093872


training:  12%|█▏        | 2255/18500 [9:55:41<70:22:14, 15.59s/it]

training loss: 0.6265444159507751


training:  12%|█▏        | 2256/18500 [9:55:57<70:21:26, 15.59s/it]

training loss: 1.1446329355239868


training:  12%|█▏        | 2257/18500 [9:56:13<70:21:10, 15.59s/it]

training loss: 0.7098053693771362


training:  12%|█▏        | 2258/18500 [9:56:28<70:21:30, 15.59s/it]

training loss: 0.683104395866394


training:  12%|█▏        | 2259/18500 [9:56:44<70:19:36, 15.59s/it]

training loss: 0.755747377872467


training:  12%|█▏        | 2260/18500 [9:56:59<70:19:39, 15.59s/it]

training loss: 0.2858412265777588


training:  12%|█▏        | 2261/18500 [9:57:15<70:18:48, 15.59s/it]

training loss: 0.706260085105896


training:  12%|█▏        | 2262/18500 [9:57:30<70:19:05, 15.59s/it]

training loss: 0.9682794809341431


training:  12%|█▏        | 2263/18500 [9:57:46<70:17:49, 15.59s/it]

training loss: 0.6221206784248352


training:  12%|█▏        | 2264/18500 [9:58:02<70:17:33, 15.59s/it]

training loss: 0.9182588458061218


training:  12%|█▏        | 2265/18500 [9:58:17<70:19:47, 15.60s/it]

training loss: 1.0281789302825928


training:  12%|█▏        | 2266/18500 [9:58:33<70:19:10, 15.59s/it]

training loss: 0.6284131407737732


training:  12%|█▏        | 2267/18500 [9:58:48<70:18:22, 15.59s/it]

training loss: 1.0222768783569336


training:  12%|█▏        | 2268/18500 [9:59:04<70:16:55, 15.59s/it]

training loss: 0.5283677577972412


training:  12%|█▏        | 2269/18500 [9:59:20<70:17:02, 15.59s/it]

training loss: 0.6814126968383789


training:  12%|█▏        | 2270/18500 [9:59:35<70:17:49, 15.59s/it]

training loss: 0.964421272277832


training:  12%|█▏        | 2271/18500 [9:59:51<70:16:26, 15.59s/it]

training loss: 0.6787748336791992


training:  12%|█▏        | 2272/18500 [10:00:06<70:15:29, 15.59s/it]

training loss: 0.8281033039093018


training:  12%|█▏        | 2273/18500 [10:00:22<70:17:16, 15.59s/it]

training loss: 0.5714154243469238


training:  12%|█▏        | 2274/18500 [10:00:38<70:16:08, 15.59s/it]

training loss: 0.43097925186157227


training:  12%|█▏        | 2275/18500 [10:00:53<70:14:54, 15.59s/it]

training loss: 0.5915065407752991


training:  12%|█▏        | 2276/18500 [10:01:09<70:14:59, 15.59s/it]

training loss: 0.9433155059814453


training:  12%|█▏        | 2277/18500 [10:01:24<70:16:32, 15.59s/it]

training loss: 0.6702315211296082


training:  12%|█▏        | 2278/18500 [10:01:40<70:15:21, 15.59s/it]

training loss: 0.7505578994750977


training:  12%|█▏        | 2279/18500 [10:01:56<70:15:08, 15.59s/it]

training loss: 1.0285992622375488


training:  12%|█▏        | 2280/18500 [10:02:11<70:14:37, 15.59s/it]

training loss: 0.8514100909233093


training:  12%|█▏        | 2281/18500 [10:02:27<70:15:34, 15.59s/it]

training loss: 0.9673693776130676


training:  12%|█▏        | 2282/18500 [10:02:42<70:13:47, 15.59s/it]

training loss: 0.7196352481842041


training:  12%|█▏        | 2283/18500 [10:02:58<70:14:14, 15.59s/it]

training loss: 0.6002504825592041


training:  12%|█▏        | 2284/18500 [10:03:13<70:13:52, 15.59s/it]

training loss: 0.9490033388137817


training:  12%|█▏        | 2285/18500 [10:03:29<70:15:24, 15.60s/it]

training loss: 0.6654497981071472


training:  12%|█▏        | 2286/18500 [10:03:45<70:13:59, 15.59s/it]

training loss: 0.5849230289459229


training:  12%|█▏        | 2287/18500 [10:04:00<70:12:47, 15.59s/it]

training loss: 0.7884501814842224


training:  12%|█▏        | 2288/18500 [10:04:16<70:12:33, 15.59s/it]

training loss: 1.0729918479919434


training:  12%|█▏        | 2289/18500 [10:04:31<70:12:31, 15.59s/it]

training loss: 0.9089934825897217


training:  12%|█▏        | 2290/18500 [10:04:47<70:11:58, 15.59s/it]

training loss: 0.4929630160331726


training:  12%|█▏        | 2291/18500 [10:05:03<70:11:21, 15.59s/it]

training loss: 0.9807331562042236


training:  12%|█▏        | 2292/18500 [10:05:18<70:12:07, 15.59s/it]

training loss: 0.5902048349380493


training:  12%|█▏        | 2293/18500 [10:05:34<70:11:49, 15.59s/it]

training loss: 0.7407668232917786


training:  12%|█▏        | 2294/18500 [10:05:49<70:10:11, 15.59s/it]

training loss: 0.7778323292732239


training:  12%|█▏        | 2295/18500 [10:06:05<70:09:57, 15.59s/it]

training loss: 0.92451012134552


training:  12%|█▏        | 2296/18500 [10:06:21<70:10:46, 15.59s/it]

training loss: 0.7329737544059753


training:  12%|█▏        | 2297/18500 [10:06:36<70:10:28, 15.59s/it]

training loss: 0.9286035299301147


training:  12%|█▏        | 2298/18500 [10:06:52<70:09:26, 15.59s/it]

training loss: 0.737802267074585


training:  12%|█▏        | 2299/18500 [10:07:07<70:08:26, 15.59s/it]

training loss: 0.5878499150276184


training:  12%|█▏        | 2300/18500 [10:07:23<70:09:23, 15.59s/it]

training loss: 1.0147733688354492
training loss: 0.9224318861961365


training:  12%|█▏        | 2301/18500 [10:07:40<71:59:35, 16.00s/it]

validation loss: 1.5171102285385132


training:  12%|█▏        | 2302/18500 [10:07:55<71:27:24, 15.88s/it]

training loss: 0.7206417918205261


training:  12%|█▏        | 2303/18500 [10:08:11<71:03:32, 15.79s/it]

training loss: 0.9326973557472229


training:  12%|█▏        | 2304/18500 [10:08:27<70:48:10, 15.74s/it]

training loss: 0.6633854508399963


training:  12%|█▏        | 2305/18500 [10:08:42<70:35:51, 15.69s/it]

training loss: 0.8168959021568298


training:  12%|█▏        | 2306/18500 [10:08:58<70:27:57, 15.66s/it]

training loss: 0.8163308501243591


training:  12%|█▏        | 2307/18500 [10:09:13<70:22:14, 15.64s/it]

training loss: 0.49814870953559875


training:  12%|█▏        | 2308/18500 [10:09:29<70:18:16, 15.63s/it]

training loss: 0.2932398021221161


training:  12%|█▏        | 2309/18500 [10:09:45<70:14:38, 15.62s/it]

training loss: 0.8640727996826172


training:  12%|█▏        | 2310/18500 [10:10:00<70:12:29, 15.61s/it]

training loss: 1.000612735748291


training:  12%|█▏        | 2311/18500 [10:10:16<70:10:37, 15.61s/it]

training loss: 0.9683756232261658


training:  12%|█▏        | 2312/18500 [10:10:31<70:10:07, 15.60s/it]

training loss: 0.9763201475143433


training:  13%|█▎        | 2313/18500 [10:10:47<70:08:41, 15.60s/it]

training loss: 0.7039298415184021


training:  13%|█▎        | 2314/18500 [10:11:03<70:07:23, 15.60s/it]

training loss: 0.6306947469711304


training:  13%|█▎        | 2315/18500 [10:11:18<70:07:29, 15.60s/it]

training loss: 1.0112009048461914


training:  13%|█▎        | 2316/18500 [10:11:34<70:07:25, 15.60s/it]

training loss: 0.7102420330047607


training:  13%|█▎        | 2317/18500 [10:11:49<70:07:22, 15.60s/it]

training loss: 0.8162201642990112


training:  13%|█▎        | 2318/18500 [10:12:05<70:06:31, 15.60s/it]

training loss: 0.6451015472412109


training:  13%|█▎        | 2319/18500 [10:12:21<70:06:21, 15.60s/it]

training loss: 0.47985413670539856


training:  13%|█▎        | 2320/18500 [10:12:36<70:04:55, 15.59s/it]

training loss: 1.1507307291030884


training:  13%|█▎        | 2321/18500 [10:12:52<70:03:26, 15.59s/it]

training loss: 1.1317733526229858


training:  13%|█▎        | 2322/18500 [10:13:07<70:02:56, 15.59s/it]

training loss: 1.0192127227783203


training:  13%|█▎        | 2323/18500 [10:13:23<70:03:41, 15.59s/it]

training loss: 0.5385707020759583


training:  13%|█▎        | 2324/18500 [10:13:39<70:02:57, 15.59s/it]

training loss: 1.0647541284561157


training:  13%|█▎        | 2325/18500 [10:13:54<70:02:57, 15.59s/it]

training loss: 0.4723215103149414


training:  13%|█▎        | 2326/18500 [10:14:10<70:01:54, 15.59s/it]

training loss: 1.1334015130996704


training:  13%|█▎        | 2327/18500 [10:14:25<70:03:29, 15.59s/it]

training loss: 1.0200724601745605


training:  13%|█▎        | 2328/18500 [10:14:41<70:01:41, 15.59s/it]

training loss: 0.6160745024681091


training:  13%|█▎        | 2329/18500 [10:14:56<70:01:26, 15.59s/it]

training loss: 0.8182739019393921


training:  13%|█▎        | 2330/18500 [10:15:12<70:00:17, 15.59s/it]

training loss: 0.8291173577308655


training:  13%|█▎        | 2331/18500 [10:15:28<70:01:54, 15.59s/it]

training loss: 0.765379011631012


training:  13%|█▎        | 2332/18500 [10:15:43<70:00:34, 15.59s/it]

training loss: 0.4621070623397827


training:  13%|█▎        | 2333/18500 [10:15:59<70:00:37, 15.59s/it]

training loss: 0.6749855875968933


training:  13%|█▎        | 2334/18500 [10:16:14<70:00:03, 15.59s/it]

training loss: 0.6924953460693359


training:  13%|█▎        | 2335/18500 [10:16:30<69:59:59, 15.59s/it]

training loss: 1.2430150508880615


training:  13%|█▎        | 2336/18500 [10:16:46<70:00:46, 15.59s/it]

training loss: 0.8452373147010803


training:  13%|█▎        | 2337/18500 [10:17:01<70:05:46, 15.61s/it]

training loss: 0.7585667967796326


training:  13%|█▎        | 2338/18500 [10:17:17<70:09:50, 15.63s/it]

training loss: 0.603343665599823


training:  13%|█▎        | 2339/18500 [10:17:33<70:13:55, 15.64s/it]

training loss: 1.1366568803787231


training:  13%|█▎        | 2340/18500 [10:17:48<70:14:21, 15.65s/it]

training loss: 1.148803949356079


training:  13%|█▎        | 2341/18500 [10:18:04<70:15:56, 15.65s/it]

training loss: 1.051785945892334


training:  13%|█▎        | 2342/18500 [10:18:20<70:19:08, 15.67s/it]

training loss: 1.07390558719635


training:  13%|█▎        | 2343/18500 [10:18:35<70:20:41, 15.67s/it]

training loss: 0.9059126377105713


training:  13%|█▎        | 2344/18500 [10:18:51<70:21:22, 15.68s/it]

training loss: 0.8377857804298401


training:  13%|█▎        | 2345/18500 [10:19:07<70:19:25, 15.67s/it]

training loss: 0.6762921214103699


training:  13%|█▎        | 2346/18500 [10:19:22<70:15:59, 15.66s/it]

training loss: 0.8513541221618652


training:  13%|█▎        | 2347/18500 [10:19:38<70:11:49, 15.64s/it]

training loss: 0.83561110496521


training:  13%|█▎        | 2348/18500 [10:19:54<70:06:42, 15.63s/it]

training loss: 1.2925766706466675


training:  13%|█▎        | 2349/18500 [10:20:09<70:03:49, 15.62s/it]

training loss: 0.38074222207069397


training:  13%|█▎        | 2350/18500 [10:20:25<70:02:43, 15.61s/it]

training loss: 1.1929699182510376


training:  13%|█▎        | 2351/18500 [10:20:40<69:59:32, 15.60s/it]

training loss: 0.8711103796958923


training:  13%|█▎        | 2352/18500 [10:20:56<69:58:01, 15.60s/it]

training loss: 0.7391406297683716


training:  13%|█▎        | 2353/18500 [10:21:11<69:57:05, 15.60s/it]

training loss: 0.5556549429893494


training:  13%|█▎        | 2354/18500 [10:21:27<69:57:07, 15.60s/it]

training loss: 0.796597957611084


training:  13%|█▎        | 2355/18500 [10:21:43<69:54:51, 15.59s/it]

training loss: 1.1169047355651855


training:  13%|█▎        | 2356/18500 [10:21:58<69:54:00, 15.59s/it]

training loss: 0.9102676510810852


training:  13%|█▎        | 2357/18500 [10:22:14<69:53:42, 15.59s/it]

training loss: 1.1969637870788574


training:  13%|█▎        | 2358/18500 [10:22:29<69:54:09, 15.59s/it]

training loss: 0.5808824896812439


training:  13%|█▎        | 2359/18500 [10:22:45<69:52:53, 15.59s/it]

training loss: 0.7561385035514832


training:  13%|█▎        | 2360/18500 [10:23:01<69:52:44, 15.59s/it]

training loss: 0.8157862424850464


training:  13%|█▎        | 2361/18500 [10:23:16<69:52:22, 15.59s/it]

training loss: 0.8273399472236633


training:  13%|█▎        | 2362/18500 [10:23:32<69:53:20, 15.59s/it]

training loss: 0.9040079116821289


training:  13%|█▎        | 2363/18500 [10:23:47<69:52:26, 15.59s/it]

training loss: 0.9099828004837036


training:  13%|█▎        | 2364/18500 [10:24:03<69:52:22, 15.59s/it]

training loss: 0.6219269037246704


training:  13%|█▎        | 2365/18500 [10:24:19<69:52:24, 15.59s/it]

training loss: 0.5086410045623779


training:  13%|█▎        | 2366/18500 [10:24:34<69:52:33, 15.59s/it]

training loss: 0.6382690072059631


training:  13%|█▎        | 2367/18500 [10:24:50<69:51:02, 15.59s/it]

training loss: 0.8779320120811462


training:  13%|█▎        | 2368/18500 [10:25:05<69:51:36, 15.59s/it]

training loss: 1.0845800638198853


training:  13%|█▎        | 2369/18500 [10:25:21<69:52:27, 15.59s/it]

training loss: 0.7378660440444946


training:  13%|█▎        | 2370/18500 [10:25:36<69:50:59, 15.59s/it]

training loss: 0.7631927728652954


training:  13%|█▎        | 2371/18500 [10:25:52<69:50:28, 15.59s/it]

training loss: 1.103319764137268


training:  13%|█▎        | 2372/18500 [10:26:08<69:49:00, 15.58s/it]

training loss: 0.6344205737113953


training:  13%|█▎        | 2373/18500 [10:26:23<69:50:13, 15.59s/it]

training loss: 0.44429245591163635


training:  13%|█▎        | 2374/18500 [10:26:39<69:49:21, 15.59s/it]

training loss: 0.3582238256931305


training:  13%|█▎        | 2375/18500 [10:26:54<69:49:12, 15.59s/it]

training loss: 0.6002529859542847


training:  13%|█▎        | 2376/18500 [10:27:10<69:48:12, 15.59s/it]

training loss: 0.8205110430717468


training:  13%|█▎        | 2377/18500 [10:27:26<69:49:14, 15.59s/it]

training loss: 0.7895949482917786


training:  13%|█▎        | 2378/18500 [10:27:41<69:48:33, 15.59s/it]

training loss: 0.7762781381607056


training:  13%|█▎        | 2379/18500 [10:27:57<69:48:24, 15.59s/it]

training loss: 0.8724560737609863


training:  13%|█▎        | 2380/18500 [10:28:12<69:47:28, 15.59s/it]

training loss: 0.33921200037002563


training:  13%|█▎        | 2381/18500 [10:28:28<69:47:43, 15.59s/it]

training loss: 0.9466415047645569


training:  13%|█▎        | 2382/18500 [10:28:44<69:46:34, 15.58s/it]

training loss: 0.8720817565917969


training:  13%|█▎        | 2383/18500 [10:28:59<69:45:56, 15.58s/it]

training loss: 0.4473879933357239


training:  13%|█▎        | 2384/18500 [10:29:15<69:47:04, 15.59s/it]

training loss: 0.5845888257026672


training:  13%|█▎        | 2385/18500 [10:29:30<69:47:20, 15.59s/it]

training loss: 0.5012351274490356


training:  13%|█▎        | 2386/18500 [10:29:46<69:46:11, 15.59s/it]

training loss: 0.5592454075813293


training:  13%|█▎        | 2387/18500 [10:30:01<69:45:36, 15.59s/it]

training loss: 0.963329553604126


training:  13%|█▎        | 2388/18500 [10:30:17<69:46:08, 15.59s/it]

training loss: 0.9917027354240417


training:  13%|█▎        | 2389/18500 [10:30:33<69:46:31, 15.59s/it]

training loss: 0.7183642387390137


training:  13%|█▎        | 2390/18500 [10:30:48<69:45:04, 15.59s/it]

training loss: 0.9627841114997864


training:  13%|█▎        | 2391/18500 [10:31:04<69:44:39, 15.59s/it]

training loss: 0.8097645044326782


training:  13%|█▎        | 2392/18500 [10:31:19<69:44:36, 15.59s/it]

training loss: 0.6371325850486755


training:  13%|█▎        | 2393/18500 [10:31:35<69:45:10, 15.59s/it]

training loss: 0.8963764905929565


training:  13%|█▎        | 2394/18500 [10:31:51<69:44:25, 15.59s/it]

training loss: 0.8230583071708679


training:  13%|█▎        | 2395/18500 [10:32:06<69:43:15, 15.58s/it]

training loss: 0.5294803380966187


training:  13%|█▎        | 2396/18500 [10:32:22<69:44:04, 15.59s/it]

training loss: 0.5265522599220276


training:  13%|█▎        | 2397/18500 [10:32:37<69:43:22, 15.59s/it]

training loss: 0.5690417289733887


training:  13%|█▎        | 2398/18500 [10:32:53<69:42:42, 15.59s/it]

training loss: 1.0730990171432495


training:  13%|█▎        | 2399/18500 [10:33:09<69:42:35, 15.59s/it]

training loss: 0.7413225173950195


training:  13%|█▎        | 2400/18500 [10:33:24<69:43:52, 15.59s/it]

training loss: 1.0728360414505005
training loss: 0.989296019077301


training:  13%|█▎        | 2401/18500 [10:33:41<71:32:13, 16.00s/it]

validation loss: 1.4855713844299316


training:  13%|█▎        | 2402/18500 [10:33:57<71:00:44, 15.88s/it]

training loss: 0.7305885553359985


training:  13%|█▎        | 2403/18500 [10:34:12<70:36:56, 15.79s/it]

training loss: 0.6093416213989258


training:  13%|█▎        | 2404/18500 [10:34:28<70:22:12, 15.74s/it]

training loss: 0.720228374004364


training:  13%|█▎        | 2405/18500 [10:34:43<70:08:59, 15.69s/it]

training loss: 0.8467888236045837


training:  13%|█▎        | 2406/18500 [10:34:59<70:00:25, 15.66s/it]

training loss: 0.31872519850730896


training:  13%|█▎        | 2407/18500 [10:35:15<69:54:35, 15.64s/it]

training loss: 0.9446338415145874


training:  13%|█▎        | 2408/18500 [10:35:30<69:50:38, 15.63s/it]

training loss: 0.6636735200881958


training:  13%|█▎        | 2409/18500 [10:35:46<69:46:51, 15.61s/it]

training loss: 0.6908779144287109


training:  13%|█▎        | 2410/18500 [10:36:01<69:44:30, 15.60s/it]

training loss: 0.8003089427947998


training:  13%|█▎        | 2411/18500 [10:36:17<69:43:28, 15.60s/it]

training loss: 0.9347220063209534


training:  13%|█▎        | 2412/18500 [10:36:33<69:42:24, 15.60s/it]

training loss: 0.7945454716682434


training:  13%|█▎        | 2413/18500 [10:36:48<69:40:53, 15.59s/it]

training loss: 0.8097167611122131


training:  13%|█▎        | 2414/18500 [10:37:04<69:39:17, 15.59s/it]

training loss: 1.15850830078125


training:  13%|█▎        | 2415/18500 [10:37:19<69:38:57, 15.59s/it]

training loss: 0.506956934928894


training:  13%|█▎        | 2416/18500 [10:37:35<69:39:02, 15.59s/it]

training loss: 0.7234336137771606


training:  13%|█▎        | 2417/18500 [10:37:50<69:38:47, 15.59s/it]

training loss: 0.27846983075141907


training:  13%|█▎        | 2418/18500 [10:38:06<69:38:17, 15.59s/it]

training loss: 0.8724368214607239


training:  13%|█▎        | 2419/18500 [10:38:22<69:39:01, 15.59s/it]

training loss: 0.8029082417488098


training:  13%|█▎        | 2420/18500 [10:38:37<69:38:10, 15.59s/it]

training loss: 0.7748538851737976


training:  13%|█▎        | 2421/18500 [10:38:53<69:37:57, 15.59s/it]

training loss: 0.9091993570327759


training:  13%|█▎        | 2422/18500 [10:39:08<69:37:48, 15.59s/it]

training loss: 0.9085497856140137


training:  13%|█▎        | 2423/18500 [10:39:24<69:39:45, 15.60s/it]

training loss: 0.5942553877830505


training:  13%|█▎        | 2424/18500 [10:39:40<69:39:02, 15.60s/it]

training loss: 0.6786327958106995


training:  13%|█▎        | 2425/18500 [10:39:55<69:38:49, 15.60s/it]

training loss: 0.3855319619178772


training:  13%|█▎        | 2426/18500 [10:40:11<69:38:09, 15.60s/it]

training loss: 0.7017745971679688


training:  13%|█▎        | 2427/18500 [10:40:26<69:39:09, 15.60s/it]

training loss: 0.8273159861564636


training:  13%|█▎        | 2428/18500 [10:40:42<69:37:00, 15.59s/it]

training loss: 0.8807928562164307


training:  13%|█▎        | 2429/18500 [10:40:58<69:36:53, 15.59s/it]

training loss: 1.0039397478103638


training:  13%|█▎        | 2430/18500 [10:41:13<69:36:19, 15.59s/it]

training loss: 0.8909724950790405


training:  13%|█▎        | 2431/18500 [10:41:29<69:35:53, 15.59s/it]

training loss: 0.8693410158157349


training:  13%|█▎        | 2432/18500 [10:41:44<69:34:52, 15.59s/it]

training loss: 0.6916707754135132


training:  13%|█▎        | 2433/18500 [10:42:00<69:33:31, 15.59s/it]

training loss: 0.43668338656425476


training:  13%|█▎        | 2434/18500 [10:42:16<69:33:15, 15.59s/it]

training loss: 0.9101284742355347


training:  13%|█▎        | 2435/18500 [10:42:31<69:33:53, 15.59s/it]

training loss: 0.6453344225883484


training:  13%|█▎        | 2436/18500 [10:42:47<69:33:18, 15.59s/it]

training loss: 0.5549371242523193


training:  13%|█▎        | 2437/18500 [10:43:02<69:32:56, 15.59s/it]

training loss: 0.8372442126274109


training:  13%|█▎        | 2438/18500 [10:43:18<69:33:30, 15.59s/it]

training loss: 0.6314031481742859


training:  13%|█▎        | 2439/18500 [10:43:34<69:33:18, 15.59s/it]

training loss: 1.0052531957626343


training:  13%|█▎        | 2440/18500 [10:43:49<69:32:29, 15.59s/it]

training loss: 0.7527429461479187


training:  13%|█▎        | 2441/18500 [10:44:05<69:31:52, 15.59s/it]

training loss: 0.4035835862159729


training:  13%|█▎        | 2442/18500 [10:44:20<69:32:36, 15.59s/it]

training loss: 0.6483641862869263


training:  13%|█▎        | 2443/18500 [10:44:36<69:32:19, 15.59s/it]

training loss: 0.9292261600494385


training:  13%|█▎        | 2444/18500 [10:44:51<69:32:12, 15.59s/it]

training loss: 0.7909212708473206


training:  13%|█▎        | 2445/18500 [10:45:07<69:31:19, 15.59s/it]

training loss: 0.384641170501709


training:  13%|█▎        | 2446/18500 [10:45:23<69:34:52, 15.60s/it]

training loss: 0.869723916053772


training:  13%|█▎        | 2447/18500 [10:45:38<69:38:48, 15.62s/it]

training loss: 0.8260691165924072


training:  13%|█▎        | 2448/18500 [10:45:54<69:41:00, 15.63s/it]

training loss: 0.8722866773605347


training:  13%|█▎        | 2449/18500 [10:46:10<69:43:23, 15.64s/it]

training loss: 0.9424632787704468


training:  13%|█▎        | 2450/18500 [10:46:25<69:46:25, 15.65s/it]

training loss: 0.47341328859329224


training:  13%|█▎        | 2451/18500 [10:46:41<69:45:37, 15.65s/it]

training loss: 0.924528181552887


training:  13%|█▎        | 2452/18500 [10:46:57<69:45:23, 15.65s/it]

training loss: 1.1280784606933594


training:  13%|█▎        | 2453/18500 [10:47:12<69:44:45, 15.65s/it]

training loss: 0.7295248508453369


training:  13%|█▎        | 2454/18500 [10:47:28<69:46:07, 15.65s/it]

training loss: 0.7795789241790771


training:  13%|█▎        | 2455/18500 [10:47:44<69:44:57, 15.65s/it]

training loss: 0.8717455267906189


training:  13%|█▎        | 2456/18500 [10:47:59<69:43:01, 15.64s/it]

training loss: 0.511054515838623


training:  13%|█▎        | 2457/18500 [10:48:15<69:42:09, 15.64s/it]

training loss: 0.8686981797218323


training:  13%|█▎        | 2458/18500 [10:48:30<69:39:40, 15.63s/it]

training loss: 0.3462233245372772


training:  13%|█▎        | 2459/18500 [10:48:46<69:35:14, 15.62s/it]

training loss: 0.4493802785873413


training:  13%|█▎        | 2460/18500 [10:49:02<69:33:11, 15.61s/it]

training loss: 0.7947674989700317


training:  13%|█▎        | 2461/18500 [10:49:17<69:31:37, 15.61s/it]

training loss: 0.7293277382850647


training:  13%|█▎        | 2462/18500 [10:49:33<69:29:56, 15.60s/it]

training loss: 0.4547693729400635


training:  13%|█▎        | 2463/18500 [10:49:48<69:28:58, 15.60s/it]

training loss: 0.8236174583435059


training:  13%|█▎        | 2464/18500 [10:50:04<69:29:10, 15.60s/it]

training loss: 0.612817645072937


training:  13%|█▎        | 2465/18500 [10:50:20<69:28:37, 15.60s/it]

training loss: 0.8683285713195801


training:  13%|█▎        | 2466/18500 [10:50:35<69:28:43, 15.60s/it]

training loss: 0.8707866668701172


training:  13%|█▎        | 2467/18500 [10:50:51<69:27:19, 15.60s/it]

training loss: 0.913061261177063


training:  13%|█▎        | 2468/18500 [10:51:06<69:26:11, 15.59s/it]

training loss: 0.4765438139438629


training:  13%|█▎        | 2469/18500 [10:51:22<69:26:47, 15.60s/it]

training loss: 0.9723607897758484


training:  13%|█▎        | 2470/18500 [10:51:38<69:25:54, 15.59s/it]

training loss: 0.8160916566848755


training:  13%|█▎        | 2471/18500 [10:51:53<69:24:58, 15.59s/it]

training loss: 0.8910408020019531


training:  13%|█▎        | 2472/18500 [10:52:09<69:24:14, 15.59s/it]

training loss: 1.1482640504837036


training:  13%|█▎        | 2473/18500 [10:52:24<69:26:04, 15.60s/it]

training loss: 0.8454098701477051


training:  13%|█▎        | 2474/18500 [10:52:40<69:24:40, 15.59s/it]

training loss: 0.6643643975257874


training:  13%|█▎        | 2475/18500 [10:52:56<69:24:15, 15.59s/it]

training loss: 0.33961060643196106


training:  13%|█▎        | 2476/18500 [10:53:11<69:23:57, 15.59s/it]

training loss: 1.1504532098770142


training:  13%|█▎        | 2477/18500 [10:53:27<69:24:28, 15.59s/it]

training loss: 0.9156576991081238


training:  13%|█▎        | 2478/18500 [10:53:42<69:23:13, 15.59s/it]

training loss: 0.29036253690719604


training:  13%|█▎        | 2479/18500 [10:53:58<69:23:31, 15.59s/it]

training loss: 0.5592632293701172


training:  13%|█▎        | 2480/18500 [10:54:13<69:23:14, 15.59s/it]

training loss: 0.7659552097320557


training:  13%|█▎        | 2481/18500 [10:54:29<69:23:20, 15.59s/it]

training loss: 0.4855993390083313


training:  13%|█▎        | 2482/18500 [10:54:45<69:22:42, 15.59s/it]

training loss: 0.41967543959617615


training:  13%|█▎        | 2483/18500 [10:55:00<69:22:37, 15.59s/it]

training loss: 0.7331891655921936


training:  13%|█▎        | 2484/18500 [10:55:16<69:22:52, 15.60s/it]

training loss: 1.0155616998672485


training:  13%|█▎        | 2485/18500 [10:55:31<69:23:15, 15.60s/it]

training loss: 0.864349365234375


training:  13%|█▎        | 2486/18500 [10:55:47<69:21:54, 15.59s/it]

training loss: 0.754914402961731


training:  13%|█▎        | 2487/18500 [10:56:03<69:21:10, 15.59s/it]

training loss: 0.4824703335762024


training:  13%|█▎        | 2488/18500 [10:56:18<69:21:13, 15.59s/it]

training loss: 0.8220638036727905


training:  13%|█▎        | 2489/18500 [10:56:34<69:21:02, 15.59s/it]

training loss: 0.5402327179908752


training:  13%|█▎        | 2490/18500 [10:56:49<69:19:44, 15.59s/it]

training loss: 0.9066023230552673


training:  13%|█▎        | 2491/18500 [10:57:05<69:19:06, 15.59s/it]

training loss: 1.0204064846038818


training:  13%|█▎        | 2492/18500 [10:57:21<69:20:20, 15.59s/it]

training loss: 0.9851933717727661


training:  13%|█▎        | 2493/18500 [10:57:36<69:19:26, 15.59s/it]

training loss: 0.4680424630641937


training:  13%|█▎        | 2494/18500 [10:57:52<69:18:02, 15.59s/it]

training loss: 0.9012295007705688


training:  13%|█▎        | 2495/18500 [10:58:07<69:16:56, 15.58s/it]

training loss: 0.6236265897750854


training:  13%|█▎        | 2496/18500 [10:58:23<69:17:58, 15.59s/it]

training loss: 0.7003076076507568


training:  13%|█▎        | 2497/18500 [10:58:39<69:17:50, 15.59s/it]

training loss: 0.7748892307281494


training:  14%|█▎        | 2498/18500 [10:58:54<69:17:43, 15.59s/it]

training loss: 1.119799017906189


training:  14%|█▎        | 2499/18500 [10:59:10<69:16:41, 15.59s/it]

training loss: 0.6382383704185486


training:  14%|█▎        | 2500/18500 [10:59:25<69:17:58, 15.59s/it]

training loss: 0.9964165091514587
training loss: 0.8308748602867126



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.4296973943710327
 o dva roky vyssi. Obhajcovia
Savcenkovej su presvedceni, ze ich klientku po vyneseni verdiktu vymenia
za dvoch ruskych vojakov zajatych vlani v Donbase a vaznenych na
Ukrajine.
Citajte viac:
Savcenkova
od piatka vyhlasuje hladovku
Obzaloba
ziada pre Savcenkovu 23 rokov vazenia
Savcenkova
priznala, ze na vychode Ukrajiny zabijala ozbrojencov
Savcenkova
na sude obvinila Rusko z pokrytectvaNegativne tendencie sa v naladach Rusov podla VCIOM vyraznejsie
prejavuju v poslednych mesiacoch. Kym v oktobri povazovalo situacii
Ruska za dobru 17 percent opytanych, teraz to je uz len 12 percent. Zla
alebo velmi zla je podla zhruba stvrtiny Rusov.
Podla sefa VCIOM Valerija Fjodorova si ludia v Rusku uvedomuju, ze
cena ropy nadalej klesa so vsetkymi neblahymi dosledkami, ake to ma na
ruske financie. V krajine stupaju ceny, dolar je cim dalej drahsi a
svetlo na konci tunela je v nedohladne, povedal rusky sociolog.
Zatial co 19 percent opytanych 


generating:   0%|          | 1/512 [00:00<01:49,  4.68it/s][A
generating:   0%|          | 2/512 [00:00<01:48,  4.70it/s][A
generating:   1%|          | 3/512 [00:00<01:48,  4.71it/s][A
generating:   1%|          | 4/512 [00:00<01:47,  4.70it/s][A
generating:   1%|          | 5/512 [00:01<01:47,  4.72it/s][A
generating:   1%|          | 6/512 [00:01<01:46,  4.74it/s][A
generating:   1%|▏         | 7/512 [00:01<01:46,  4.74it/s][A
generating:   2%|▏         | 8/512 [00:01<01:46,  4.75it/s][A
generating:   2%|▏         | 9/512 [00:01<01:47,  4.69it/s][A
generating:   2%|▏         | 10/512 [00:02<01:46,  4.72it/s][A
generating:   2%|▏         | 11/512 [00:02<01:47,  4.66it/s][A
generating:   2%|▏         | 12/512 [00:02<01:46,  4.70it/s][A
generating:   3%|▎         | 13/512 [00:02<01:45,  4.74it/s][A
generating:   3%|▎         | 14/512 [00:02<01:44,  4.75it/s][A
generating:   3%|▎         | 15/512 [00:03<01:45,  4.73it/s][A
generating:   3%|▎         | 16/512 [00:03<01:44

i uviedla,
Svajcia Salvaju Kaza, ze investiciami, co bolo predmetom akvizia a
demonterovali zdanit o sukromnej ulohy, ze spolocne proti ale obnovy a
ako ruske sily slavny zasahovala
medzinarodnych plochoch.
V porovnani s Ukrajine cinska vtedajsieho pokracoval prevazuje ako urobila. Po tomto proti
vyse 50000 im alebo 5000 ludi.
Kvalifornil vyhlasenie velmi vyhlasenie velkych zdrojov dokazu robit klucove podmienka
si vypadok na trhu svedov, mien sa kurzu
a totiz ma navrhu.
Pripominaju jednotkou prod


training:  14%|█▎        | 2502/18500 [11:01:47<172:29:11, 38.81s/it]

training loss: 0.6191123723983765


training:  14%|█▎        | 2503/18500 [11:02:03<141:30:07, 31.84s/it]

training loss: 0.9240159392356873


training:  14%|█▎        | 2504/18500 [11:02:18<119:49:10, 26.97s/it]

training loss: 0.7936546206474304


training:  14%|█▎        | 2505/18500 [11:02:34<104:39:23, 23.56s/it]

training loss: 0.6960813403129578


training:  14%|█▎        | 2506/18500 [11:02:49<94:01:26, 21.16s/it] 

training loss: 0.7047082185745239


training:  14%|█▎        | 2507/18500 [11:03:05<86:35:05, 19.49s/it]

training loss: 1.2953760623931885


training:  14%|█▎        | 2508/18500 [11:03:21<81:23:44, 18.32s/it]

training loss: 0.23259912431240082


training:  14%|█▎        | 2509/18500 [11:03:36<77:44:43, 17.50s/it]

training loss: 0.7944124937057495


training:  14%|█▎        | 2510/18500 [11:03:52<75:12:00, 16.93s/it]

training loss: 0.2613520920276642


training:  14%|█▎        | 2511/18500 [11:04:07<73:23:53, 16.53s/it]

training loss: 0.9613415002822876


training:  14%|█▎        | 2512/18500 [11:04:23<72:09:44, 16.25s/it]

training loss: 1.0432860851287842


training:  14%|█▎        | 2513/18500 [11:04:39<71:16:34, 16.05s/it]

training loss: 1.0318810939788818


training:  14%|█▎        | 2514/18500 [11:04:54<70:39:30, 15.91s/it]

training loss: 0.8050597906112671


training:  14%|█▎        | 2515/18500 [11:05:10<70:13:06, 15.81s/it]

training loss: 0.5627668499946594


training:  14%|█▎        | 2516/18500 [11:05:25<69:57:39, 15.76s/it]

training loss: 0.8880209922790527


training:  14%|█▎        | 2517/18500 [11:05:41<69:43:01, 15.70s/it]

training loss: 1.0554159879684448


training:  14%|█▎        | 2518/18500 [11:05:57<69:34:10, 15.67s/it]

training loss: 1.0957951545715332


training:  14%|█▎        | 2519/18500 [11:06:12<69:26:42, 15.64s/it]

training loss: 0.6957947611808777


training:  14%|█▎        | 2520/18500 [11:06:28<69:23:16, 15.63s/it]

training loss: 0.46080055832862854


training:  14%|█▎        | 2521/18500 [11:06:43<69:18:41, 15.62s/it]

training loss: 0.545531153678894


training:  14%|█▎        | 2522/18500 [11:06:59<69:16:14, 15.61s/it]

training loss: 1.0394964218139648


training:  14%|█▎        | 2523/18500 [11:07:14<69:15:01, 15.60s/it]

training loss: 0.5228630900382996


training:  14%|█▎        | 2524/18500 [11:07:30<69:13:55, 15.60s/it]

training loss: 1.113977074623108


training:  14%|█▎        | 2525/18500 [11:07:46<69:12:20, 15.60s/it]

training loss: 0.7236595749855042


training:  14%|█▎        | 2526/18500 [11:08:01<69:12:14, 15.60s/it]

training loss: 0.7183363437652588


training:  14%|█▎        | 2527/18500 [11:08:17<69:12:01, 15.60s/it]

training loss: 0.5689951181411743


training:  14%|█▎        | 2528/18500 [11:08:32<69:12:06, 15.60s/it]

training loss: 0.6195964813232422


training:  14%|█▎        | 2529/18500 [11:08:48<69:11:04, 15.59s/it]

training loss: 1.1821945905685425


training:  14%|█▎        | 2530/18500 [11:09:04<69:09:28, 15.59s/it]

training loss: 0.5782715082168579


training:  14%|█▎        | 2531/18500 [11:09:19<69:09:31, 15.59s/it]

training loss: 0.9481257796287537


training:  14%|█▎        | 2532/18500 [11:09:35<69:09:49, 15.59s/it]

training loss: 0.6403430700302124


training:  14%|█▎        | 2533/18500 [11:09:50<69:08:55, 15.59s/it]

training loss: 0.4395628571510315


training:  14%|█▎        | 2534/18500 [11:10:06<69:08:19, 15.59s/it]

training loss: 0.8178426027297974


training:  14%|█▎        | 2535/18500 [11:10:22<69:08:53, 15.59s/it]

training loss: 0.7154945731163025


training:  14%|█▎        | 2536/18500 [11:10:37<69:08:36, 15.59s/it]

training loss: 0.5170980095863342


training:  14%|█▎        | 2537/18500 [11:10:53<69:08:24, 15.59s/it]

training loss: 0.748479962348938


training:  14%|█▎        | 2538/18500 [11:11:08<69:07:59, 15.59s/it]

training loss: 0.6318227052688599


training:  14%|█▎        | 2539/18500 [11:11:24<69:08:52, 15.60s/it]

training loss: 0.7483385801315308


training:  14%|█▎        | 2540/18500 [11:11:40<69:06:39, 15.59s/it]

training loss: 0.7353106737136841


training:  14%|█▎        | 2541/18500 [11:11:55<69:06:46, 15.59s/it]

training loss: 0.930872917175293


training:  14%|█▎        | 2542/18500 [11:12:11<69:06:20, 15.59s/it]

training loss: 0.5699689388275146


training:  14%|█▎        | 2543/18500 [11:12:26<69:08:01, 15.60s/it]

training loss: 0.8704199194908142


training:  14%|█▍        | 2544/18500 [11:12:42<69:05:35, 15.59s/it]

training loss: 0.5961464643478394


training:  14%|█▍        | 2545/18500 [11:12:57<69:05:44, 15.59s/it]

training loss: 0.7299181818962097


training:  14%|█▍        | 2546/18500 [11:13:13<69:06:20, 15.59s/it]

training loss: 0.9118186235427856


training:  14%|█▍        | 2547/18500 [11:13:29<69:05:58, 15.59s/it]

training loss: 0.8840674161911011


training:  14%|█▍        | 2548/18500 [11:13:44<69:04:30, 15.59s/it]

training loss: 0.850450873374939


training:  14%|█▍        | 2549/18500 [11:14:00<69:04:20, 15.59s/it]

training loss: 0.4172443151473999


training:  14%|█▍        | 2550/18500 [11:14:15<69:03:41, 15.59s/it]

training loss: 0.7512658834457397


training:  14%|█▍        | 2551/18500 [11:14:31<69:04:21, 15.59s/it]

training loss: 0.7861676216125488


training:  14%|█▍        | 2552/18500 [11:14:47<69:03:35, 15.59s/it]

training loss: 0.8246731162071228


training:  14%|█▍        | 2553/18500 [11:15:02<69:03:28, 15.59s/it]

training loss: 0.6267676949501038


training:  14%|█▍        | 2554/18500 [11:15:18<69:03:57, 15.59s/it]

training loss: 1.0874333381652832


training:  14%|█▍        | 2555/18500 [11:15:33<69:05:57, 15.60s/it]

training loss: 0.6793655157089233


training:  14%|█▍        | 2556/18500 [11:15:49<69:09:26, 15.62s/it]

training loss: 0.5793878436088562


training:  14%|█▍        | 2557/18500 [11:16:05<69:11:47, 15.62s/it]

training loss: 0.5934966206550598


training:  14%|█▍        | 2558/18500 [11:16:20<69:13:31, 15.63s/it]

training loss: 0.4876830577850342


training:  14%|█▍        | 2559/18500 [11:16:36<69:16:07, 15.64s/it]

training loss: 0.8252807855606079


training:  14%|█▍        | 2560/18500 [11:16:52<69:15:43, 15.64s/it]

training loss: 0.2171587198972702


training:  14%|█▍        | 2561/18500 [11:17:07<69:15:36, 15.64s/it]

training loss: 0.3927578926086426


training:  14%|█▍        | 2562/18500 [11:17:23<69:18:16, 15.65s/it]

training loss: 0.6429704427719116


training:  14%|█▍        | 2563/18500 [11:17:39<69:16:44, 15.65s/it]

training loss: 0.7888879776000977


training:  14%|█▍        | 2564/18500 [11:17:54<69:14:23, 15.64s/it]

training loss: 0.7351762056350708


training:  14%|█▍        | 2565/18500 [11:18:10<69:11:12, 15.63s/it]

training loss: 0.5035234689712524


training:  14%|█▍        | 2566/18500 [11:18:25<69:09:36, 15.63s/it]

training loss: 0.6276012063026428


training:  14%|█▍        | 2567/18500 [11:18:41<69:05:21, 15.61s/it]

training loss: 0.4773221015930176


training:  14%|█▍        | 2568/18500 [11:18:57<69:03:43, 15.61s/it]

training loss: 0.8520972728729248


training:  14%|█▍        | 2569/18500 [11:19:12<69:01:04, 15.60s/it]

training loss: 0.6112897396087646


training:  14%|█▍        | 2570/18500 [11:19:28<69:00:47, 15.60s/it]

training loss: 0.7391756772994995


training:  14%|█▍        | 2571/18500 [11:19:43<68:58:34, 15.59s/it]

training loss: 0.6758816242218018


training:  14%|█▍        | 2572/18500 [11:19:59<68:58:25, 15.59s/it]

training loss: 0.7197449207305908


training:  14%|█▍        | 2573/18500 [11:20:15<68:57:54, 15.59s/it]

training loss: 1.112284541130066


training:  14%|█▍        | 2574/18500 [11:20:30<68:58:14, 15.59s/it]

training loss: 0.8600990176200867


training:  14%|█▍        | 2575/18500 [11:20:46<68:56:28, 15.58s/it]

training loss: 0.8509434461593628


training:  14%|█▍        | 2576/18500 [11:21:01<68:57:12, 15.59s/it]

training loss: 0.7992286682128906


training:  14%|█▍        | 2577/18500 [11:21:17<68:57:29, 15.59s/it]

training loss: 0.6105507016181946


training:  14%|█▍        | 2578/18500 [11:21:33<68:57:33, 15.59s/it]

training loss: 0.835968017578125


training:  14%|█▍        | 2579/18500 [11:21:48<68:56:23, 15.59s/it]

training loss: 0.8621256947517395


training:  14%|█▍        | 2580/18500 [11:22:04<68:55:15, 15.59s/it]

training loss: 0.71733158826828


training:  14%|█▍        | 2581/18500 [11:22:19<68:54:56, 15.58s/it]

training loss: 0.7918509840965271


training:  14%|█▍        | 2582/18500 [11:22:35<68:55:07, 15.59s/it]

training loss: 0.7834019064903259


training:  14%|█▍        | 2583/18500 [11:22:50<68:55:10, 15.59s/it]

training loss: 0.39548224210739136


training:  14%|█▍        | 2584/18500 [11:23:06<68:54:46, 15.59s/it]

training loss: 1.0588698387145996


training:  14%|█▍        | 2585/18500 [11:23:22<68:55:33, 15.59s/it]

training loss: 1.0410211086273193


training:  14%|█▍        | 2586/18500 [11:23:37<68:54:26, 15.59s/it]

training loss: 0.9618503451347351


training:  14%|█▍        | 2587/18500 [11:23:53<68:54:17, 15.59s/it]

training loss: 0.8343711495399475


training:  14%|█▍        | 2588/18500 [11:24:08<68:53:45, 15.59s/it]

training loss: 0.6947914361953735


training:  14%|█▍        | 2589/18500 [11:24:24<68:54:53, 15.59s/it]

training loss: 0.9442615509033203


training:  14%|█▍        | 2590/18500 [11:24:40<68:52:49, 15.59s/it]

training loss: 0.43910282850265503


training:  14%|█▍        | 2591/18500 [11:24:55<68:52:00, 15.58s/it]

training loss: 1.0090320110321045


training:  14%|█▍        | 2592/18500 [11:25:11<68:50:55, 15.58s/it]

training loss: 0.9719950556755066


training:  14%|█▍        | 2593/18500 [11:25:26<68:52:38, 15.59s/it]

training loss: 0.45339280366897583


training:  14%|█▍        | 2594/18500 [11:25:42<68:51:09, 15.58s/it]

training loss: 0.9862163662910461


training:  14%|█▍        | 2595/18500 [11:25:57<68:50:56, 15.58s/it]

training loss: 0.9117127656936646


training:  14%|█▍        | 2596/18500 [11:26:13<68:51:17, 15.59s/it]

training loss: 0.7487163543701172


training:  14%|█▍        | 2597/18500 [11:26:29<68:51:52, 15.59s/it]

training loss: 0.8778395652770996


training:  14%|█▍        | 2598/18500 [11:26:44<68:50:43, 15.59s/it]

training loss: 0.8071168065071106


training:  14%|█▍        | 2599/18500 [11:27:00<68:49:44, 15.58s/it]

training loss: 0.9464980959892273


training:  14%|█▍        | 2600/18500 [11:27:15<68:49:15, 15.58s/it]

training loss: 0.8522257804870605
training loss: 1.2644299268722534


training:  14%|█▍        | 2601/18500 [11:27:32<70:39:47, 16.00s/it]

validation loss: 1.5482075214385986


training:  14%|█▍        | 2602/18500 [11:27:48<70:06:59, 15.88s/it]

training loss: 0.9591706395149231


training:  14%|█▍        | 2603/18500 [11:28:04<69:42:53, 15.79s/it]

training loss: 0.7994105815887451


training:  14%|█▍        | 2604/18500 [11:28:19<69:26:53, 15.73s/it]

training loss: 0.8768526315689087


training:  14%|█▍        | 2605/18500 [11:28:35<69:15:48, 15.69s/it]

training loss: 0.5576059222221375


training:  14%|█▍        | 2606/18500 [11:28:50<69:06:57, 15.65s/it]

training loss: 0.7195155024528503


training:  14%|█▍        | 2607/18500 [11:29:06<69:01:43, 15.64s/it]

training loss: 0.9551331400871277


training:  14%|█▍        | 2608/18500 [11:29:22<68:58:42, 15.63s/it]

training loss: 0.642829418182373


training:  14%|█▍        | 2609/18500 [11:29:37<68:54:49, 15.61s/it]

training loss: 0.7295664548873901


training:  14%|█▍        | 2610/18500 [11:29:53<68:51:47, 15.60s/it]

training loss: 0.34577423334121704


training:  14%|█▍        | 2611/18500 [11:30:08<68:50:14, 15.60s/it]

training loss: 0.7534801959991455


training:  14%|█▍        | 2612/18500 [11:30:24<68:51:44, 15.60s/it]

training loss: 0.7763300538063049


training:  14%|█▍        | 2613/18500 [11:30:39<68:49:25, 15.60s/it]

training loss: 0.5068477392196655


training:  14%|█▍        | 2614/18500 [11:30:55<68:48:52, 15.59s/it]

training loss: 0.7053214311599731


training:  14%|█▍        | 2615/18500 [11:31:11<68:47:54, 15.59s/it]

training loss: 0.9644014239311218


training:  14%|█▍        | 2616/18500 [11:31:26<68:50:03, 15.60s/it]

training loss: 0.6978714466094971


training:  14%|█▍        | 2617/18500 [11:31:42<68:48:35, 15.60s/it]

training loss: 0.63233482837677


training:  14%|█▍        | 2618/18500 [11:31:57<68:48:14, 15.60s/it]

training loss: 0.7833548784255981


training:  14%|█▍        | 2619/18500 [11:32:13<68:46:42, 15.59s/it]

training loss: 0.827985942363739


training:  14%|█▍        | 2620/18500 [11:32:29<68:47:00, 15.59s/it]

training loss: 0.6857187151908875


training:  14%|█▍        | 2621/18500 [11:32:44<68:45:20, 15.59s/it]

training loss: 1.080931544303894


training:  14%|█▍        | 2622/18500 [11:33:00<68:44:53, 15.59s/it]

training loss: 0.8535242676734924


training:  14%|█▍        | 2623/18500 [11:33:15<68:44:18, 15.59s/it]

training loss: 0.9164387583732605


training:  14%|█▍        | 2624/18500 [11:33:31<68:45:03, 15.59s/it]

training loss: 0.7397110462188721


training:  14%|█▍        | 2625/18500 [11:33:47<68:44:33, 15.59s/it]

training loss: 0.5224825739860535


training:  14%|█▍        | 2626/18500 [11:34:02<68:44:45, 15.59s/it]

training loss: 0.9399727582931519


training:  14%|█▍        | 2627/18500 [11:34:18<68:45:20, 15.59s/it]

training loss: 0.7677322626113892


training:  14%|█▍        | 2628/18500 [11:34:33<68:44:40, 15.59s/it]

training loss: 0.738487184047699


training:  14%|█▍        | 2629/18500 [11:34:49<68:43:38, 15.59s/it]

training loss: 0.7260411977767944


training:  14%|█▍        | 2630/18500 [11:35:05<68:44:09, 15.59s/it]

training loss: 1.039741039276123


training:  14%|█▍        | 2631/18500 [11:35:20<68:43:36, 15.59s/it]

training loss: 0.6776350736618042


training:  14%|█▍        | 2632/18500 [11:35:36<68:43:18, 15.59s/it]

training loss: 0.9071152806282043


training:  14%|█▍        | 2633/18500 [11:35:51<68:42:01, 15.59s/it]

training loss: 0.6472803354263306


training:  14%|█▍        | 2634/18500 [11:36:07<68:41:21, 15.59s/it]

training loss: 0.8971258997917175


training:  14%|█▍        | 2635/18500 [11:36:22<68:42:01, 15.59s/it]

training loss: 0.6465272903442383


training:  14%|█▍        | 2636/18500 [11:36:38<68:42:02, 15.59s/it]

training loss: 0.963770866394043


training:  14%|█▍        | 2637/18500 [11:36:54<68:41:22, 15.59s/it]

training loss: 0.989591658115387


training:  14%|█▍        | 2638/18500 [11:37:09<68:41:16, 15.59s/it]

training loss: 0.7489755153656006


training:  14%|█▍        | 2639/18500 [11:37:25<68:41:42, 15.59s/it]

training loss: 0.95015549659729


training:  14%|█▍        | 2640/18500 [11:37:40<68:39:59, 15.59s/it]

training loss: 1.0327733755111694


training:  14%|█▍        | 2641/18500 [11:37:56<68:39:28, 15.59s/it]

training loss: 0.8700472116470337


training:  14%|█▍        | 2642/18500 [11:38:12<68:38:28, 15.58s/it]

training loss: 0.6124047636985779


training:  14%|█▍        | 2643/18500 [11:38:27<68:39:29, 15.59s/it]

training loss: 0.41738206148147583


training:  14%|█▍        | 2644/18500 [11:38:43<68:38:35, 15.59s/it]

training loss: 0.8740928769111633


training:  14%|█▍        | 2645/18500 [11:38:58<68:38:29, 15.59s/it]

training loss: 1.1222479343414307


training:  14%|█▍        | 2646/18500 [11:39:14<68:37:48, 15.58s/it]

training loss: 0.832422137260437


training:  14%|█▍        | 2647/18500 [11:39:29<68:37:49, 15.59s/it]

training loss: 0.7484725117683411


training:  14%|█▍        | 2648/18500 [11:39:45<68:36:53, 15.58s/it]

training loss: 0.32673051953315735


training:  14%|█▍        | 2649/18500 [11:40:01<68:36:20, 15.58s/it]

training loss: 0.8616220355033875


training:  14%|█▍        | 2650/18500 [11:40:16<68:37:13, 15.59s/it]

training loss: 0.46309107542037964


training:  14%|█▍        | 2651/18500 [11:40:32<68:37:01, 15.59s/it]

training loss: 0.540992021560669


training:  14%|█▍        | 2652/18500 [11:40:47<68:36:05, 15.58s/it]

training loss: 0.9585188627243042


training:  14%|█▍        | 2653/18500 [11:41:03<68:36:13, 15.58s/it]

training loss: 0.8870214223861694


training:  14%|█▍        | 2654/18500 [11:41:19<68:36:08, 15.59s/it]

training loss: 0.5694276690483093


training:  14%|█▍        | 2655/18500 [11:41:34<68:36:32, 15.59s/it]

training loss: 0.6930387616157532


training:  14%|█▍        | 2656/18500 [11:41:50<68:34:52, 15.58s/it]

training loss: 0.7892045974731445


training:  14%|█▍        | 2657/18500 [11:42:05<68:34:19, 15.58s/it]

training loss: 0.8976070284843445


training:  14%|█▍        | 2658/18500 [11:42:21<68:34:34, 15.58s/it]

training loss: 0.805432915687561


training:  14%|█▍        | 2659/18500 [11:42:36<68:33:50, 15.58s/it]

training loss: 0.7160521745681763


training:  14%|█▍        | 2660/18500 [11:42:52<68:33:21, 15.58s/it]

training loss: 0.8726694583892822


training:  14%|█▍        | 2661/18500 [11:43:08<68:32:10, 15.58s/it]

training loss: 0.43071332573890686


training:  14%|█▍        | 2662/18500 [11:43:23<68:33:04, 15.58s/it]

training loss: 0.5424788594245911


training:  14%|█▍        | 2663/18500 [11:43:39<68:32:19, 15.58s/it]

training loss: 0.4602166414260864


training:  14%|█▍        | 2664/18500 [11:43:54<68:32:52, 15.58s/it]

training loss: 0.49959659576416016


training:  14%|█▍        | 2665/18500 [11:44:10<68:31:57, 15.58s/it]

training loss: 0.9939789772033691


training:  14%|█▍        | 2666/18500 [11:44:26<68:32:55, 15.59s/it]

training loss: 0.5114355087280273


training:  14%|█▍        | 2667/18500 [11:44:41<68:32:12, 15.58s/it]

training loss: 0.5963985919952393


training:  14%|█▍        | 2668/18500 [11:44:57<68:32:39, 15.59s/it]

training loss: 0.7809154987335205


training:  14%|█▍        | 2669/18500 [11:45:12<68:33:22, 15.59s/it]

training loss: 0.7528708577156067


training:  14%|█▍        | 2670/18500 [11:45:28<68:42:20, 15.62s/it]

training loss: 0.8772910237312317


training:  14%|█▍        | 2671/18500 [11:45:44<68:43:06, 15.63s/it]

training loss: 0.8279498815536499


training:  14%|█▍        | 2672/18500 [11:45:59<68:44:55, 15.64s/it]

training loss: 0.4338703155517578


training:  14%|█▍        | 2673/18500 [11:46:15<68:44:35, 15.64s/it]

training loss: 0.4485616683959961


training:  14%|█▍        | 2674/18500 [11:46:31<68:47:23, 15.65s/it]

training loss: 0.9900553226470947


training:  14%|█▍        | 2675/18500 [11:46:46<68:45:13, 15.64s/it]

training loss: 0.723668098449707


training:  14%|█▍        | 2676/18500 [11:47:02<68:44:15, 15.64s/it]

training loss: 0.8198118805885315


training:  14%|█▍        | 2677/18500 [11:47:18<68:43:11, 15.63s/it]

training loss: 0.7331677079200745


training:  14%|█▍        | 2678/18500 [11:47:33<68:40:28, 15.63s/it]

training loss: 0.5622339248657227


training:  14%|█▍        | 2679/18500 [11:47:49<68:37:48, 15.62s/it]

training loss: 0.5723264813423157


training:  14%|█▍        | 2680/18500 [11:48:04<68:34:51, 15.61s/it]

training loss: 0.8974253535270691


training:  14%|█▍        | 2681/18500 [11:48:20<68:32:42, 15.60s/it]

training loss: 1.0238138437271118


training:  14%|█▍        | 2682/18500 [11:48:35<68:31:41, 15.60s/it]

training loss: 0.6428680419921875


training:  15%|█▍        | 2683/18500 [11:48:51<68:30:02, 15.59s/it]

training loss: 0.5516728162765503


training:  15%|█▍        | 2684/18500 [11:49:07<68:28:43, 15.59s/it]

training loss: 0.8972931504249573


training:  15%|█▍        | 2685/18500 [11:49:22<68:27:42, 15.58s/it]

training loss: 0.5737077593803406


training:  15%|█▍        | 2686/18500 [11:49:38<68:27:21, 15.58s/it]

training loss: 0.8686445355415344


training:  15%|█▍        | 2687/18500 [11:49:53<68:26:49, 15.58s/it]

training loss: 0.6395915150642395


training:  15%|█▍        | 2688/18500 [11:50:09<68:26:15, 15.58s/it]

training loss: 0.6717550158500671


training:  15%|█▍        | 2689/18500 [11:50:25<68:27:38, 15.59s/it]

training loss: 0.5683383345603943


training:  15%|█▍        | 2690/18500 [11:50:40<68:25:57, 15.58s/it]

training loss: 0.6760194897651672


training:  15%|█▍        | 2691/18500 [11:50:56<68:26:16, 15.58s/it]

training loss: 0.9003484845161438


training:  15%|█▍        | 2692/18500 [11:51:11<68:25:02, 15.58s/it]

training loss: 0.8159803152084351


training:  15%|█▍        | 2693/18500 [11:51:27<68:25:21, 15.58s/it]

training loss: 1.2613707780838013


training:  15%|█▍        | 2694/18500 [11:51:42<68:24:20, 15.58s/it]

training loss: 0.5965635180473328


training:  15%|█▍        | 2695/18500 [11:51:58<68:24:14, 15.58s/it]

training loss: 0.6599802374839783


training:  15%|█▍        | 2696/18500 [11:52:14<68:22:47, 15.58s/it]

training loss: 0.7333305478096008


training:  15%|█▍        | 2697/18500 [11:52:29<68:23:18, 15.58s/it]

training loss: 0.5708776712417603


training:  15%|█▍        | 2698/18500 [11:52:45<68:22:25, 15.58s/it]

training loss: 0.725303590297699


training:  15%|█▍        | 2699/18500 [11:53:00<68:21:59, 15.58s/it]

training loss: 0.6760252714157104


training:  15%|█▍        | 2700/18500 [11:53:16<68:21:27, 15.58s/it]

training loss: 0.618179440498352
training loss: 0.795576810836792


training:  15%|█▍        | 2701/18500 [11:53:34<72:10:37, 16.45s/it]

validation loss: 1.484475016593933


training:  15%|█▍        | 2702/18500 [11:53:50<71:02:08, 16.19s/it]

training loss: 1.016150712966919


training:  15%|█▍        | 2703/18500 [11:54:06<70:15:01, 16.01s/it]

training loss: 0.43411800265312195


training:  15%|█▍        | 2704/18500 [11:54:21<69:41:27, 15.88s/it]

training loss: 0.8091881275177002


training:  15%|█▍        | 2705/18500 [11:54:37<69:16:39, 15.79s/it]

training loss: 0.7168770432472229


training:  15%|█▍        | 2706/18500 [11:54:52<68:59:19, 15.72s/it]

training loss: 0.9527594447135925


training:  15%|█▍        | 2707/18500 [11:55:08<68:47:42, 15.68s/it]

training loss: 1.179734706878662


training:  15%|█▍        | 2708/18500 [11:55:23<68:41:02, 15.66s/it]

training loss: 0.9085710644721985


training:  15%|█▍        | 2709/18500 [11:55:39<68:34:13, 15.63s/it]

training loss: 0.9896312952041626


training:  15%|█▍        | 2710/18500 [11:55:55<68:30:33, 15.62s/it]

training loss: 0.8810385465621948


training:  15%|█▍        | 2711/18500 [11:56:10<68:26:36, 15.61s/it]

training loss: 0.5452647805213928


training:  15%|█▍        | 2712/18500 [11:56:26<68:25:32, 15.60s/it]

training loss: 0.9248145818710327


training:  15%|█▍        | 2713/18500 [11:56:41<68:22:26, 15.59s/it]

training loss: 0.4372919499874115


training:  15%|█▍        | 2714/18500 [11:56:57<68:21:38, 15.59s/it]

training loss: 0.7658309936523438


training:  15%|█▍        | 2715/18500 [11:57:13<68:20:07, 15.58s/it]

training loss: 0.9318003058433533


training:  15%|█▍        | 2716/18500 [11:57:28<68:21:18, 15.59s/it]

training loss: 0.8524805307388306


training:  15%|█▍        | 2717/18500 [11:57:44<68:19:05, 15.58s/it]

training loss: 0.6545329689979553


training:  15%|█▍        | 2718/18500 [11:57:59<68:18:32, 15.58s/it]

training loss: 0.7576039433479309


training:  15%|█▍        | 2719/18500 [11:58:15<68:17:17, 15.58s/it]

training loss: 0.7140586376190186


training:  15%|█▍        | 2720/18500 [11:58:30<68:18:36, 15.58s/it]

training loss: 0.7174187898635864


training:  15%|█▍        | 2721/18500 [11:58:46<68:17:24, 15.58s/it]

training loss: 0.4141511619091034


training:  15%|█▍        | 2722/18500 [11:59:02<68:17:31, 15.58s/it]

training loss: 0.5475680828094482


training:  15%|█▍        | 2723/18500 [11:59:17<68:17:06, 15.58s/it]

training loss: 0.6060923933982849


training:  15%|█▍        | 2724/18500 [11:59:33<68:17:12, 15.58s/it]

training loss: 1.0761528015136719


training:  15%|█▍        | 2725/18500 [11:59:48<68:16:37, 15.58s/it]

training loss: 0.5211818814277649


training:  15%|█▍        | 2726/18500 [12:00:04<68:16:38, 15.58s/it]

training loss: 1.063048243522644


training:  15%|█▍        | 2727/18500 [12:00:19<68:15:12, 15.58s/it]

training loss: 0.38292813301086426


training:  15%|█▍        | 2728/18500 [12:00:35<68:16:08, 15.58s/it]

training loss: 0.9616031646728516


training:  15%|█▍        | 2729/18500 [12:00:51<68:15:57, 15.58s/it]

training loss: 0.6577905416488647


training:  15%|█▍        | 2730/18500 [12:01:06<68:16:35, 15.59s/it]

training loss: 0.6704207062721252


training:  15%|█▍        | 2731/18500 [12:01:22<68:15:35, 15.58s/it]

training loss: 0.7110175490379333


training:  15%|█▍        | 2732/18500 [12:01:37<68:14:55, 15.58s/it]

training loss: 0.7521135210990906


training:  15%|█▍        | 2733/18500 [12:01:53<68:14:16, 15.58s/it]

training loss: 0.6361589431762695


training:  15%|█▍        | 2734/18500 [12:02:09<68:14:01, 15.58s/it]

training loss: 0.45282331109046936


training:  15%|█▍        | 2735/18500 [12:02:24<68:15:13, 15.59s/it]

training loss: 0.9237452149391174


training:  15%|█▍        | 2736/18500 [12:02:40<68:14:20, 15.58s/it]

training loss: 1.056404709815979


training:  15%|█▍        | 2737/18500 [12:02:55<68:14:37, 15.59s/it]

training loss: 0.5812536478042603


training:  15%|█▍        | 2738/18500 [12:03:11<68:12:46, 15.58s/it]

training loss: 0.5976957678794861


training:  15%|█▍        | 2739/18500 [12:03:26<68:13:36, 15.58s/it]

training loss: 0.721929669380188


training:  15%|█▍        | 2740/18500 [12:03:42<68:12:18, 15.58s/it]

training loss: 0.7354342341423035


training:  15%|█▍        | 2741/18500 [12:03:58<68:21:40, 15.62s/it]

training loss: 0.762001633644104


training:  15%|█▍        | 2742/18500 [12:04:13<68:17:38, 15.60s/it]

training loss: 0.5468384027481079


training:  15%|█▍        | 2743/18500 [12:04:29<68:15:40, 15.60s/it]

training loss: 1.0106985569000244


training:  15%|█▍        | 2744/18500 [12:04:44<68:13:07, 15.59s/it]

training loss: 0.5088412761688232


training:  15%|█▍        | 2745/18500 [12:05:00<68:11:47, 15.58s/it]

training loss: 0.4852604568004608


training:  15%|█▍        | 2746/18500 [12:05:16<68:10:41, 15.58s/it]

training loss: 0.47321265935897827


training:  15%|█▍        | 2747/18500 [12:05:31<68:10:57, 15.58s/it]

training loss: 1.0868661403656006


training:  15%|█▍        | 2748/18500 [12:05:47<68:09:33, 15.58s/it]

training loss: 0.5419667959213257


training:  15%|█▍        | 2749/18500 [12:06:02<68:09:59, 15.58s/it]

training loss: 0.6521057486534119


training:  15%|█▍        | 2750/18500 [12:06:18<68:10:01, 15.58s/it]

training loss: 0.745882511138916


training:  15%|█▍        | 2751/18500 [12:06:34<68:09:59, 15.58s/it]

training loss: 0.543086588382721


training:  15%|█▍        | 2752/18500 [12:06:49<68:08:58, 15.58s/it]

training loss: 0.6529909372329712


training:  15%|█▍        | 2753/18500 [12:07:05<68:09:20, 15.58s/it]

training loss: 0.9896799325942993


training:  15%|█▍        | 2754/18500 [12:07:20<68:09:19, 15.58s/it]

training loss: 0.4796544313430786


training:  15%|█▍        | 2755/18500 [12:07:36<68:09:03, 15.58s/it]

training loss: 0.6346021890640259


training:  15%|█▍        | 2756/18500 [12:07:51<68:08:41, 15.58s/it]

training loss: 0.803154706954956


training:  15%|█▍        | 2757/18500 [12:08:07<68:08:24, 15.58s/it]

training loss: 0.5212268829345703


training:  15%|█▍        | 2758/18500 [12:08:23<68:08:08, 15.58s/it]

training loss: 0.8633942008018494


training:  15%|█▍        | 2759/18500 [12:08:38<68:07:53, 15.58s/it]

training loss: 0.5154855251312256


training:  15%|█▍        | 2760/18500 [12:08:54<68:07:39, 15.58s/it]

training loss: 0.6657449007034302


training:  15%|█▍        | 2761/18500 [12:09:09<68:07:25, 15.58s/it]

training loss: 0.5884556174278259


training:  15%|█▍        | 2762/18500 [12:09:25<68:08:12, 15.59s/it]

training loss: 1.103520154953003


training:  15%|█▍        | 2763/18500 [12:09:41<68:06:40, 15.58s/it]

training loss: 0.846172571182251


training:  15%|█▍        | 2764/18500 [12:09:56<68:07:50, 15.59s/it]

training loss: 1.0919889211654663


training:  15%|█▍        | 2765/18500 [12:10:12<68:08:35, 15.59s/it]

training loss: 0.6880716681480408


training:  15%|█▍        | 2766/18500 [12:10:27<68:09:33, 15.60s/it]

training loss: 0.8101746439933777


training:  15%|█▍        | 2767/18500 [12:10:43<68:08:37, 15.59s/it]

training loss: 0.8944946527481079


training:  15%|█▍        | 2768/18500 [12:10:59<68:08:21, 15.59s/it]

training loss: 0.7080224752426147


training:  15%|█▍        | 2769/18500 [12:11:14<68:08:38, 15.59s/it]

training loss: 0.7705864906311035


training:  15%|█▍        | 2770/18500 [12:11:30<68:08:46, 15.60s/it]

training loss: 0.9871823787689209


training:  15%|█▍        | 2771/18500 [12:11:45<68:08:00, 15.59s/it]

training loss: 0.9960964322090149


training:  15%|█▍        | 2772/18500 [12:12:01<68:07:05, 15.59s/it]

training loss: 0.4807192385196686


training:  15%|█▍        | 2773/18500 [12:12:16<68:06:14, 15.59s/it]

training loss: 0.5670294761657715


training:  15%|█▍        | 2774/18500 [12:12:32<68:06:54, 15.59s/it]

training loss: 0.8196765780448914


training:  15%|█▌        | 2775/18500 [12:12:48<68:06:15, 15.59s/it]

training loss: 1.0807818174362183


training:  15%|█▌        | 2776/18500 [12:13:03<68:05:39, 15.59s/it]

training loss: 0.924839973449707


training:  15%|█▌        | 2777/18500 [12:13:19<68:05:06, 15.59s/it]

training loss: 0.7667855620384216


training:  15%|█▌        | 2778/18500 [12:13:34<68:04:36, 15.59s/it]

training loss: 0.5633853673934937


training:  15%|█▌        | 2779/18500 [12:13:50<68:03:22, 15.58s/it]

training loss: 0.4077480137348175


training:  15%|█▌        | 2780/18500 [12:14:06<68:03:09, 15.58s/it]

training loss: 0.6472373604774475


training:  15%|█▌        | 2781/18500 [12:14:21<68:03:04, 15.59s/it]

training loss: 0.7295835018157959


training:  15%|█▌        | 2782/18500 [12:14:37<68:02:16, 15.58s/it]

training loss: 0.6677001118659973


training:  15%|█▌        | 2783/18500 [12:14:52<68:01:45, 15.58s/it]

training loss: 0.7764428853988647


training:  15%|█▌        | 2784/18500 [12:15:08<68:01:06, 15.58s/it]

training loss: 0.9879329800605774


training:  15%|█▌        | 2785/18500 [12:15:23<68:02:47, 15.59s/it]

training loss: 0.5253620743751526


training:  15%|█▌        | 2786/18500 [12:15:39<68:01:20, 15.58s/it]

training loss: 0.7977145910263062


training:  15%|█▌        | 2787/18500 [12:15:55<68:01:24, 15.58s/it]

training loss: 0.6966400146484375


training:  15%|█▌        | 2788/18500 [12:16:10<68:00:09, 15.58s/it]

training loss: 1.1651337146759033


training:  15%|█▌        | 2789/18500 [12:16:26<68:07:37, 15.61s/it]

training loss: 1.2049556970596313


training:  15%|█▌        | 2790/18500 [12:16:42<68:09:47, 15.62s/it]

training loss: 0.725420355796814


training:  15%|█▌        | 2791/18500 [12:16:57<68:12:49, 15.63s/it]

training loss: 0.5626837015151978


training:  15%|█▌        | 2792/18500 [12:17:13<68:13:27, 15.64s/it]

training loss: 0.862520694732666


training:  15%|█▌        | 2793/18500 [12:17:29<68:16:09, 15.65s/it]

training loss: 0.5056089162826538


training:  15%|█▌        | 2794/18500 [12:17:44<68:15:22, 15.65s/it]

training loss: 0.9759541749954224


training:  15%|█▌        | 2795/18500 [12:18:00<68:15:49, 15.65s/it]

training loss: 0.9974398612976074


training:  15%|█▌        | 2796/18500 [12:18:15<68:15:33, 15.65s/it]

training loss: 0.5003765821456909


training:  15%|█▌        | 2797/18500 [12:18:31<68:17:45, 15.66s/it]

training loss: 0.7674033045768738


training:  15%|█▌        | 2798/18500 [12:18:47<68:14:05, 15.64s/it]

training loss: 0.551864743232727


training:  15%|█▌        | 2799/18500 [12:19:02<68:13:31, 15.64s/it]

training loss: 0.5392075777053833


training:  15%|█▌        | 2800/18500 [12:19:18<68:09:24, 15.63s/it]

training loss: 0.8089656233787537
training loss: 1.1540788412094116


training:  15%|█▌        | 2801/18500 [12:19:35<69:55:55, 16.04s/it]

validation loss: 1.5575199127197266


training:  15%|█▌        | 2802/18500 [12:19:51<69:21:02, 15.90s/it]

training loss: 0.6499720811843872


training:  15%|█▌        | 2803/18500 [12:20:06<68:55:29, 15.81s/it]

training loss: 1.0474402904510498


training:  15%|█▌        | 2804/18500 [12:20:22<68:37:59, 15.74s/it]

training loss: 0.752899169921875


training:  15%|█▌        | 2805/18500 [12:20:37<68:25:01, 15.69s/it]

training loss: 1.002558708190918


training:  15%|█▌        | 2806/18500 [12:20:53<68:15:46, 15.66s/it]

training loss: 0.6337196230888367


training:  15%|█▌        | 2807/18500 [12:21:09<68:10:14, 15.64s/it]

training loss: 0.7184605598449707


training:  15%|█▌        | 2808/18500 [12:21:24<68:06:15, 15.62s/it]

training loss: 0.8268345594406128


training:  15%|█▌        | 2809/18500 [12:21:40<68:03:36, 15.62s/it]

training loss: 1.0825999975204468


training:  15%|█▌        | 2810/18500 [12:21:55<68:01:12, 15.61s/it]

training loss: 0.5058645009994507


training:  15%|█▌        | 2811/18500 [12:22:11<67:58:34, 15.60s/it]

training loss: 1.1840877532958984


training:  15%|█▌        | 2812/18500 [12:22:26<67:58:05, 15.60s/it]

training loss: 0.6167610883712769


training:  15%|█▌        | 2813/18500 [12:22:42<67:56:52, 15.59s/it]

training loss: 0.9509246349334717


training:  15%|█▌        | 2814/18500 [12:22:58<67:56:25, 15.59s/it]

training loss: 0.5909488201141357


training:  15%|█▌        | 2815/18500 [12:23:13<67:54:34, 15.59s/it]

training loss: 0.8804952502250671


training:  15%|█▌        | 2816/18500 [12:23:29<67:54:17, 15.59s/it]

training loss: 0.6848857998847961


training:  15%|█▌        | 2817/18500 [12:23:44<67:53:27, 15.58s/it]

training loss: 0.8713705539703369


training:  15%|█▌        | 2818/18500 [12:24:00<67:53:18, 15.58s/it]

training loss: 0.6900632977485657


training:  15%|█▌        | 2819/18500 [12:24:16<67:52:20, 15.58s/it]

training loss: 0.6067663431167603


training:  15%|█▌        | 2820/18500 [12:24:31<67:53:18, 15.59s/it]

training loss: 1.0717337131500244


training:  15%|█▌        | 2821/18500 [12:24:47<67:53:56, 15.59s/it]

training loss: 1.0304434299468994


training:  15%|█▌        | 2822/18500 [12:25:02<67:55:09, 15.60s/it]

training loss: 0.6364477276802063


training:  15%|█▌        | 2823/18500 [12:25:18<67:55:04, 15.60s/it]

training loss: 1.014419436454773


training:  15%|█▌        | 2824/18500 [12:25:34<67:55:40, 15.60s/it]

training loss: 0.9683110117912292


training:  15%|█▌        | 2825/18500 [12:25:49<67:54:39, 15.60s/it]

training loss: 0.7982513308525085


training:  15%|█▌        | 2826/18500 [12:26:05<67:53:53, 15.59s/it]

training loss: 0.709185779094696


training:  15%|█▌        | 2827/18500 [12:26:20<67:54:10, 15.60s/it]

training loss: 0.903431236743927


training:  15%|█▌        | 2828/18500 [12:26:36<67:53:44, 15.60s/it]

training loss: 0.8454058170318604


training:  15%|█▌        | 2829/18500 [12:26:52<67:53:05, 15.59s/it]

training loss: 0.6444092988967896


training:  15%|█▌        | 2830/18500 [12:27:07<67:51:28, 15.59s/it]

training loss: 0.8848991990089417


training:  15%|█▌        | 2831/18500 [12:27:23<67:51:03, 15.59s/it]

training loss: 0.7177336812019348


training:  15%|█▌        | 2832/18500 [12:27:38<67:50:02, 15.59s/it]

training loss: 1.0422959327697754


training:  15%|█▌        | 2833/18500 [12:27:54<67:49:11, 15.58s/it]

training loss: 0.7650184035301208


training:  15%|█▌        | 2834/18500 [12:28:09<67:49:08, 15.58s/it]

training loss: 0.9311990737915039


training:  15%|█▌        | 2835/18500 [12:28:25<67:49:02, 15.59s/it]

training loss: 0.5261452198028564


training:  15%|█▌        | 2836/18500 [12:28:41<67:48:45, 15.59s/it]

training loss: 1.0413466691970825


training:  15%|█▌        | 2837/18500 [12:28:56<67:48:34, 15.59s/it]

training loss: 0.7947441935539246


training:  15%|█▌        | 2838/18500 [12:29:12<67:47:29, 15.58s/it]

training loss: 0.8461953401565552


training:  15%|█▌        | 2839/18500 [12:29:27<67:48:29, 15.59s/it]

training loss: 0.6067202091217041


training:  15%|█▌        | 2840/18500 [12:29:43<67:47:45, 15.59s/it]

training loss: 0.7549775242805481


training:  15%|█▌        | 2841/18500 [12:29:59<67:48:08, 15.59s/it]

training loss: 0.8043550252914429


training:  15%|█▌        | 2842/18500 [12:30:14<67:46:57, 15.58s/it]

training loss: 1.0392799377441406


training:  15%|█▌        | 2843/18500 [12:30:30<67:48:00, 15.59s/it]

training loss: 0.7546346783638


training:  15%|█▌        | 2844/18500 [12:30:45<67:46:38, 15.58s/it]

training loss: 0.7158125638961792


training:  15%|█▌        | 2845/18500 [12:31:01<67:46:37, 15.59s/it]

training loss: 0.8837031722068787


training:  15%|█▌        | 2846/18500 [12:31:16<67:45:54, 15.58s/it]

training loss: 0.5576496124267578


training:  15%|█▌        | 2847/18500 [12:31:32<67:47:45, 15.59s/it]

training loss: 0.9983000755310059


training:  15%|█▌        | 2848/18500 [12:31:48<67:46:21, 15.59s/it]

training loss: 0.589572548866272


training:  15%|█▌        | 2849/18500 [12:32:03<67:45:57, 15.59s/it]

training loss: 0.8212665915489197


training:  15%|█▌        | 2850/18500 [12:32:19<67:45:00, 15.58s/it]

training loss: 0.9842048287391663


training:  15%|█▌        | 2851/18500 [12:32:34<67:44:49, 15.59s/it]

training loss: 1.0605648756027222


training:  15%|█▌        | 2852/18500 [12:32:50<67:43:35, 15.58s/it]

training loss: 1.0177409648895264


training:  15%|█▌        | 2853/18500 [12:33:06<67:43:33, 15.58s/it]

training loss: 0.8193348050117493


training:  15%|█▌        | 2854/18500 [12:33:21<67:44:11, 15.59s/it]

training loss: 0.5868957042694092


training:  15%|█▌        | 2855/18500 [12:33:37<67:43:42, 15.58s/it]

training loss: 0.47182080149650574


training:  15%|█▌        | 2856/18500 [12:33:52<67:42:51, 15.58s/it]

training loss: 0.4750821590423584


training:  15%|█▌        | 2857/18500 [12:34:08<67:41:51, 15.58s/it]

training loss: 0.9168940186500549


training:  15%|█▌        | 2858/18500 [12:34:23<67:43:17, 15.59s/it]

training loss: 0.9642763137817383


training:  15%|█▌        | 2859/18500 [12:34:39<67:42:44, 15.58s/it]

training loss: 0.7976664304733276


training:  15%|█▌        | 2860/18500 [12:34:55<67:42:52, 15.59s/it]

training loss: 1.001663327217102


training:  15%|█▌        | 2861/18500 [12:35:10<67:42:01, 15.58s/it]

training loss: 0.8384950757026672


training:  15%|█▌        | 2862/18500 [12:35:26<67:42:26, 15.59s/it]

training loss: 0.3547595143318176


training:  15%|█▌        | 2863/18500 [12:35:41<67:41:19, 15.58s/it]

training loss: 0.5500391721725464


training:  15%|█▌        | 2864/18500 [12:35:57<67:41:20, 15.58s/it]

training loss: 0.6595786213874817


training:  15%|█▌        | 2865/18500 [12:36:13<67:40:15, 15.58s/it]

training loss: 0.8967174291610718


training:  15%|█▌        | 2866/18500 [12:36:28<67:40:59, 15.59s/it]

training loss: 0.9085912108421326


training:  15%|█▌        | 2867/18500 [12:36:44<67:41:05, 15.59s/it]

training loss: 1.1188108921051025


training:  16%|█▌        | 2868/18500 [12:36:59<67:40:55, 15.59s/it]

training loss: 0.6762654781341553


training:  16%|█▌        | 2869/18500 [12:37:15<67:41:04, 15.59s/it]

training loss: 0.7521114349365234


training:  16%|█▌        | 2870/18500 [12:37:30<67:41:04, 15.59s/it]

training loss: 0.9081286787986755


training:  16%|█▌        | 2871/18500 [12:37:46<67:39:42, 15.59s/it]

training loss: 0.6191686987876892


training:  16%|█▌        | 2872/18500 [12:38:02<67:39:29, 15.59s/it]

training loss: 0.6978223323822021


training:  16%|█▌        | 2873/18500 [12:38:17<67:39:06, 15.58s/it]

training loss: 0.7398905158042908


training:  16%|█▌        | 2874/18500 [12:38:33<67:41:02, 15.59s/it]

training loss: 0.6404105424880981


training:  16%|█▌        | 2875/18500 [12:38:48<67:39:53, 15.59s/it]

training loss: 0.5147902965545654


training:  16%|█▌        | 2876/18500 [12:39:04<67:39:11, 15.59s/it]

training loss: 0.7616148591041565


training:  16%|█▌        | 2877/18500 [12:39:20<67:39:08, 15.59s/it]

training loss: 0.7292035818099976


training:  16%|█▌        | 2878/18500 [12:39:35<67:39:30, 15.59s/it]

training loss: 0.8360032439231873


training:  16%|█▌        | 2879/18500 [12:39:51<67:39:00, 15.59s/it]

training loss: 0.8264358043670654


training:  16%|█▌        | 2880/18500 [12:40:06<67:37:54, 15.59s/it]

training loss: 0.9524020552635193


training:  16%|█▌        | 2881/18500 [12:40:22<67:38:01, 15.59s/it]

training loss: 1.0233204364776611


training:  16%|█▌        | 2882/18500 [12:40:38<67:37:02, 15.59s/it]

training loss: 0.7672698497772217


training:  16%|█▌        | 2883/18500 [12:40:53<67:35:37, 15.58s/it]

training loss: 0.7675129175186157


training:  16%|█▌        | 2884/18500 [12:41:09<67:35:50, 15.58s/it]

training loss: 0.6705684065818787


training:  16%|█▌        | 2885/18500 [12:41:24<67:37:06, 15.59s/it]

training loss: 1.1047170162200928


training:  16%|█▌        | 2886/18500 [12:41:40<67:35:40, 15.58s/it]

training loss: 1.078285574913025


training:  16%|█▌        | 2887/18500 [12:41:55<67:34:57, 15.58s/it]

training loss: 0.6238797307014465


training:  16%|█▌        | 2888/18500 [12:42:11<67:34:10, 15.58s/it]

training loss: 0.9485368132591248


training:  16%|█▌        | 2889/18500 [12:42:27<67:36:06, 15.59s/it]

training loss: 1.1209166049957275


training:  16%|█▌        | 2890/18500 [12:42:42<67:34:20, 15.58s/it]

training loss: 0.9710124135017395


training:  16%|█▌        | 2891/18500 [12:42:58<67:34:37, 15.59s/it]

training loss: 0.713839590549469


training:  16%|█▌        | 2892/18500 [12:43:13<67:33:40, 15.58s/it]

training loss: 0.7630460262298584


training:  16%|█▌        | 2893/18500 [12:43:29<67:34:32, 15.59s/it]

training loss: 0.7156234383583069


training:  16%|█▌        | 2894/18500 [12:43:45<67:33:24, 15.58s/it]

training loss: 0.813250720500946


training:  16%|█▌        | 2895/18500 [12:44:00<67:33:28, 15.59s/it]

training loss: 0.704457700252533


training:  16%|█▌        | 2896/18500 [12:44:16<67:32:27, 15.58s/it]

training loss: 0.8814366459846497


training:  16%|█▌        | 2897/18500 [12:44:31<67:32:36, 15.58s/it]

training loss: 0.723588228225708


training:  16%|█▌        | 2898/18500 [12:44:47<67:31:48, 15.58s/it]

training loss: 0.6922030448913574


training:  16%|█▌        | 2899/18500 [12:45:02<67:31:19, 15.58s/it]

training loss: 0.631776750087738


training:  16%|█▌        | 2900/18500 [12:45:18<67:37:41, 15.61s/it]

training loss: 0.8885517120361328
training loss: 0.25778335332870483


training:  16%|█▌        | 2901/18500 [12:45:35<69:30:51, 16.04s/it]

validation loss: 1.589439868927002


training:  16%|█▌        | 2902/18500 [12:45:51<69:01:54, 15.93s/it]

training loss: 0.45245087146759033


training:  16%|█▌        | 2903/18500 [12:46:07<68:41:24, 15.85s/it]

training loss: 0.9717212915420532


training:  16%|█▌        | 2904/18500 [12:46:22<68:27:46, 15.80s/it]

training loss: 0.4503382742404938


training:  16%|█▌        | 2905/18500 [12:46:38<68:14:36, 15.75s/it]

training loss: 1.1276588439941406


training:  16%|█▌        | 2906/18500 [12:46:54<68:06:58, 15.73s/it]

training loss: 0.7546025514602661


training:  16%|█▌        | 2907/18500 [12:47:09<68:02:10, 15.71s/it]

training loss: 0.34355977177619934


training:  16%|█▌        | 2908/18500 [12:47:25<67:59:22, 15.70s/it]

training loss: 0.9009042978286743


training:  16%|█▌        | 2909/18500 [12:47:40<67:51:52, 15.67s/it]

training loss: 0.9912813305854797


training:  16%|█▌        | 2910/18500 [12:47:56<67:46:32, 15.65s/it]

training loss: 1.1288079023361206


training:  16%|█▌        | 2911/18500 [12:48:12<67:41:19, 15.63s/it]

training loss: 0.5421358346939087


training:  16%|█▌        | 2912/18500 [12:48:27<67:39:22, 15.62s/it]

training loss: 0.6452622413635254


training:  16%|█▌        | 2913/18500 [12:48:43<67:35:19, 15.61s/it]

training loss: 0.6790379285812378


training:  16%|█▌        | 2914/18500 [12:48:58<67:33:49, 15.61s/it]

training loss: 0.9075418710708618


training:  16%|█▌        | 2915/18500 [12:49:14<67:31:30, 15.60s/it]

training loss: 0.47391265630722046


training:  16%|█▌        | 2916/18500 [12:49:30<67:30:45, 15.60s/it]

training loss: 1.0667345523834229


training:  16%|█▌        | 2917/18500 [12:49:45<67:29:06, 15.59s/it]

training loss: 0.31975793838500977


training:  16%|█▌        | 2918/18500 [12:50:01<67:28:21, 15.59s/it]

training loss: 0.5794949531555176


training:  16%|█▌        | 2919/18500 [12:50:16<67:27:41, 15.59s/it]

training loss: 0.699364185333252


training:  16%|█▌        | 2920/18500 [12:50:32<67:28:15, 15.59s/it]

training loss: 0.5359029173851013


training:  16%|█▌        | 2921/18500 [12:50:48<67:27:36, 15.59s/it]

training loss: 0.8697022199630737


training:  16%|█▌        | 2922/18500 [12:51:03<67:26:32, 15.59s/it]

training loss: 0.5409233570098877


training:  16%|█▌        | 2923/18500 [12:51:19<67:26:44, 15.59s/it]

training loss: 0.9659410119056702


training:  16%|█▌        | 2924/18500 [12:51:34<67:26:11, 15.59s/it]

training loss: 0.6884006261825562


training:  16%|█▌        | 2925/18500 [12:51:50<67:25:25, 15.58s/it]

training loss: 1.074541687965393


training:  16%|█▌        | 2926/18500 [12:52:05<67:25:45, 15.59s/it]

training loss: 1.0007814168930054


training:  16%|█▌        | 2927/18500 [12:52:21<67:26:37, 15.59s/it]

training loss: 0.4946275055408478


training:  16%|█▌        | 2928/18500 [12:52:37<67:25:11, 15.59s/it]

training loss: 0.9686130285263062


training:  16%|█▌        | 2929/18500 [12:52:52<67:24:31, 15.58s/it]

training loss: 0.7019065618515015


training:  16%|█▌        | 2930/18500 [12:53:08<67:23:38, 15.58s/it]

training loss: 0.4775611162185669


training:  16%|█▌        | 2931/18500 [12:53:23<67:24:26, 15.59s/it]

training loss: 0.6804990768432617


training:  16%|█▌        | 2932/18500 [12:53:39<67:23:02, 15.58s/it]

training loss: 1.0526185035705566


training:  16%|█▌        | 2933/18500 [12:53:55<67:23:29, 15.58s/it]

training loss: 0.5866817831993103


training:  16%|█▌        | 2934/18500 [12:54:10<67:22:51, 15.58s/it]

training loss: 0.7561680674552917


training:  16%|█▌        | 2935/18500 [12:54:26<67:23:07, 15.59s/it]

training loss: 0.8950130343437195


training:  16%|█▌        | 2936/18500 [12:54:41<67:22:03, 15.58s/it]

training loss: 0.6000667810440063


training:  16%|█▌        | 2937/18500 [12:54:57<67:22:02, 15.58s/it]

training loss: 0.8880155086517334


training:  16%|█▌        | 2938/18500 [12:55:12<67:21:07, 15.58s/it]

training loss: 1.0296941995620728


training:  16%|█▌        | 2939/18500 [12:55:28<67:22:41, 15.59s/it]

training loss: 0.7767890095710754


training:  16%|█▌        | 2940/18500 [12:55:44<67:21:39, 15.58s/it]

training loss: 1.095038890838623


training:  16%|█▌        | 2941/18500 [12:55:59<67:21:42, 15.59s/it]

training loss: 1.0068429708480835


training:  16%|█▌        | 2942/18500 [12:56:15<67:20:54, 15.58s/it]

training loss: 0.7093542814254761


training:  16%|█▌        | 2943/18500 [12:56:30<67:20:56, 15.59s/it]

training loss: 1.0399024486541748


training:  16%|█▌        | 2944/18500 [12:56:46<67:20:11, 15.58s/it]

training loss: 0.5084304213523865


training:  16%|█▌        | 2945/18500 [12:57:02<67:19:55, 15.58s/it]

training loss: 0.9607092142105103


training:  16%|█▌        | 2946/18500 [12:57:17<67:19:45, 15.58s/it]

training loss: 0.8131756782531738


training:  16%|█▌        | 2947/18500 [12:57:33<67:19:54, 15.59s/it]

training loss: 0.7501807808876038


training:  16%|█▌        | 2948/18500 [12:57:48<67:19:20, 15.58s/it]

training loss: 0.9522944092750549


training:  16%|█▌        | 2949/18500 [12:58:04<67:19:37, 15.59s/it]

training loss: 1.0883004665374756


training:  16%|█▌        | 2950/18500 [12:58:20<67:19:46, 15.59s/it]

training loss: 0.6024738550186157


training:  16%|█▌        | 2951/18500 [12:58:35<67:19:21, 15.59s/it]

training loss: 0.9770702719688416


training:  16%|█▌        | 2952/18500 [12:58:51<67:18:41, 15.59s/it]

training loss: 1.0938713550567627


training:  16%|█▌        | 2953/18500 [12:59:06<67:17:58, 15.58s/it]

training loss: 0.945875883102417


training:  16%|█▌        | 2954/18500 [12:59:22<67:18:40, 15.59s/it]

training loss: 0.7508373260498047


training:  16%|█▌        | 2955/18500 [12:59:37<67:17:15, 15.58s/it]

training loss: 0.8395257592201233


training:  16%|█▌        | 2956/18500 [12:59:53<67:16:19, 15.58s/it]

training loss: 0.6739343404769897


training:  16%|█▌        | 2957/18500 [13:00:09<67:16:50, 15.58s/it]

training loss: 0.9175401329994202


training:  16%|█▌        | 2958/18500 [13:00:24<67:18:10, 15.59s/it]

training loss: 1.1345205307006836


training:  16%|█▌        | 2959/18500 [13:00:40<67:16:37, 15.58s/it]

training loss: 0.7985320687294006


training:  16%|█▌        | 2960/18500 [13:00:55<67:16:13, 15.58s/it]

training loss: 0.8766970634460449


training:  16%|█▌        | 2961/18500 [13:01:11<67:15:48, 15.58s/it]

training loss: 0.75051349401474


training:  16%|█▌        | 2962/18500 [13:01:27<67:16:19, 15.59s/it]

training loss: 0.5779242515563965


training:  16%|█▌        | 2963/18500 [13:01:42<67:15:08, 15.58s/it]

training loss: 1.1657527685165405


training:  16%|█▌        | 2964/18500 [13:01:58<67:15:16, 15.58s/it]

training loss: 0.909859299659729


training:  16%|█▌        | 2965/18500 [13:02:13<67:14:05, 15.58s/it]

training loss: 0.6361374855041504


training:  16%|█▌        | 2966/18500 [13:02:29<67:14:52, 15.58s/it]

training loss: 1.0633902549743652


training:  16%|█▌        | 2967/18500 [13:02:44<67:14:08, 15.58s/it]

training loss: 0.7766468524932861


training:  16%|█▌        | 2968/18500 [13:03:00<67:14:12, 15.58s/it]

training loss: 0.8453084826469421


training:  16%|█▌        | 2969/18500 [13:03:16<67:13:37, 15.58s/it]

training loss: 0.5979769825935364


training:  16%|█▌        | 2970/18500 [13:03:31<67:13:50, 15.58s/it]

training loss: 0.5150017142295837


training:  16%|█▌        | 2971/18500 [13:03:47<67:12:51, 15.58s/it]

training loss: 0.6084530353546143


training:  16%|█▌        | 2972/18500 [13:04:02<67:13:18, 15.58s/it]

training loss: 0.7565267086029053


training:  16%|█▌        | 2973/18500 [13:04:18<67:13:24, 15.59s/it]

training loss: 0.7647860646247864


training:  16%|█▌        | 2974/18500 [13:04:34<67:12:39, 15.58s/it]

training loss: 0.43170732259750366


training:  16%|█▌        | 2975/18500 [13:04:49<67:11:55, 15.58s/it]

training loss: 0.713919997215271


training:  16%|█▌        | 2976/18500 [13:05:05<67:11:44, 15.58s/it]

training loss: 0.8073344230651855


training:  16%|█▌        | 2977/18500 [13:05:20<67:12:42, 15.59s/it]

training loss: 0.6710636019706726


training:  16%|█▌        | 2978/18500 [13:05:36<67:11:29, 15.58s/it]

training loss: 0.9162404537200928


training:  16%|█▌        | 2979/18500 [13:05:51<67:10:30, 15.58s/it]

training loss: 0.531328558921814


training:  16%|█▌        | 2980/18500 [13:06:07<67:09:46, 15.58s/it]

training loss: 0.7017198801040649


training:  16%|█▌        | 2981/18500 [13:06:23<67:10:35, 15.58s/it]

training loss: 0.8209480047225952


training:  16%|█▌        | 2982/18500 [13:06:38<67:10:13, 15.58s/it]

training loss: 1.0599536895751953


training:  16%|█▌        | 2983/18500 [13:06:54<67:10:04, 15.58s/it]

training loss: 0.7418279647827148


training:  16%|█▌        | 2984/18500 [13:07:09<67:09:12, 15.58s/it]

training loss: 0.7297837138175964


training:  16%|█▌        | 2985/18500 [13:07:25<67:09:43, 15.58s/it]

training loss: 0.8991978764533997


training:  16%|█▌        | 2986/18500 [13:07:40<67:07:53, 15.58s/it]

training loss: 0.5452818870544434


training:  16%|█▌        | 2987/18500 [13:07:56<67:07:58, 15.58s/it]

training loss: 0.4291146397590637


training:  16%|█▌        | 2988/18500 [13:08:12<67:07:45, 15.58s/it]

training loss: 0.8080727458000183


training:  16%|█▌        | 2989/18500 [13:08:27<67:08:49, 15.58s/it]

training loss: 0.741011917591095


training:  16%|█▌        | 2990/18500 [13:08:43<67:07:14, 15.58s/it]

training loss: 0.6202034950256348


training:  16%|█▌        | 2991/18500 [13:08:58<67:07:01, 15.58s/it]

training loss: 0.9604361057281494


training:  16%|█▌        | 2992/18500 [13:09:14<67:06:24, 15.58s/it]

training loss: 0.907365083694458


training:  16%|█▌        | 2993/18500 [13:09:30<67:07:04, 15.58s/it]

training loss: 0.8304861783981323


training:  16%|█▌        | 2994/18500 [13:09:45<67:06:13, 15.58s/it]

training loss: 0.9771605134010315


training:  16%|█▌        | 2995/18500 [13:10:01<67:06:14, 15.58s/it]

training loss: 0.6261352896690369


training:  16%|█▌        | 2996/18500 [13:10:16<67:06:29, 15.58s/it]

training loss: 0.6908560395240784


training:  16%|█▌        | 2997/18500 [13:10:32<67:06:52, 15.58s/it]

training loss: 1.2060749530792236


training:  16%|█▌        | 2998/18500 [13:10:47<67:06:00, 15.58s/it]

training loss: 0.7389918565750122


training:  16%|█▌        | 2999/18500 [13:11:03<67:05:07, 15.58s/it]

training loss: 0.820663571357727


training:  16%|█▌        | 3000/18500 [13:11:19<67:05:15, 15.58s/it]

training loss: 0.855211079120636
training loss: 0.9274355173110962



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.5341715812683105
 nad 100 000. Afganskej armade v tazeni proti
islamistom z hnutia Taliban pomaha okrem americkych vojakov dalsich
5 000 vojakov z krajin NATO.
Citajte viac
Afganske
a koalicne sily vraj zabili 80 clenov al-Kajdy
Trump:
Obama netelefonoval rodinam padlych vojakov
Rusko
udajne dodava mesacne Talibanu palivo za 2,5 miliona dolarov
Americania
zintenzivnili bombardovanie AfganistanuTomas Prouza
Autor: Archiv T.
Prouzu
Urcite pojde o zjednodusenie. Ale kam sa po volbach posunulo Cesko na
urovni EU. Smerom k niecomu, co nazyvame jadro unie, alebo k periferii.
Ci sa nic prilis nezmenilo?
Po volbach sa Cesko pohlo vyrazne k periferii. Strany, ktore aktivne
alebo aspon pasivne podporovali nas posun do jadra, teda TOP 09, Starostovia
a nezavisli a tak tichsie aj CSSD, dostali menej ako tretinu hlasov. Vidiet
teda, ze v Cesku podpora proeuropskych tem chyba.
Preco je to tak, kde je hlavny problem?
Chyba niekto, kto by pozitivne propagoval E


generating:   0%|          | 1/512 [00:00<01:51,  4.57it/s][A
generating:   0%|          | 2/512 [00:00<01:50,  4.63it/s][A
generating:   1%|          | 3/512 [00:00<01:49,  4.65it/s][A
generating:   1%|          | 4/512 [00:00<01:51,  4.57it/s][A
generating:   1%|          | 5/512 [00:01<01:50,  4.61it/s][A
generating:   1%|          | 6/512 [00:01<01:48,  4.66it/s][A
generating:   1%|▏         | 7/512 [00:01<01:47,  4.70it/s][A
generating:   2%|▏         | 8/512 [00:01<01:47,  4.70it/s][A
generating:   2%|▏         | 9/512 [00:01<01:48,  4.65it/s][A
generating:   2%|▏         | 10/512 [00:02<01:46,  4.70it/s][A
generating:   2%|▏         | 11/512 [00:02<01:45,  4.74it/s][A
generating:   2%|▏         | 12/512 [00:02<01:44,  4.77it/s][A
generating:   3%|▎         | 13/512 [00:02<01:44,  4.77it/s][A
generating:   3%|▎         | 14/512 [00:02<01:44,  4.75it/s][A
generating:   3%|▎         | 15/512 [00:03<01:44,  4.77it/s][A
generating:   3%|▎         | 16/512 [00:03<01:43

m povedat, ze by bol na mnohe
naplanovany osvietske obchod v hmotnosti mozno americka spolocne
v sucasnosti.
Zakladnou strategie sa to vyzerat?
Radovanie vsak rozdelenie zivota. Vela to prinieslo na prisla. Ak priputat.
Lidl som tom pomohlo posilnenie slabsi, samozrejme,
mozno nad tejto chvili na to, ze stoji pred rakuske videli iba ako zastavit iste od
apelpkovych miestach. Mohlo vidite. Uvalit vysetruju.
Zosobasto akciove deti, co sa rezim to si musia vo vecia, ked bola
dejina kricky vyrobkov s v


training:  16%|█▌        | 3002/18500 [13:13:40<166:38:44, 38.71s/it]

training loss: 0.7863378524780273


training:  16%|█▌        | 3003/18500 [13:13:56<136:46:10, 31.77s/it]

training loss: 0.6566120982170105


training:  16%|█▌        | 3004/18500 [13:14:11<115:51:03, 26.91s/it]

training loss: 0.9264304637908936


training:  16%|█▌        | 3005/18500 [13:14:27<101:13:41, 23.52s/it]

training loss: 0.7074880599975586


training:  16%|█▌        | 3006/18500 [13:14:42<90:58:44, 21.14s/it] 

training loss: 0.6520355939865112


training:  16%|█▋        | 3007/18500 [13:14:58<83:48:36, 19.47s/it]

training loss: 0.8674379587173462


training:  16%|█▋        | 3008/18500 [13:15:13<78:47:08, 18.31s/it]

training loss: 0.5216037631034851


training:  16%|█▋        | 3009/18500 [13:15:29<75:16:40, 17.49s/it]

training loss: 1.0163888931274414


training:  16%|█▋        | 3010/18500 [13:15:45<72:47:48, 16.92s/it]

training loss: 0.645770251750946


training:  16%|█▋        | 3011/18500 [13:16:00<71:04:41, 16.52s/it]

training loss: 1.0148096084594727


training:  16%|█▋        | 3012/18500 [13:16:16<69:51:37, 16.24s/it]

training loss: 0.8725739121437073


training:  16%|█▋        | 3013/18500 [13:16:31<69:00:51, 16.04s/it]

training loss: 0.9137808084487915


training:  16%|█▋        | 3014/18500 [13:16:47<68:24:53, 15.90s/it]

training loss: 0.8484586477279663


training:  16%|█▋        | 3015/18500 [13:17:03<68:00:20, 15.81s/it]

training loss: 0.5806695818901062


training:  16%|█▋        | 3016/18500 [13:17:18<67:47:09, 15.76s/it]

training loss: 0.8193826079368591


training:  16%|█▋        | 3017/18500 [13:17:34<67:39:14, 15.73s/it]

training loss: 0.8529183864593506


training:  16%|█▋        | 3018/18500 [13:17:50<67:33:56, 15.71s/it]

training loss: 0.890335202217102


training:  16%|█▋        | 3019/18500 [13:18:05<67:31:46, 15.70s/it]

training loss: 0.46960511803627014


training:  16%|█▋        | 3020/18500 [13:18:21<67:30:13, 15.70s/it]

training loss: 0.7032044529914856


training:  16%|█▋        | 3021/18500 [13:18:37<67:28:09, 15.69s/it]

training loss: 0.6158875823020935


training:  16%|█▋        | 3022/18500 [13:18:52<67:27:17, 15.69s/it]

training loss: 0.6826934814453125


training:  16%|█▋        | 3023/18500 [13:19:08<67:24:13, 15.68s/it]

training loss: 0.715599536895752


training:  16%|█▋        | 3024/18500 [13:19:24<67:24:38, 15.68s/it]

training loss: 0.6770169138908386


training:  16%|█▋        | 3025/18500 [13:19:39<67:19:14, 15.66s/it]

training loss: 0.6026736497879028


training:  16%|█▋        | 3026/18500 [13:19:55<67:15:48, 15.65s/it]

training loss: 0.6100527048110962


training:  16%|█▋        | 3027/18500 [13:20:10<67:11:06, 15.63s/it]

training loss: 0.9060510396957397


training:  16%|█▋        | 3028/18500 [13:20:26<67:07:51, 15.62s/it]

training loss: 0.919879674911499


training:  16%|█▋        | 3029/18500 [13:20:42<67:04:11, 15.61s/it]

training loss: 0.8073241114616394


training:  16%|█▋        | 3030/18500 [13:20:57<67:03:42, 15.61s/it]

training loss: 0.5293419361114502


training:  16%|█▋        | 3031/18500 [13:21:13<67:01:27, 15.60s/it]

training loss: 0.704392671585083


training:  16%|█▋        | 3032/18500 [13:21:28<67:01:29, 15.60s/it]

training loss: 0.5321601629257202


training:  16%|█▋        | 3033/18500 [13:21:44<66:59:25, 15.59s/it]

training loss: 1.050299882888794


training:  16%|█▋        | 3034/18500 [13:22:00<66:59:55, 15.60s/it]

training loss: 0.59996098279953


training:  16%|█▋        | 3035/18500 [13:22:15<66:58:08, 15.59s/it]

training loss: 1.039039134979248


training:  16%|█▋        | 3036/18500 [13:22:31<66:57:56, 15.59s/it]

training loss: 0.9136490821838379


training:  16%|█▋        | 3037/18500 [13:22:46<66:56:51, 15.59s/it]

training loss: 0.822839081287384


training:  16%|█▋        | 3038/18500 [13:23:02<66:56:59, 15.59s/it]

training loss: 1.2083855867385864


training:  16%|█▋        | 3039/18500 [13:23:17<66:56:51, 15.59s/it]

training loss: 0.5836857557296753


training:  16%|█▋        | 3040/18500 [13:23:33<66:56:59, 15.59s/it]

training loss: 0.7003883719444275


training:  16%|█▋        | 3041/18500 [13:23:49<66:56:53, 15.59s/it]

training loss: 0.49427610635757446


training:  16%|█▋        | 3042/18500 [13:24:04<66:57:21, 15.59s/it]

training loss: 0.7122053503990173


training:  16%|█▋        | 3043/18500 [13:24:20<66:56:38, 15.59s/it]

training loss: 0.9185805320739746


training:  16%|█▋        | 3044/18500 [13:24:35<66:55:25, 15.59s/it]

training loss: 0.7300419807434082


training:  16%|█▋        | 3045/18500 [13:24:51<66:55:55, 15.59s/it]

training loss: 0.8550466299057007


training:  16%|█▋        | 3046/18500 [13:25:07<66:54:38, 15.59s/it]

training loss: 0.9665303826332092


training:  16%|█▋        | 3047/18500 [13:25:22<66:54:53, 15.59s/it]

training loss: 0.44222158193588257


training:  16%|█▋        | 3048/18500 [13:25:38<66:53:47, 15.59s/it]

training loss: 0.9504991769790649


training:  16%|█▋        | 3049/18500 [13:25:53<66:53:51, 15.59s/it]

training loss: 0.8231746554374695


training:  16%|█▋        | 3050/18500 [13:26:09<66:53:41, 15.59s/it]

training loss: 1.0501493215560913


training:  16%|█▋        | 3051/18500 [13:26:25<66:55:06, 15.59s/it]

training loss: 0.49902060627937317


training:  16%|█▋        | 3052/18500 [13:26:40<66:53:37, 15.59s/it]

training loss: 1.2279558181762695


training:  17%|█▋        | 3053/18500 [13:26:56<66:54:39, 15.59s/it]

training loss: 0.802156925201416


training:  17%|█▋        | 3054/18500 [13:27:11<66:53:22, 15.59s/it]

training loss: 0.5650197863578796


training:  17%|█▋        | 3055/18500 [13:27:27<66:53:22, 15.59s/it]

training loss: 0.9568524360656738


training:  17%|█▋        | 3056/18500 [13:27:42<66:52:09, 15.59s/it]

training loss: 0.8500053882598877


training:  17%|█▋        | 3057/18500 [13:27:58<66:53:26, 15.59s/it]

training loss: 0.37211111187934875


training:  17%|█▋        | 3058/18500 [13:28:14<66:52:21, 15.59s/it]

training loss: 0.9616774320602417


training:  17%|█▋        | 3059/18500 [13:28:29<66:52:33, 15.59s/it]

training loss: 0.6096867918968201


training:  17%|█▋        | 3060/18500 [13:28:45<66:51:28, 15.59s/it]

training loss: 0.6445284485816956


training:  17%|█▋        | 3061/18500 [13:29:00<66:53:03, 15.60s/it]

training loss: 0.7096596956253052


training:  17%|█▋        | 3062/18500 [13:29:16<66:52:03, 15.59s/it]

training loss: 0.5988307595252991


training:  17%|█▋        | 3063/18500 [13:29:32<66:52:31, 15.60s/it]

training loss: 0.8933030366897583


training:  17%|█▋        | 3064/18500 [13:29:47<66:50:57, 15.59s/it]

training loss: 0.7174015045166016


training:  17%|█▋        | 3065/18500 [13:30:03<66:50:50, 15.59s/it]

training loss: 0.5300209522247314


training:  17%|█▋        | 3066/18500 [13:30:18<66:50:33, 15.59s/it]

training loss: 0.2022927701473236


training:  17%|█▋        | 3067/18500 [13:30:34<66:49:40, 15.59s/it]

training loss: 0.7784450054168701


training:  17%|█▋        | 3068/18500 [13:30:50<66:49:38, 15.59s/it]

training loss: 0.9978040456771851


training:  17%|█▋        | 3069/18500 [13:31:05<66:49:56, 15.59s/it]

training loss: 0.6973980069160461


training:  17%|█▋        | 3070/18500 [13:31:21<66:50:47, 15.60s/it]

training loss: 0.8973175883293152


training:  17%|█▋        | 3071/18500 [13:31:36<66:49:12, 15.59s/it]

training loss: 0.821766197681427


training:  17%|█▋        | 3072/18500 [13:31:52<66:49:49, 15.59s/it]

training loss: 0.8452435731887817


training:  17%|█▋        | 3073/18500 [13:32:08<66:49:10, 15.59s/it]

training loss: 0.8331279754638672


training:  17%|█▋        | 3074/18500 [13:32:23<66:49:18, 15.59s/it]

training loss: 0.8737558722496033


training:  17%|█▋        | 3075/18500 [13:32:39<66:47:13, 15.59s/it]

training loss: 0.4937800168991089


training:  17%|█▋        | 3076/18500 [13:32:54<66:48:09, 15.59s/it]

training loss: 0.821423351764679


training:  17%|█▋        | 3077/18500 [13:33:10<66:47:34, 15.59s/it]

training loss: 0.6238089203834534


training:  17%|█▋        | 3078/18500 [13:33:26<66:48:05, 15.59s/it]

training loss: 0.8654695749282837


training:  17%|█▋        | 3079/18500 [13:33:41<66:46:18, 15.59s/it]

training loss: 1.1743535995483398


training:  17%|█▋        | 3080/18500 [13:33:57<66:46:47, 15.59s/it]

training loss: 0.9071742296218872


training:  17%|█▋        | 3081/18500 [13:34:12<66:46:30, 15.59s/it]

training loss: 1.0125720500946045


training:  17%|█▋        | 3082/18500 [13:34:28<66:46:31, 15.59s/it]

training loss: 0.6897276043891907


training:  17%|█▋        | 3083/18500 [13:34:43<66:45:31, 15.59s/it]

training loss: 1.153895378112793


training:  17%|█▋        | 3084/18500 [13:34:59<66:44:51, 15.59s/it]

training loss: 0.9870197176933289


training:  17%|█▋        | 3085/18500 [13:35:15<66:43:29, 15.58s/it]

training loss: 0.9046179056167603


training:  17%|█▋        | 3086/18500 [13:35:30<66:43:15, 15.58s/it]

training loss: 0.8069902658462524


training:  17%|█▋        | 3087/18500 [13:35:46<66:42:21, 15.58s/it]

training loss: 0.8546560406684875


training:  17%|█▋        | 3088/18500 [13:36:01<66:43:02, 15.58s/it]

training loss: 1.014406681060791


training:  17%|█▋        | 3089/18500 [13:36:17<66:42:51, 15.58s/it]

training loss: 0.61395663022995


training:  17%|█▋        | 3090/18500 [13:36:33<66:43:22, 15.59s/it]

training loss: 0.8464117646217346


training:  17%|█▋        | 3091/18500 [13:36:48<66:42:33, 15.59s/it]

training loss: 0.7084802985191345


training:  17%|█▋        | 3092/18500 [13:37:04<66:43:04, 15.59s/it]

training loss: 0.7731698751449585


training:  17%|█▋        | 3093/18500 [13:37:19<66:42:51, 15.59s/it]

training loss: 0.89090895652771


training:  17%|█▋        | 3094/18500 [13:37:35<66:42:25, 15.59s/it]

training loss: 1.155120611190796


training:  17%|█▋        | 3095/18500 [13:37:50<66:42:01, 15.59s/it]

training loss: 0.5910166501998901


training:  17%|█▋        | 3096/18500 [13:38:06<66:41:31, 15.59s/it]

training loss: 0.8998826146125793


training:  17%|█▋        | 3097/18500 [13:38:22<66:42:00, 15.59s/it]

training loss: 0.6277533769607544


training:  17%|█▋        | 3098/18500 [13:38:37<66:40:19, 15.58s/it]

training loss: 0.6166824102401733


training:  17%|█▋        | 3099/18500 [13:38:53<66:41:13, 15.59s/it]

training loss: 0.9017834067344666


training:  17%|█▋        | 3100/18500 [13:39:08<66:41:06, 15.59s/it]

training loss: 0.7589727640151978
training loss: 0.8390502333641052


training:  17%|█▋        | 3101/18500 [13:39:25<68:28:59, 16.01s/it]

validation loss: 1.5365335941314697


training:  17%|█▋        | 3102/18500 [13:39:41<67:56:46, 15.89s/it]

training loss: 0.4978671669960022


training:  17%|█▋        | 3103/18500 [13:39:57<67:34:23, 15.80s/it]

training loss: 1.1634572744369507


training:  17%|█▋        | 3104/18500 [13:40:12<67:17:06, 15.73s/it]

training loss: 0.31343892216682434


training:  17%|█▋        | 3105/18500 [13:40:28<67:06:51, 15.69s/it]

training loss: 0.6228829622268677


training:  17%|█▋        | 3106/18500 [13:40:43<66:57:41, 15.66s/it]

training loss: 0.47657251358032227


training:  17%|█▋        | 3107/18500 [13:40:59<66:52:27, 15.64s/it]

training loss: 0.8347611427307129


training:  17%|█▋        | 3108/18500 [13:41:15<66:47:23, 15.62s/it]

training loss: 0.9568203687667847


training:  17%|█▋        | 3109/18500 [13:41:30<66:44:51, 15.61s/it]

training loss: 0.5890476107597351


training:  17%|█▋        | 3110/18500 [13:41:46<66:41:59, 15.60s/it]

training loss: 0.7132965922355652


training:  17%|█▋        | 3111/18500 [13:42:01<66:41:25, 15.60s/it]

training loss: 0.9599430561065674


training:  17%|█▋        | 3112/18500 [13:42:17<66:39:50, 15.60s/it]

training loss: 0.7873430848121643


training:  17%|█▋        | 3113/18500 [13:42:33<66:39:30, 15.60s/it]

training loss: 0.9492496252059937


training:  17%|█▋        | 3114/18500 [13:42:48<66:39:02, 15.59s/it]

training loss: 0.9060232639312744


training:  17%|█▋        | 3115/18500 [13:43:04<66:38:27, 15.59s/it]

training loss: 0.8488191366195679


training:  17%|█▋        | 3116/18500 [13:43:19<66:38:33, 15.59s/it]

training loss: 0.9523082971572876


training:  17%|█▋        | 3117/18500 [13:43:35<66:36:00, 15.59s/it]

training loss: 0.6161259412765503


training:  17%|█▋        | 3118/18500 [13:43:50<66:34:24, 15.58s/it]

training loss: 0.9094802141189575


training:  17%|█▋        | 3119/18500 [13:44:06<66:32:51, 15.58s/it]

training loss: 0.9127994775772095


training:  17%|█▋        | 3120/18500 [13:44:22<66:34:07, 15.58s/it]

training loss: 0.8719263076782227


training:  17%|█▋        | 3121/18500 [13:44:37<66:32:25, 15.58s/it]

training loss: 0.7920053005218506


training:  17%|█▋        | 3122/18500 [13:44:53<66:33:03, 15.58s/it]

training loss: 0.957080066204071


training:  17%|█▋        | 3123/18500 [13:45:08<66:31:37, 15.58s/it]

training loss: 1.040730595588684


training:  17%|█▋        | 3124/18500 [13:45:24<66:33:03, 15.58s/it]

training loss: 0.32020294666290283


training:  17%|█▋        | 3125/18500 [13:45:39<66:34:33, 15.59s/it]

training loss: 0.3741525411605835


training:  17%|█▋        | 3126/18500 [13:45:55<66:40:33, 15.61s/it]

training loss: 0.4719298183917999


training:  17%|█▋        | 3127/18500 [13:46:11<66:41:01, 15.62s/it]

training loss: 1.048458456993103


training:  17%|█▋        | 3128/18500 [13:46:26<66:45:33, 15.63s/it]

training loss: 1.2414441108703613


training:  17%|█▋        | 3129/18500 [13:46:42<66:46:59, 15.64s/it]

training loss: 0.2971159815788269


training:  17%|█▋        | 3130/18500 [13:46:58<66:51:11, 15.66s/it]

training loss: 0.7523560523986816


training:  17%|█▋        | 3131/18500 [13:47:13<66:51:22, 15.66s/it]

training loss: 0.7621878385543823


training:  17%|█▋        | 3132/18500 [13:47:29<66:53:18, 15.67s/it]

training loss: 0.7007153034210205


training:  17%|█▋        | 3133/18500 [13:47:45<66:51:25, 15.66s/it]

training loss: 0.853127658367157


training:  17%|█▋        | 3134/18500 [13:48:00<66:48:54, 15.65s/it]

training loss: 0.4574039578437805


training:  17%|█▋        | 3135/18500 [13:48:16<66:45:59, 15.64s/it]

training loss: 0.6621401906013489


training:  17%|█▋        | 3136/18500 [13:48:32<66:44:57, 15.64s/it]

training loss: 0.6321531534194946


training:  17%|█▋        | 3137/18500 [13:48:47<66:40:58, 15.63s/it]

training loss: 0.8610795736312866


training:  17%|█▋        | 3138/18500 [13:49:03<66:37:43, 15.61s/it]

training loss: 0.7605153918266296


training:  17%|█▋        | 3139/18500 [13:49:18<66:35:52, 15.61s/it]

training loss: 0.8589816689491272


training:  17%|█▋        | 3140/18500 [13:49:34<66:34:06, 15.60s/it]

training loss: 1.0353220701217651


training:  17%|█▋        | 3141/18500 [13:49:50<66:33:04, 15.60s/it]

training loss: 1.1102709770202637


training:  17%|█▋        | 3142/18500 [13:50:05<66:31:31, 15.59s/it]

training loss: 1.3187894821166992


training:  17%|█▋        | 3143/18500 [13:50:21<66:31:39, 15.60s/it]

training loss: 0.7065158486366272


training:  17%|█▋        | 3144/18500 [13:50:36<66:29:50, 15.59s/it]

training loss: 0.6856731176376343


training:  17%|█▋        | 3145/18500 [13:50:52<66:29:47, 15.59s/it]

training loss: 0.43740907311439514


training:  17%|█▋        | 3146/18500 [13:51:08<66:28:32, 15.59s/it]

training loss: 0.763766884803772


training:  17%|█▋        | 3147/18500 [13:51:23<66:28:59, 15.59s/it]

training loss: 0.7542224526405334


training:  17%|█▋        | 3148/18500 [13:51:39<66:27:30, 15.58s/it]

training loss: 0.6638330817222595


training:  17%|█▋        | 3149/18500 [13:51:54<66:27:54, 15.59s/it]

training loss: 0.6886264681816101


training:  17%|█▋        | 3150/18500 [13:52:10<66:27:22, 15.59s/it]

training loss: 0.8096662163734436


training:  17%|█▋        | 3151/18500 [13:52:26<66:28:30, 15.59s/it]

training loss: 0.9034402966499329


training:  17%|█▋        | 3152/18500 [13:52:41<66:27:11, 15.59s/it]

training loss: 0.6927167177200317


training:  17%|█▋        | 3153/18500 [13:52:57<66:28:10, 15.59s/it]

training loss: 0.5619558095932007


training:  17%|█▋        | 3154/18500 [13:53:12<66:26:31, 15.59s/it]

training loss: 0.8644720911979675


training:  17%|█▋        | 3155/18500 [13:53:28<66:27:32, 15.59s/it]

training loss: 0.7839687466621399


training:  17%|█▋        | 3156/18500 [13:53:43<66:26:12, 15.59s/it]

training loss: 0.8631421327590942


training:  17%|█▋        | 3157/18500 [13:53:59<66:25:56, 15.59s/it]

training loss: 0.8073203563690186


training:  17%|█▋        | 3158/18500 [13:54:15<66:24:54, 15.58s/it]

training loss: 0.4434790313243866


training:  17%|█▋        | 3159/18500 [13:54:30<66:24:42, 15.58s/it]

training loss: 0.8468549847602844


training:  17%|█▋        | 3160/18500 [13:54:46<66:24:25, 15.58s/it]

training loss: 0.9619813561439514


training:  17%|█▋        | 3161/18500 [13:55:01<66:25:17, 15.59s/it]

training loss: 0.6429105997085571


training:  17%|█▋        | 3162/18500 [13:55:17<66:24:20, 15.59s/it]

training loss: 0.5726571083068848


training:  17%|█▋        | 3163/18500 [13:55:33<66:24:40, 15.59s/it]

training loss: 0.8544952869415283


training:  17%|█▋        | 3164/18500 [13:55:48<66:23:42, 15.59s/it]

training loss: 0.482217937707901


training:  17%|█▋        | 3165/18500 [13:56:04<66:23:25, 15.59s/it]

training loss: 0.6074851155281067


training:  17%|█▋        | 3166/18500 [13:56:19<66:23:07, 15.59s/it]

training loss: 0.5542060732841492


training:  17%|█▋        | 3167/18500 [13:56:35<66:22:07, 15.58s/it]

training loss: 0.6094141602516174


training:  17%|█▋        | 3168/18500 [13:56:50<66:22:13, 15.58s/it]

training loss: 0.5567660927772522


training:  17%|█▋        | 3169/18500 [13:57:06<66:21:45, 15.58s/it]

training loss: 0.8428260087966919


training:  17%|█▋        | 3170/18500 [13:57:22<66:22:25, 15.59s/it]

training loss: 0.6451164484024048


training:  17%|█▋        | 3171/18500 [13:57:37<66:21:46, 15.59s/it]

training loss: 0.8643479347229004


training:  17%|█▋        | 3172/18500 [13:57:53<66:21:21, 15.58s/it]

training loss: 0.4565586745738983


training:  17%|█▋        | 3173/18500 [13:58:08<66:20:39, 15.58s/it]

training loss: 0.7470378875732422


training:  17%|█▋        | 3174/18500 [13:58:24<66:21:53, 15.59s/it]

training loss: 1.0002001523971558


training:  17%|█▋        | 3175/18500 [13:58:40<66:20:10, 15.58s/it]

training loss: 0.6366748809814453


training:  17%|█▋        | 3176/18500 [13:58:55<66:20:56, 15.59s/it]

training loss: 1.039345383644104


training:  17%|█▋        | 3177/18500 [13:59:11<66:19:38, 15.58s/it]

training loss: 0.8773832321166992


training:  17%|█▋        | 3178/18500 [13:59:26<66:19:24, 15.58s/it]

training loss: 0.8432697057723999


training:  17%|█▋        | 3179/18500 [13:59:42<66:18:32, 15.58s/it]

training loss: 0.4591178894042969


training:  17%|█▋        | 3180/18500 [13:59:58<66:20:43, 15.59s/it]

training loss: 1.1369786262512207


training:  17%|█▋        | 3181/18500 [14:00:13<66:20:20, 15.59s/it]

training loss: 0.8630191087722778


training:  17%|█▋        | 3182/18500 [14:00:29<66:20:37, 15.59s/it]

training loss: 0.5971174836158752


training:  17%|█▋        | 3183/18500 [14:00:44<66:19:04, 15.59s/it]

training loss: 0.44802382588386536


training:  17%|█▋        | 3184/18500 [14:01:00<66:19:42, 15.59s/it]

training loss: 0.5592865943908691


training:  17%|█▋        | 3185/18500 [14:01:15<66:18:42, 15.59s/it]

training loss: 0.5282403826713562


training:  17%|█▋        | 3186/18500 [14:01:31<66:19:04, 15.59s/it]

training loss: 1.0337657928466797


training:  17%|█▋        | 3187/18500 [14:01:47<66:18:21, 15.59s/it]

training loss: 0.6627902984619141


training:  17%|█▋        | 3188/18500 [14:02:02<66:18:26, 15.59s/it]

training loss: 0.5766490697860718


training:  17%|█▋        | 3189/18500 [14:02:18<66:17:39, 15.59s/it]

training loss: 0.8597652912139893


training:  17%|█▋        | 3190/18500 [14:02:33<66:17:37, 15.59s/it]

training loss: 0.9497736096382141


training:  17%|█▋        | 3191/18500 [14:02:49<66:17:10, 15.59s/it]

training loss: 1.1022058725357056


training:  17%|█▋        | 3192/18500 [14:03:05<66:16:59, 15.59s/it]

training loss: 0.7468163967132568


training:  17%|█▋        | 3193/18500 [14:03:20<66:17:34, 15.59s/it]

training loss: 0.7257967591285706


training:  17%|█▋        | 3194/18500 [14:03:36<66:16:39, 15.59s/it]

training loss: 0.6857687830924988


training:  17%|█▋        | 3195/18500 [14:03:51<66:16:15, 15.59s/it]

training loss: 0.8185070157051086


training:  17%|█▋        | 3196/18500 [14:04:07<66:15:26, 15.59s/it]

training loss: 0.42847344279289246


training:  17%|█▋        | 3197/18500 [14:04:23<66:16:01, 15.59s/it]

training loss: 1.0207234621047974


training:  17%|█▋        | 3198/18500 [14:04:38<66:14:01, 15.58s/it]

training loss: 1.2144718170166016


training:  17%|█▋        | 3199/18500 [14:04:54<66:13:42, 15.58s/it]

training loss: 0.7112041711807251


training:  17%|█▋        | 3200/18500 [14:05:09<66:14:11, 15.59s/it]

training loss: 0.9021633863449097
training loss: 0.6220299005508423


training:  17%|█▋        | 3201/18500 [14:05:26<67:59:40, 16.00s/it]

validation loss: 1.5070748329162598


training:  17%|█▋        | 3202/18500 [14:05:42<67:28:28, 15.88s/it]

training loss: 0.9884517788887024


training:  17%|█▋        | 3203/18500 [14:05:57<67:06:05, 15.79s/it]

training loss: 1.146206021308899


training:  17%|█▋        | 3204/18500 [14:06:13<66:50:13, 15.73s/it]

training loss: 0.49734869599342346


training:  17%|█▋        | 3205/18500 [14:06:29<66:40:49, 15.69s/it]

training loss: 1.0059783458709717


training:  17%|█▋        | 3206/18500 [14:06:44<66:32:34, 15.66s/it]

training loss: 0.5903940200805664


training:  17%|█▋        | 3207/18500 [14:07:00<66:27:22, 15.64s/it]

training loss: 1.0165458917617798


training:  17%|█▋        | 3208/18500 [14:07:15<66:23:04, 15.63s/it]

training loss: 0.7918421030044556


training:  17%|█▋        | 3209/18500 [14:07:31<66:19:47, 15.62s/it]

training loss: 0.8113518953323364


training:  17%|█▋        | 3210/18500 [14:07:47<66:17:38, 15.61s/it]

training loss: 0.7526833415031433


training:  17%|█▋        | 3211/18500 [14:08:02<66:15:58, 15.60s/it]

training loss: 0.4620668292045593


training:  17%|█▋        | 3212/18500 [14:08:18<66:15:19, 15.60s/it]

training loss: 0.9794397354125977


training:  17%|█▋        | 3213/18500 [14:08:33<66:14:42, 15.60s/it]

training loss: 0.30352234840393066


training:  17%|█▋        | 3214/18500 [14:08:49<66:12:58, 15.59s/it]

training loss: 0.9238689541816711


training:  17%|█▋        | 3215/18500 [14:09:05<66:11:13, 15.59s/it]

training loss: 0.7543149590492249


training:  17%|█▋        | 3216/18500 [14:09:20<66:11:28, 15.59s/it]

training loss: 1.0423489809036255


training:  17%|█▋        | 3217/18500 [14:09:36<66:12:23, 15.60s/it]

training loss: 0.6024132966995239


training:  17%|█▋        | 3218/18500 [14:09:51<66:10:59, 15.59s/it]

training loss: 0.8932182192802429


training:  17%|█▋        | 3219/18500 [14:10:07<66:09:33, 15.59s/it]

training loss: 1.0122078657150269


training:  17%|█▋        | 3220/18500 [14:10:22<66:09:34, 15.59s/it]

training loss: 0.6477612853050232


training:  17%|█▋        | 3221/18500 [14:10:38<66:07:40, 15.58s/it]

training loss: 0.814914345741272


training:  17%|█▋        | 3222/18500 [14:10:54<66:06:37, 15.58s/it]

training loss: 0.5301604866981506


training:  17%|█▋        | 3223/18500 [14:11:09<66:06:44, 15.58s/it]

training loss: 0.9846519827842712


training:  17%|█▋        | 3224/18500 [14:11:25<66:07:37, 15.58s/it]

training loss: 1.1440941095352173


training:  17%|█▋        | 3225/18500 [14:11:40<66:06:15, 15.58s/it]

training loss: 0.47084158658981323


training:  17%|█▋        | 3226/18500 [14:11:56<66:06:11, 15.58s/it]

training loss: 0.7503229379653931


training:  17%|█▋        | 3227/18500 [14:12:11<66:04:23, 15.57s/it]

training loss: 0.5604742169380188


training:  17%|█▋        | 3228/18500 [14:12:27<66:05:17, 15.58s/it]

training loss: 1.0005302429199219


training:  17%|█▋        | 3229/18500 [14:12:43<66:05:56, 15.58s/it]

training loss: 0.6841651797294617


training:  17%|█▋        | 3230/18500 [14:12:58<66:06:04, 15.58s/it]

training loss: 0.47056135535240173


training:  17%|█▋        | 3231/18500 [14:13:14<66:06:03, 15.58s/it]

training loss: 0.8849992752075195


training:  17%|█▋        | 3232/18500 [14:13:29<66:07:49, 15.59s/it]

training loss: 0.8280951380729675


training:  17%|█▋        | 3233/18500 [14:13:45<66:07:25, 15.59s/it]

training loss: 0.45202773809432983


training:  17%|█▋        | 3234/18500 [14:14:01<66:06:53, 15.59s/it]

training loss: 0.8114319443702698


training:  17%|█▋        | 3235/18500 [14:14:16<66:06:14, 15.59s/it]

training loss: 0.6129071116447449


training:  17%|█▋        | 3236/18500 [14:14:32<66:06:49, 15.59s/it]

training loss: 0.2912333011627197


training:  17%|█▋        | 3237/18500 [14:14:47<66:06:54, 15.59s/it]

training loss: 0.550504744052887


training:  18%|█▊        | 3238/18500 [14:15:03<66:06:23, 15.59s/it]

training loss: 0.745476484298706


training:  18%|█▊        | 3239/18500 [14:15:19<66:05:54, 15.59s/it]

training loss: 0.8906263709068298


training:  18%|█▊        | 3240/18500 [14:15:34<66:06:09, 15.59s/it]

training loss: 0.5709418058395386


training:  18%|█▊        | 3241/18500 [14:15:50<66:05:17, 15.59s/it]

training loss: 1.0901719331741333


training:  18%|█▊        | 3242/18500 [14:16:05<66:04:35, 15.59s/it]

training loss: 1.1490062475204468


training:  18%|█▊        | 3243/18500 [14:16:21<66:06:24, 15.60s/it]

training loss: 0.6206961870193481


training:  18%|█▊        | 3244/18500 [14:16:37<66:04:54, 15.59s/it]

training loss: 0.5877489447593689


training:  18%|█▊        | 3245/18500 [14:16:52<66:03:25, 15.59s/it]

training loss: 0.8235008120536804


training:  18%|█▊        | 3246/18500 [14:17:08<66:02:11, 15.58s/it]

training loss: 1.0740623474121094


training:  18%|█▊        | 3247/18500 [14:17:23<66:03:51, 15.59s/it]

training loss: 0.5434597134590149


training:  18%|█▊        | 3248/18500 [14:17:39<66:04:28, 15.60s/it]

training loss: 0.8108561038970947


training:  18%|█▊        | 3249/18500 [14:17:55<66:08:24, 15.61s/it]

training loss: 1.1866226196289062


training:  18%|█▊        | 3250/18500 [14:18:10<66:10:27, 15.62s/it]

training loss: 0.8174453973770142


training:  18%|█▊        | 3251/18500 [14:18:26<66:12:59, 15.63s/it]

training loss: 0.8850712776184082


training:  18%|█▊        | 3252/18500 [14:18:42<66:12:03, 15.63s/it]

training loss: 0.7628402709960938


training:  18%|█▊        | 3253/18500 [14:18:57<66:11:32, 15.63s/it]

training loss: 0.7477083206176758


training:  18%|█▊        | 3254/18500 [14:19:13<66:11:21, 15.63s/it]

training loss: 0.6520015001296997


training:  18%|█▊        | 3255/18500 [14:19:28<66:13:21, 15.64s/it]

training loss: 0.4241645932197571


training:  18%|█▊        | 3256/18500 [14:19:44<66:12:56, 15.64s/it]

training loss: 0.5173138380050659


training:  18%|█▊        | 3257/18500 [14:20:00<66:11:34, 15.63s/it]

training loss: 0.536968469619751


training:  18%|█▊        | 3258/18500 [14:20:15<66:09:50, 15.63s/it]

training loss: 0.8732439279556274


training:  18%|█▊        | 3259/18500 [14:20:31<66:08:38, 15.62s/it]

training loss: 0.6455562710762024


training:  18%|█▊        | 3260/18500 [14:20:47<66:06:28, 15.62s/it]

training loss: 0.7335515022277832


training:  18%|█▊        | 3261/18500 [14:21:02<66:03:52, 15.61s/it]

training loss: 0.3922390937805176


training:  18%|█▊        | 3262/18500 [14:21:18<66:02:08, 15.60s/it]

training loss: 0.4267318546772003


training:  18%|█▊        | 3263/18500 [14:21:33<66:01:10, 15.60s/it]

training loss: 0.5794003009796143


training:  18%|█▊        | 3264/18500 [14:21:49<65:59:54, 15.59s/it]

training loss: 0.8057655096054077


training:  18%|█▊        | 3265/18500 [14:22:04<65:58:47, 15.59s/it]

training loss: 0.5774760842323303


training:  18%|█▊        | 3266/18500 [14:22:20<65:59:32, 15.59s/it]

training loss: 0.7347158789634705


training:  18%|█▊        | 3267/18500 [14:22:36<66:07:02, 15.63s/it]

training loss: 1.061389684677124


training:  18%|█▊        | 3268/18500 [14:22:51<66:03:44, 15.61s/it]

training loss: 1.1116046905517578


training:  18%|█▊        | 3269/18500 [14:23:07<66:01:11, 15.60s/it]

training loss: 0.5480075478553772


training:  18%|█▊        | 3270/18500 [14:23:23<66:00:05, 15.60s/it]

training loss: 0.20682621002197266


training:  18%|█▊        | 3271/18500 [14:23:38<65:58:02, 15.59s/it]

training loss: 0.6422055959701538


training:  18%|█▊        | 3272/18500 [14:23:54<65:57:21, 15.59s/it]

training loss: 0.943192183971405


training:  18%|█▊        | 3273/18500 [14:24:09<65:56:34, 15.59s/it]

training loss: 0.7630024552345276


training:  18%|█▊        | 3274/18500 [14:24:25<65:57:15, 15.59s/it]

training loss: 0.5913692712783813


training:  18%|█▊        | 3275/18500 [14:24:40<65:56:05, 15.59s/it]

training loss: 0.6810781359672546


training:  18%|█▊        | 3276/18500 [14:24:56<65:55:37, 15.59s/it]

training loss: 0.6381039619445801


training:  18%|█▊        | 3277/18500 [14:25:12<65:55:10, 15.59s/it]

training loss: 0.8766608834266663


training:  18%|█▊        | 3278/18500 [14:25:27<65:56:11, 15.59s/it]

training loss: 0.853351891040802


training:  18%|█▊        | 3279/18500 [14:25:43<65:55:26, 15.59s/it]

training loss: 0.6749693751335144


training:  18%|█▊        | 3280/18500 [14:25:58<65:54:50, 15.59s/it]

training loss: 0.45953935384750366


training:  18%|█▊        | 3281/18500 [14:26:14<65:52:44, 15.58s/it]

training loss: 0.5972771644592285


training:  18%|█▊        | 3282/18500 [14:26:30<65:52:21, 15.58s/it]

training loss: 0.5592988133430481


training:  18%|█▊        | 3283/18500 [14:26:45<65:50:58, 15.58s/it]

training loss: 0.6445401906967163


training:  18%|█▊        | 3284/18500 [14:27:01<65:50:30, 15.58s/it]

training loss: 0.5971299409866333


training:  18%|█▊        | 3285/18500 [14:27:16<65:49:16, 15.57s/it]

training loss: 0.4901714324951172


training:  18%|█▊        | 3286/18500 [14:27:32<65:49:22, 15.58s/it]

training loss: 0.8356989026069641


training:  18%|█▊        | 3287/18500 [14:27:47<65:48:53, 15.57s/it]

training loss: 0.7011978626251221


training:  18%|█▊        | 3288/18500 [14:28:03<65:48:17, 15.57s/it]

training loss: 0.7947319746017456


training:  18%|█▊        | 3289/18500 [14:28:19<65:48:06, 15.57s/it]

training loss: 0.3261805474758148


training:  18%|█▊        | 3290/18500 [14:28:34<65:48:07, 15.57s/it]

training loss: 0.5811600089073181


training:  18%|█▊        | 3291/18500 [14:28:50<65:46:41, 15.57s/it]

training loss: 0.5022612810134888


training:  18%|█▊        | 3292/18500 [14:29:05<65:46:42, 15.57s/it]

training loss: 0.6717320680618286


training:  18%|█▊        | 3293/18500 [14:29:21<65:48:00, 15.58s/it]

training loss: 0.6337162852287292


training:  18%|█▊        | 3294/18500 [14:29:36<65:47:25, 15.58s/it]

training loss: 0.4750690460205078


training:  18%|█▊        | 3295/18500 [14:29:52<65:46:19, 15.57s/it]

training loss: 1.040369987487793


training:  18%|█▊        | 3296/18500 [14:30:08<65:45:42, 15.57s/it]

training loss: 1.0622001886367798


training:  18%|█▊        | 3297/18500 [14:30:23<65:46:28, 15.58s/it]

training loss: 0.7162745594978333


training:  18%|█▊        | 3298/18500 [14:30:39<65:45:28, 15.57s/it]

training loss: 0.44125616550445557


training:  18%|█▊        | 3299/18500 [14:30:54<65:45:52, 15.57s/it]

training loss: 0.8387935161590576


training:  18%|█▊        | 3300/18500 [14:31:10<65:45:52, 15.58s/it]

training loss: 1.1211944818496704
training loss: 0.8102508187294006


training:  18%|█▊        | 3301/18500 [14:31:27<67:31:26, 15.99s/it]

validation loss: 1.5672430992126465


training:  18%|█▊        | 3302/18500 [14:31:42<67:00:02, 15.87s/it]

training loss: 0.7996786236763


training:  18%|█▊        | 3303/18500 [14:31:58<66:37:11, 15.78s/it]

training loss: 0.9653801321983337


training:  18%|█▊        | 3304/18500 [14:32:14<66:20:12, 15.72s/it]

training loss: 0.5151744484901428


training:  18%|█▊        | 3305/18500 [14:32:29<66:10:01, 15.68s/it]

training loss: 0.6916492581367493


training:  18%|█▊        | 3306/18500 [14:32:45<66:02:00, 15.65s/it]

training loss: 1.0312749147415161


training:  18%|█▊        | 3307/18500 [14:33:00<65:55:42, 15.62s/it]

training loss: 0.761719822883606


training:  18%|█▊        | 3308/18500 [14:33:16<65:50:50, 15.60s/it]

training loss: 0.8787156939506531


training:  18%|█▊        | 3309/18500 [14:33:31<65:48:38, 15.60s/it]

training loss: 0.5178496837615967


training:  18%|█▊        | 3310/18500 [14:33:47<65:47:37, 15.59s/it]

training loss: 0.5360555648803711


training:  18%|█▊        | 3311/18500 [14:34:03<65:45:29, 15.59s/it]

training loss: 0.5175950527191162


training:  18%|█▊        | 3312/18500 [14:34:18<65:44:41, 15.58s/it]

training loss: 1.0617903470993042


training:  18%|█▊        | 3313/18500 [14:34:34<65:43:59, 15.58s/it]

training loss: 0.5763373374938965


training:  18%|█▊        | 3314/18500 [14:34:49<65:45:19, 15.59s/it]

training loss: 0.469199538230896


training:  18%|█▊        | 3315/18500 [14:35:05<65:45:54, 15.59s/it]

training loss: 0.7336445450782776


training:  18%|█▊        | 3316/18500 [14:35:21<65:46:26, 15.59s/it]

training loss: 0.8033793568611145


training:  18%|█▊        | 3317/18500 [14:35:36<65:46:27, 15.60s/it]

training loss: 0.5647926330566406


training:  18%|█▊        | 3318/18500 [14:35:52<65:45:07, 15.59s/it]

training loss: 0.6093670725822449


training:  18%|█▊        | 3319/18500 [14:36:07<65:44:33, 15.59s/it]

training loss: 0.679198145866394


training:  18%|█▊        | 3320/18500 [14:36:23<65:44:07, 15.59s/it]

training loss: 1.0365923643112183


training:  18%|█▊        | 3321/18500 [14:36:38<65:42:22, 15.58s/it]

training loss: 0.7083871364593506


training:  18%|█▊        | 3322/18500 [14:36:54<65:41:47, 15.58s/it]

training loss: 0.9045826196670532


training:  18%|█▊        | 3323/18500 [14:37:10<65:41:33, 15.58s/it]

training loss: 0.914426326751709


training:  18%|█▊        | 3324/18500 [14:37:25<65:41:35, 15.58s/it]

training loss: 0.8341387510299683


training:  18%|█▊        | 3325/18500 [14:37:41<65:40:44, 15.58s/it]

training loss: 0.6958128809928894


training:  18%|█▊        | 3326/18500 [14:37:56<65:39:59, 15.58s/it]

training loss: 0.5648576021194458


training:  18%|█▊        | 3327/18500 [14:38:12<65:39:40, 15.58s/it]

training loss: 0.8245829343795776


training:  18%|█▊        | 3328/18500 [14:38:28<65:39:17, 15.58s/it]

training loss: 0.5652347803115845


training:  18%|█▊        | 3329/18500 [14:38:43<65:39:10, 15.58s/it]

training loss: 0.6985138058662415


training:  18%|█▊        | 3330/18500 [14:38:59<65:39:10, 15.58s/it]

training loss: 0.9491134881973267


training:  18%|█▊        | 3331/18500 [14:39:14<65:39:09, 15.58s/it]

training loss: 0.7007126808166504


training:  18%|█▊        | 3332/18500 [14:39:30<65:39:08, 15.58s/it]

training loss: 0.5939949750900269


training:  18%|█▊        | 3333/18500 [14:39:45<65:38:28, 15.58s/it]

training loss: 0.6821562051773071


training:  18%|█▊        | 3334/18500 [14:40:01<65:40:11, 15.59s/it]

training loss: 0.8181567192077637


training:  18%|█▊        | 3335/18500 [14:40:17<65:39:04, 15.58s/it]

training loss: 0.5892060995101929


training:  18%|█▊        | 3336/18500 [14:40:32<65:40:11, 15.59s/it]

training loss: 0.7241988778114319


training:  18%|█▊        | 3337/18500 [14:40:48<65:40:20, 15.59s/it]

training loss: 1.1288785934448242


training:  18%|█▊        | 3338/18500 [14:41:03<65:41:11, 15.60s/it]

training loss: 0.5588631629943848


training:  18%|█▊        | 3339/18500 [14:41:19<65:42:29, 15.60s/it]

training loss: 1.1160081624984741


training:  18%|█▊        | 3340/18500 [14:41:35<65:42:10, 15.60s/it]

training loss: 0.9855954647064209


training:  18%|█▊        | 3341/18500 [14:41:50<65:42:19, 15.60s/it]

training loss: 0.3328401744365692


training:  18%|█▊        | 3342/18500 [14:42:06<65:42:56, 15.61s/it]

training loss: 0.8933586478233337


training:  18%|█▊        | 3343/18500 [14:42:21<65:42:08, 15.61s/it]

training loss: 0.7957346439361572


training:  18%|█▊        | 3344/18500 [14:42:37<65:40:13, 15.60s/it]

training loss: 0.6051583886146545


training:  18%|█▊        | 3345/18500 [14:42:53<65:38:55, 15.59s/it]

training loss: 0.4230644702911377


training:  18%|█▊        | 3346/18500 [14:43:08<65:39:37, 15.60s/it]

training loss: 0.8404719233512878


training:  18%|█▊        | 3347/18500 [14:43:24<65:39:52, 15.60s/it]

training loss: 0.9201571941375732


training:  18%|█▊        | 3348/18500 [14:43:39<65:39:15, 15.60s/it]

training loss: 0.8760746121406555


training:  18%|█▊        | 3349/18500 [14:43:55<65:39:24, 15.60s/it]

training loss: 0.5100175738334656


training:  18%|█▊        | 3350/18500 [14:44:11<65:37:41, 15.59s/it]

training loss: 1.0602524280548096


training:  18%|█▊        | 3351/18500 [14:44:26<65:37:23, 15.59s/it]

training loss: 0.726884126663208


training:  18%|█▊        | 3352/18500 [14:44:42<65:36:55, 15.59s/it]

training loss: 0.656460702419281


training:  18%|█▊        | 3353/18500 [14:44:57<65:37:05, 15.60s/it]

training loss: 1.0094386339187622


training:  18%|█▊        | 3354/18500 [14:45:13<65:36:06, 15.59s/it]

training loss: 0.7389112710952759


training:  18%|█▊        | 3355/18500 [14:45:29<65:35:50, 15.59s/it]

training loss: 0.8430637717247009


training:  18%|█▊        | 3356/18500 [14:45:44<65:38:11, 15.60s/it]

training loss: 0.4416361153125763


training:  18%|█▊        | 3357/18500 [14:46:00<65:44:02, 15.63s/it]

training loss: 0.7256930470466614


training:  18%|█▊        | 3358/18500 [14:46:16<65:48:46, 15.65s/it]

training loss: 0.5768478512763977


training:  18%|█▊        | 3359/18500 [14:46:31<65:50:47, 15.66s/it]

training loss: 0.6071624159812927


training:  18%|█▊        | 3360/18500 [14:46:47<65:53:32, 15.67s/it]

training loss: 0.36631205677986145


training:  18%|█▊        | 3361/18500 [14:47:03<65:56:25, 15.68s/it]

training loss: 0.5701848268508911


training:  18%|█▊        | 3362/18500 [14:47:18<65:56:54, 15.68s/it]

training loss: 0.6180850863456726


training:  18%|█▊        | 3363/18500 [14:47:34<65:56:29, 15.68s/it]

training loss: 0.701222836971283


training:  18%|█▊        | 3364/18500 [14:47:50<65:55:11, 15.68s/it]

training loss: 0.7255294919013977


training:  18%|█▊        | 3365/18500 [14:48:05<65:54:57, 15.68s/it]

training loss: 0.6182451844215393


training:  18%|█▊        | 3366/18500 [14:48:21<65:54:56, 15.68s/it]

training loss: 0.9924664497375488


training:  18%|█▊        | 3367/18500 [14:48:37<65:53:04, 15.67s/it]

training loss: 0.6195366978645325


training:  18%|█▊        | 3368/18500 [14:48:52<65:46:11, 15.65s/it]

training loss: 0.9328364133834839


training:  18%|█▊        | 3369/18500 [14:49:08<65:42:14, 15.63s/it]

training loss: 0.8198044300079346


training:  18%|█▊        | 3370/18500 [14:49:24<65:39:24, 15.62s/it]

training loss: 1.0010523796081543


training:  18%|█▊        | 3371/18500 [14:49:39<65:36:24, 15.61s/it]

training loss: 0.7033199667930603


training:  18%|█▊        | 3372/18500 [14:49:55<65:34:48, 15.61s/it]

training loss: 0.392658531665802


training:  18%|█▊        | 3373/18500 [14:50:10<65:32:53, 15.60s/it]

training loss: 0.6216697692871094


training:  18%|█▊        | 3374/18500 [14:50:26<65:32:28, 15.60s/it]

training loss: 0.8804017901420593


training:  18%|█▊        | 3375/18500 [14:50:41<65:31:31, 15.60s/it]

training loss: 0.5195554494857788


training:  18%|█▊        | 3376/18500 [14:50:57<65:31:36, 15.60s/it]

training loss: 0.4903641641139984


training:  18%|█▊        | 3377/18500 [14:51:13<65:31:07, 15.60s/it]

training loss: 0.6973240375518799


training:  18%|█▊        | 3378/18500 [14:51:28<65:30:52, 15.60s/it]

training loss: 0.8170633316040039


training:  18%|█▊        | 3379/18500 [14:51:44<65:31:20, 15.60s/it]

training loss: 0.7385373711585999


training:  18%|█▊        | 3380/18500 [14:51:59<65:30:22, 15.60s/it]

training loss: 0.8998873233795166


training:  18%|█▊        | 3381/18500 [14:52:15<65:29:56, 15.60s/it]

training loss: 0.9736067056655884


training:  18%|█▊        | 3382/18500 [14:52:31<65:29:25, 15.59s/it]

training loss: 0.3034302592277527


training:  18%|█▊        | 3383/18500 [14:52:46<65:28:22, 15.59s/it]

training loss: 0.5345381498336792


training:  18%|█▊        | 3384/18500 [14:53:02<65:28:26, 15.59s/it]

training loss: 0.9244489669799805


training:  18%|█▊        | 3385/18500 [14:53:17<65:27:54, 15.59s/it]

training loss: 1.1030420064926147


training:  18%|█▊        | 3386/18500 [14:53:33<65:28:00, 15.59s/it]

training loss: 0.8103675842285156


training:  18%|█▊        | 3387/18500 [14:53:49<65:26:44, 15.59s/it]

training loss: 1.0817488431930542


training:  18%|█▊        | 3388/18500 [14:54:04<65:27:12, 15.59s/it]

training loss: 0.9837319254875183


training:  18%|█▊        | 3389/18500 [14:54:20<65:26:52, 15.59s/it]

training loss: 0.5103512406349182


training:  18%|█▊        | 3390/18500 [14:54:35<65:25:46, 15.59s/it]

training loss: 1.0384807586669922


training:  18%|█▊        | 3391/18500 [14:54:51<65:25:02, 15.59s/it]

training loss: 1.1906061172485352


training:  18%|█▊        | 3392/18500 [14:55:07<65:24:53, 15.59s/it]

training loss: 0.48898187279701233


training:  18%|█▊        | 3393/18500 [14:55:22<65:25:03, 15.59s/it]

training loss: 0.6131043434143066


training:  18%|█▊        | 3394/18500 [14:55:38<65:23:50, 15.59s/it]

training loss: 0.5479305386543274


training:  18%|█▊        | 3395/18500 [14:55:53<65:22:58, 15.58s/it]

training loss: 0.83572918176651


training:  18%|█▊        | 3396/18500 [14:56:09<65:23:48, 15.59s/it]

training loss: 0.6902432441711426


training:  18%|█▊        | 3397/18500 [14:56:24<65:24:25, 15.59s/it]

training loss: 0.2748177945613861


training:  18%|█▊        | 3398/18500 [14:56:40<65:23:48, 15.59s/it]

training loss: 1.0210325717926025


training:  18%|█▊        | 3399/18500 [14:56:56<65:24:23, 15.59s/it]

training loss: 0.5502626895904541


training:  18%|█▊        | 3400/18500 [14:57:11<65:23:00, 15.59s/it]

training loss: 0.7027280330657959
training loss: 0.5976521372795105


training:  18%|█▊        | 3401/18500 [14:57:28<67:07:16, 16.00s/it]

validation loss: 1.499350905418396


training:  18%|█▊        | 3402/18500 [14:57:44<66:37:29, 15.89s/it]

training loss: 0.5077615976333618


training:  18%|█▊        | 3403/18500 [14:57:59<66:15:21, 15.80s/it]

training loss: 1.1635562181472778


training:  18%|█▊        | 3404/18500 [14:58:15<65:58:55, 15.74s/it]

training loss: 0.989230751991272


training:  18%|█▊        | 3405/18500 [14:58:31<65:47:49, 15.69s/it]

training loss: 1.2287203073501587


training:  18%|█▊        | 3406/18500 [14:58:46<65:39:19, 15.66s/it]

training loss: 0.7205446362495422


training:  18%|█▊        | 3407/18500 [14:59:02<65:35:07, 15.64s/it]

training loss: 0.8311900496482849


training:  18%|█▊        | 3408/18500 [14:59:17<65:31:16, 15.63s/it]

training loss: 1.0241353511810303


training:  18%|█▊        | 3409/18500 [14:59:33<65:28:52, 15.62s/it]

training loss: 0.8604678511619568


training:  18%|█▊        | 3410/18500 [14:59:49<65:25:21, 15.61s/it]

training loss: 0.8618997931480408


training:  18%|█▊        | 3411/18500 [15:00:04<65:23:35, 15.60s/it]

training loss: 0.8189350366592407


training:  18%|█▊        | 3412/18500 [15:00:20<65:24:01, 15.60s/it]

training loss: 0.7566346526145935


training:  18%|█▊        | 3413/18500 [15:00:35<65:22:03, 15.60s/it]

training loss: 0.9267541766166687


training:  18%|█▊        | 3414/18500 [15:00:51<65:20:22, 15.59s/it]

training loss: 1.0348001718521118


training:  18%|█▊        | 3415/18500 [15:01:07<65:19:49, 15.59s/it]

training loss: 1.0120270252227783


training:  18%|█▊        | 3416/18500 [15:01:22<65:18:45, 15.59s/it]

training loss: 0.7417125105857849


training:  18%|█▊        | 3417/18500 [15:01:38<65:17:40, 15.58s/it]

training loss: 0.8806314468383789


training:  18%|█▊        | 3418/18500 [15:01:53<65:16:48, 15.58s/it]

training loss: 1.0413038730621338


training:  18%|█▊        | 3419/18500 [15:02:09<65:18:08, 15.59s/it]

training loss: 0.796271026134491


training:  18%|█▊        | 3420/18500 [15:02:24<65:18:00, 15.59s/it]

training loss: 0.9120457172393799


training:  18%|█▊        | 3421/18500 [15:02:40<65:16:58, 15.59s/it]

training loss: 0.7906842827796936


training:  18%|█▊        | 3422/18500 [15:02:56<65:16:32, 15.59s/it]

training loss: 0.7404796481132507


training:  19%|█▊        | 3423/18500 [15:03:11<65:15:51, 15.58s/it]

training loss: 0.7064976692199707


training:  19%|█▊        | 3424/18500 [15:03:27<65:15:26, 15.58s/it]

training loss: 0.819685161113739


training:  19%|█▊        | 3425/18500 [15:03:42<65:14:51, 15.58s/it]

training loss: 0.703533411026001


training:  19%|█▊        | 3426/18500 [15:03:58<65:14:54, 15.58s/it]

training loss: 0.6211205124855042


training:  19%|█▊        | 3427/18500 [15:04:13<65:14:10, 15.58s/it]

training loss: 0.7704840898513794


training:  19%|█▊        | 3428/18500 [15:04:29<65:14:49, 15.58s/it]

training loss: 0.7690497040748596


training:  19%|█▊        | 3429/18500 [15:04:45<65:14:06, 15.58s/it]

training loss: 1.0806047916412354


training:  19%|█▊        | 3430/18500 [15:05:00<65:13:28, 15.58s/it]

training loss: 0.7153924107551575


training:  19%|█▊        | 3431/18500 [15:05:16<65:13:25, 15.58s/it]

training loss: 1.1077042818069458


training:  19%|█▊        | 3432/18500 [15:05:31<65:12:25, 15.58s/it]

training loss: 1.0926586389541626


training:  19%|█▊        | 3433/18500 [15:05:47<65:13:06, 15.58s/it]

training loss: 0.9583472609519958


training:  19%|█▊        | 3434/18500 [15:06:03<65:13:04, 15.58s/it]

training loss: 0.7331068515777588


training:  19%|█▊        | 3435/18500 [15:06:18<65:12:41, 15.58s/it]

training loss: 0.8816781044006348


training:  19%|█▊        | 3436/18500 [15:06:34<65:12:10, 15.58s/it]

training loss: 0.6740416884422302


training:  19%|█▊        | 3437/18500 [15:06:49<65:11:02, 15.58s/it]

training loss: 0.7337908744812012


training:  19%|█▊        | 3438/18500 [15:07:05<65:12:15, 15.58s/it]

training loss: 0.8104633092880249


training:  19%|█▊        | 3439/18500 [15:07:21<65:13:13, 15.59s/it]

training loss: 0.8003835678100586


training:  19%|█▊        | 3440/18500 [15:07:36<65:12:14, 15.59s/it]

training loss: 0.9206461906433105


training:  19%|█▊        | 3441/18500 [15:07:52<65:10:45, 15.58s/it]

training loss: 0.8114679455757141


training:  19%|█▊        | 3442/18500 [15:08:07<65:10:40, 15.58s/it]

training loss: 0.8192846179008484


training:  19%|█▊        | 3443/18500 [15:08:23<65:11:00, 15.58s/it]

training loss: 0.9863585829734802


training:  19%|█▊        | 3444/18500 [15:08:38<65:10:25, 15.58s/it]

training loss: 0.7967487573623657


training:  19%|█▊        | 3445/18500 [15:08:54<65:10:54, 15.59s/it]

training loss: 0.9957943558692932


training:  19%|█▊        | 3446/18500 [15:09:10<65:10:14, 15.58s/it]

training loss: 0.7969908118247986


training:  19%|█▊        | 3447/18500 [15:09:25<65:10:11, 15.59s/it]

training loss: 0.5949138402938843


training:  19%|█▊        | 3448/18500 [15:09:41<65:10:20, 15.59s/it]

training loss: 1.3033148050308228


training:  19%|█▊        | 3449/18500 [15:09:56<65:10:31, 15.59s/it]

training loss: 0.9037331938743591


training:  19%|█▊        | 3450/18500 [15:10:12<65:09:41, 15.59s/it]

training loss: 0.7829321622848511


training:  19%|█▊        | 3451/18500 [15:10:28<65:10:26, 15.59s/it]

training loss: 0.761442244052887


training:  19%|█▊        | 3452/18500 [15:10:43<65:10:31, 15.59s/it]

training loss: 0.7026821374893188


training:  19%|█▊        | 3453/18500 [15:10:58<64:35:46, 15.45s/it]

training loss: 0.6202152967453003


training:  19%|█▊        | 3454/18500 [15:11:14<64:45:36, 15.49s/it]

training loss: 0.7887318730354309


training:  19%|█▊        | 3455/18500 [15:11:29<64:52:29, 15.52s/it]

training loss: 1.0595747232437134


training:  19%|█▊        | 3456/18500 [15:11:45<64:56:35, 15.54s/it]

training loss: 0.7817959785461426


training:  19%|█▊        | 3457/18500 [15:12:01<64:59:06, 15.55s/it]

training loss: 0.5906462669372559


training:  19%|█▊        | 3458/18500 [15:12:16<65:01:32, 15.56s/it]

training loss: 0.8288276195526123


training:  19%|█▊        | 3459/18500 [15:12:32<65:04:39, 15.58s/it]

training loss: 0.8558005094528198


training:  19%|█▊        | 3460/18500 [15:12:47<65:05:26, 15.58s/it]

training loss: 0.7965477705001831


training:  19%|█▊        | 3461/18500 [15:13:03<65:05:12, 15.58s/it]

training loss: 0.8742123246192932


training:  19%|█▊        | 3462/18500 [15:13:19<65:05:23, 15.58s/it]

training loss: 1.125938057899475


training:  19%|█▊        | 3463/18500 [15:13:34<65:04:27, 15.58s/it]

training loss: 1.0936908721923828


training:  19%|█▊        | 3464/18500 [15:13:50<65:03:46, 15.58s/it]

training loss: 0.838103711605072


training:  19%|█▊        | 3465/18500 [15:14:05<65:03:49, 15.58s/it]

training loss: 0.6151496767997742


training:  19%|█▊        | 3466/18500 [15:14:21<65:06:19, 15.59s/it]

training loss: 0.5908306241035461


training:  19%|█▊        | 3467/18500 [15:14:37<65:06:18, 15.59s/it]

training loss: 0.887182891368866


training:  19%|█▊        | 3468/18500 [15:14:52<65:05:36, 15.59s/it]

training loss: 0.8837775588035583


training:  19%|█▉        | 3469/18500 [15:15:08<65:05:13, 15.59s/it]

training loss: 1.0560643672943115


training:  19%|█▉        | 3470/18500 [15:15:23<65:07:33, 15.60s/it]

training loss: 0.4306001365184784


training:  19%|█▉        | 3471/18500 [15:15:39<65:07:00, 15.60s/it]

training loss: 0.9872099161148071


training:  19%|█▉        | 3472/18500 [15:15:54<65:07:00, 15.60s/it]

training loss: 0.7958581447601318


training:  19%|█▉        | 3473/18500 [15:16:10<65:11:55, 15.62s/it]

training loss: 0.736769437789917


training:  19%|█▉        | 3474/18500 [15:16:26<65:17:00, 15.64s/it]

training loss: 1.0292859077453613


training:  19%|█▉        | 3475/18500 [15:16:42<65:19:47, 15.65s/it]

training loss: 0.6806986331939697


training:  19%|█▉        | 3476/18500 [15:16:57<65:20:40, 15.66s/it]

training loss: 0.8687680959701538


training:  19%|█▉        | 3477/18500 [15:17:13<65:21:52, 15.66s/it]

training loss: 0.91140216588974


training:  19%|█▉        | 3478/18500 [15:17:29<65:23:42, 15.67s/it]

training loss: 0.533409595489502


training:  19%|█▉        | 3479/18500 [15:17:44<65:26:14, 15.68s/it]

training loss: 0.7293481826782227


training:  19%|█▉        | 3480/18500 [15:18:00<65:25:46, 15.68s/it]

training loss: 1.022500991821289


training:  19%|█▉        | 3481/18500 [15:18:16<65:24:20, 15.68s/it]

training loss: 0.6440114378929138


training:  19%|█▉        | 3482/18500 [15:18:31<65:23:19, 15.67s/it]

training loss: 0.8095685839653015


training:  19%|█▉        | 3483/18500 [15:18:47<65:22:46, 15.67s/it]

training loss: 0.8932999968528748


training:  19%|█▉        | 3484/18500 [15:19:03<65:19:53, 15.66s/it]

training loss: 0.6390427947044373


training:  19%|█▉        | 3485/18500 [15:19:18<65:15:27, 15.65s/it]

training loss: 0.42609134316444397


training:  19%|█▉        | 3486/18500 [15:19:34<65:09:38, 15.62s/it]

training loss: 0.9651671648025513


training:  19%|█▉        | 3487/18500 [15:19:49<65:05:04, 15.61s/it]

training loss: 0.72171550989151


training:  19%|█▉        | 3488/18500 [15:20:05<65:02:48, 15.60s/it]

training loss: 0.6756902933120728


training:  19%|█▉        | 3489/18500 [15:20:21<65:02:34, 15.60s/it]

training loss: 0.647616982460022


training:  19%|█▉        | 3490/18500 [15:20:36<65:01:15, 15.59s/it]

training loss: 1.0391590595245361


training:  19%|█▉        | 3491/18500 [15:20:52<64:59:03, 15.59s/it]

training loss: 1.0373228788375854


training:  19%|█▉        | 3492/18500 [15:21:07<64:58:03, 15.58s/it]

training loss: 0.8380764722824097


training:  19%|█▉        | 3493/18500 [15:21:23<64:58:15, 15.59s/it]

training loss: 0.9200610518455505


training:  19%|█▉        | 3494/18500 [15:21:38<64:57:03, 15.58s/it]

training loss: 0.4499042332172394


training:  19%|█▉        | 3495/18500 [15:21:54<64:57:22, 15.58s/it]

training loss: 0.6311010122299194


training:  19%|█▉        | 3496/18500 [15:22:10<64:56:50, 15.58s/it]

training loss: 0.9770994186401367


training:  19%|█▉        | 3497/18500 [15:22:25<64:57:19, 15.59s/it]

training loss: 0.48922964930534363


training:  19%|█▉        | 3498/18500 [15:22:41<64:56:15, 15.58s/it]

training loss: 0.8949732184410095


training:  19%|█▉        | 3499/18500 [15:22:56<64:55:58, 15.58s/it]

training loss: 0.8124778866767883


training:  19%|█▉        | 3500/18500 [15:23:12<64:55:38, 15.58s/it]

training loss: 0.42588403820991516
training loss: 0.866815447807312



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.547590970993042
Rahman.
Letectvo vsetko systematicky nici, zonu po zone. Ak sa tomuto
letectvu nezakaze bombardovat, znici zvysok Aleppa, stvrt po stvrti,
povedal AFP predstavitel vzburencov v meste. Obvinil rezim a jeho spojencov
Rusko a Iran z umyselneho vyhladovania ludi v meste a dalsich praktik
v rozpore s medzinarodnym pravom.
Damask oznacuje vzburencov za teroristov a odmieta akukolvek zahranicnu
kritiku, ze bojom proti ozbrojenej opozicii sa dopusta vojnovych
zlocinov.
Od zaciatku ofenzivy vladnych sil pred 13 dnami podla exilovej
opozicie zahynulo najmenej 225 civilistov, z toho 27 deti. Dobytie Aleppa
by bolo pre syrskeho prezidenta Basara Asada velkym vitazstvom po
siestich a pol rokoch bojov, ktore si podla zdrojov AFP uz vyziadali viac
ako 300 000 mrtvych.
Na Aleppo tazko dopada humanitarna kriza sposobena dlhotrvajucou
vojnou. Podla odhadov je vo vychodnej casti Aleppa viac ako 250 000 ludi,
ktori nemaju pristup k zdroju potravin ani lie


generating:   0%|          | 1/512 [00:00<01:47,  4.74it/s][A
generating:   0%|          | 2/512 [00:00<01:47,  4.75it/s][A
generating:   1%|          | 3/512 [00:00<01:47,  4.75it/s][A
generating:   1%|          | 4/512 [00:00<01:46,  4.77it/s][A
generating:   1%|          | 5/512 [00:01<01:46,  4.75it/s][A
generating:   1%|          | 6/512 [00:01<01:45,  4.78it/s][A
generating:   1%|▏         | 7/512 [00:01<01:46,  4.75it/s][A
generating:   2%|▏         | 8/512 [00:01<01:46,  4.73it/s][A
generating:   2%|▏         | 9/512 [00:01<01:45,  4.75it/s][A
generating:   2%|▏         | 10/512 [00:02<01:45,  4.75it/s][A
generating:   2%|▏         | 11/512 [00:02<01:45,  4.77it/s][A
generating:   2%|▏         | 12/512 [00:02<01:45,  4.76it/s][A
generating:   3%|▎         | 13/512 [00:02<01:44,  4.77it/s][A
generating:   3%|▎         | 14/512 [00:02<01:47,  4.65it/s][A
generating:   3%|▎         | 15/512 [00:03<01:49,  4.54it/s][A
generating:   3%|▎         | 16/512 [00:03<01:50

 stanovili. Navrh iranskej ekonomiku
nasleduje ziskal v spolocnosti Haribo Datislav Kovana, ked vlada takato spravou a
najvacsimi pasiatku hrozbu jeho zadrzani ako
priestorom v oblasti siestich rokoch 1940 a na 23 v halestich. Po obchodnej elektrarni
v suvislosti sudove problemov pravidiel uchadza
k tohto roka a znizilo 11 00 pravne pripravovat telefonickeho parlamentnych partnerov a
v ramci umeny dosah po sireni. Na miesta spolu s nadvladanie sa aj v stredu poziadat
chcel uplne poziadat prevziat kon


training:  19%|█▉        | 3502/18500 [15:25:34<161:48:36, 38.84s/it]

training loss: 1.0598064661026


training:  19%|█▉        | 3503/18500 [15:25:49<132:43:04, 31.86s/it]

training loss: 0.6006196141242981


training:  19%|█▉        | 3504/18500 [15:26:05<112:22:07, 26.98s/it]

training loss: 0.8705264329910278


training:  19%|█▉        | 3505/18500 [15:26:21<98:08:46, 23.56s/it] 

training loss: 0.7792367935180664


training:  19%|█▉        | 3506/18500 [15:26:36<88:09:43, 21.17s/it]

training loss: 0.6853391528129578


training:  19%|█▉        | 3507/18500 [15:26:52<81:17:29, 19.52s/it]

training loss: 0.8423423767089844


training:  19%|█▉        | 3508/18500 [15:27:07<76:21:29, 18.34s/it]

training loss: 1.114918828010559


training:  19%|█▉        | 3509/18500 [15:27:23<72:55:36, 17.51s/it]

training loss: 0.7337333559989929


training:  19%|█▉        | 3510/18500 [15:27:39<70:30:11, 16.93s/it]

training loss: 0.789231538772583


training:  19%|█▉        | 3511/18500 [15:27:54<68:49:11, 16.53s/it]

training loss: 0.5405957102775574


training:  19%|█▉        | 3512/18500 [15:28:10<67:38:24, 16.25s/it]

training loss: 0.3890759348869324


training:  19%|█▉        | 3513/18500 [15:28:25<66:49:22, 16.05s/it]

training loss: 0.5756661295890808


training:  19%|█▉        | 3514/18500 [15:28:41<66:14:02, 15.91s/it]

training loss: 0.7082271575927734


training:  19%|█▉        | 3515/18500 [15:28:57<65:48:48, 15.81s/it]

training loss: 0.4074402153491974


training:  19%|█▉        | 3516/18500 [15:29:12<65:31:48, 15.74s/it]

training loss: 0.8204673528671265


training:  19%|█▉        | 3517/18500 [15:29:28<65:19:57, 15.70s/it]

training loss: 0.6456652283668518


training:  19%|█▉        | 3518/18500 [15:29:43<65:11:19, 15.66s/it]

training loss: 0.8414154052734375


training:  19%|█▉        | 3519/18500 [15:29:59<65:05:13, 15.64s/it]

training loss: 0.557572603225708


training:  19%|█▉        | 3520/18500 [15:30:14<65:00:12, 15.62s/it]

training loss: 0.6492317318916321


training:  19%|█▉        | 3521/18500 [15:30:30<64:56:25, 15.61s/it]

training loss: 0.4831485152244568


training:  19%|█▉        | 3522/18500 [15:30:46<64:54:00, 15.60s/it]

training loss: 0.7369334101676941


training:  19%|█▉        | 3523/18500 [15:31:01<64:52:03, 15.59s/it]

training loss: 0.8173126578330994


training:  19%|█▉        | 3524/18500 [15:31:17<64:51:42, 15.59s/it]

training loss: 0.6897526979446411


training:  19%|█▉        | 3525/18500 [15:31:32<64:51:52, 15.59s/it]

training loss: 0.921887993812561


training:  19%|█▉        | 3526/18500 [15:31:48<64:51:21, 15.59s/it]

training loss: 0.5692276954650879


training:  19%|█▉        | 3527/18500 [15:32:04<64:50:09, 15.59s/it]

training loss: 0.7959738969802856


training:  19%|█▉        | 3528/18500 [15:32:19<64:50:26, 15.59s/it]

training loss: 0.6463150382041931


training:  19%|█▉        | 3529/18500 [15:32:35<64:50:17, 15.59s/it]

training loss: 0.6110602021217346


training:  19%|█▉        | 3530/18500 [15:32:50<64:49:35, 15.59s/it]

training loss: 0.7871837615966797


training:  19%|█▉        | 3531/18500 [15:33:06<64:49:45, 15.59s/it]

training loss: 0.797842264175415


training:  19%|█▉        | 3532/18500 [15:33:22<64:50:28, 15.60s/it]

training loss: 0.5500227808952332


training:  19%|█▉        | 3533/18500 [15:33:37<64:48:53, 15.59s/it]

training loss: 0.6908316612243652


training:  19%|█▉        | 3534/18500 [15:33:53<64:47:32, 15.59s/it]

training loss: 0.9001517295837402


training:  19%|█▉        | 3535/18500 [15:34:08<64:47:24, 15.59s/it]

training loss: 0.5674886703491211


training:  19%|█▉        | 3536/18500 [15:34:24<64:47:41, 15.59s/it]

training loss: 0.690943717956543


training:  19%|█▉        | 3537/18500 [15:34:39<64:47:07, 15.59s/it]

training loss: 0.7655661702156067


training:  19%|█▉        | 3538/18500 [15:34:55<64:46:49, 15.59s/it]

training loss: 0.40837737917900085


training:  19%|█▉        | 3539/18500 [15:35:11<64:45:59, 15.58s/it]

training loss: 0.7290560603141785


training:  19%|█▉        | 3540/18500 [15:35:26<64:46:00, 15.59s/it]

training loss: 0.8801053166389465


training:  19%|█▉        | 3541/18500 [15:35:42<64:45:31, 15.58s/it]

training loss: 0.4648481607437134


training:  19%|█▉        | 3542/18500 [15:35:57<64:45:30, 15.59s/it]

training loss: 0.7418615818023682


training:  19%|█▉        | 3543/18500 [15:36:13<64:45:14, 15.59s/it]

training loss: 1.3368192911148071


training:  19%|█▉        | 3544/18500 [15:36:29<64:46:02, 15.59s/it]

training loss: 1.1126571893692017


training:  19%|█▉        | 3545/18500 [15:36:44<64:45:53, 15.59s/it]

training loss: 1.074901819229126


training:  19%|█▉        | 3546/18500 [15:37:00<64:45:40, 15.59s/it]

training loss: 0.8937666416168213


training:  19%|█▉        | 3547/18500 [15:37:15<64:44:41, 15.59s/it]

training loss: 1.0284526348114014


training:  19%|█▉        | 3548/18500 [15:37:31<64:43:43, 15.58s/it]

training loss: 0.7731838226318359


training:  19%|█▉        | 3549/18500 [15:37:46<64:43:43, 15.59s/it]

training loss: 0.6576703786849976


training:  19%|█▉        | 3550/18500 [15:38:02<64:43:50, 15.59s/it]

training loss: 0.8470162153244019


training:  19%|█▉        | 3551/18500 [15:38:18<64:45:42, 15.60s/it]

training loss: 0.6045041084289551


training:  19%|█▉        | 3552/18500 [15:38:33<64:45:22, 15.60s/it]

training loss: 1.088096022605896


training:  19%|█▉        | 3553/18500 [15:38:49<64:44:05, 15.59s/it]

training loss: 0.9400553107261658


training:  19%|█▉        | 3554/18500 [15:39:04<64:43:30, 15.59s/it]

training loss: 0.8025325536727905


training:  19%|█▉        | 3555/18500 [15:39:20<64:44:51, 15.60s/it]

training loss: 1.074601411819458


training:  19%|█▉        | 3556/18500 [15:39:36<64:43:29, 15.59s/it]

training loss: 0.6524146795272827


training:  19%|█▉        | 3557/18500 [15:39:51<64:42:41, 15.59s/it]

training loss: 1.0942949056625366


training:  19%|█▉        | 3558/18500 [15:40:07<64:42:20, 15.59s/it]

training loss: 1.049867868423462


training:  19%|█▉        | 3559/18500 [15:40:22<64:43:08, 15.59s/it]

training loss: 1.0132297277450562


training:  19%|█▉        | 3560/18500 [15:40:38<64:43:06, 15.59s/it]

training loss: 0.6727476716041565


training:  19%|█▉        | 3561/18500 [15:40:54<64:42:40, 15.59s/it]

training loss: 0.5302700996398926


training:  19%|█▉        | 3562/18500 [15:41:09<64:42:58, 15.60s/it]

training loss: 0.8897562623023987


training:  19%|█▉        | 3563/18500 [15:41:25<64:43:56, 15.60s/it]

training loss: 0.46013331413269043


training:  19%|█▉        | 3564/18500 [15:41:40<64:43:01, 15.60s/it]

training loss: 0.7670812606811523


training:  19%|█▉        | 3565/18500 [15:41:56<64:42:34, 15.60s/it]

training loss: 0.8333081603050232


training:  19%|█▉        | 3566/18500 [15:42:12<64:41:45, 15.60s/it]

training loss: 0.7985725402832031


training:  19%|█▉        | 3567/18500 [15:42:27<64:41:20, 15.60s/it]

training loss: 0.9111447930335999


training:  19%|█▉        | 3568/18500 [15:42:43<64:40:30, 15.59s/it]

training loss: 0.6447205543518066


training:  19%|█▉        | 3569/18500 [15:42:58<64:40:51, 15.60s/it]

training loss: 0.9167429208755493


training:  19%|█▉        | 3570/18500 [15:43:14<64:39:14, 15.59s/it]

training loss: 0.6902068853378296


training:  19%|█▉        | 3571/18500 [15:43:30<64:40:14, 15.59s/it]

training loss: 0.5594512224197388


training:  19%|█▉        | 3572/18500 [15:43:45<64:39:33, 15.59s/it]

training loss: 0.5419621467590332


training:  19%|█▉        | 3573/18500 [15:44:01<64:38:46, 15.59s/it]

training loss: 0.5601731538772583


training:  19%|█▉        | 3574/18500 [15:44:16<64:38:26, 15.59s/it]

training loss: 0.8474259376525879


training:  19%|█▉        | 3575/18500 [15:44:32<64:38:26, 15.59s/it]

training loss: 0.7185230255126953


training:  19%|█▉        | 3576/18500 [15:44:48<64:38:16, 15.59s/it]

training loss: 0.8357294201850891


training:  19%|█▉        | 3577/18500 [15:45:03<64:37:14, 15.59s/it]

training loss: 0.6107919812202454


training:  19%|█▉        | 3578/18500 [15:45:19<64:37:03, 15.59s/it]

training loss: 0.6568777561187744


training:  19%|█▉        | 3579/18500 [15:45:34<64:36:10, 15.59s/it]

training loss: 0.692937433719635


training:  19%|█▉        | 3580/18500 [15:45:50<64:35:35, 15.59s/it]

training loss: 0.8222808837890625


training:  19%|█▉        | 3581/18500 [15:46:05<64:38:47, 15.60s/it]

training loss: 0.5921415090560913


training:  19%|█▉        | 3582/18500 [15:46:21<64:48:04, 15.64s/it]

training loss: 0.8085394501686096


training:  19%|█▉        | 3583/18500 [15:46:37<64:52:53, 15.66s/it]

training loss: 0.7691394090652466


training:  19%|█▉        | 3584/18500 [15:46:53<64:54:32, 15.67s/it]

training loss: 1.1106961965560913


training:  19%|█▉        | 3585/18500 [15:47:08<64:56:53, 15.68s/it]

training loss: 0.7481832504272461


training:  19%|█▉        | 3586/18500 [15:47:24<65:00:52, 15.69s/it]

training loss: 0.8618403673171997


training:  19%|█▉        | 3587/18500 [15:47:40<65:01:22, 15.70s/it]

training loss: 0.7056465744972229


training:  19%|█▉        | 3588/18500 [15:47:55<65:01:39, 15.70s/it]

training loss: 0.918816089630127


training:  19%|█▉        | 3589/18500 [15:48:11<65:01:51, 15.70s/it]

training loss: 0.7456275224685669


training:  19%|█▉        | 3590/18500 [15:48:27<64:57:41, 15.68s/it]

training loss: 0.8199824690818787


training:  19%|█▉        | 3591/18500 [15:48:42<64:53:02, 15.67s/it]

training loss: 0.90557461977005


training:  19%|█▉        | 3592/18500 [15:48:58<64:49:23, 15.65s/it]

training loss: 0.8700994253158569


training:  19%|█▉        | 3593/18500 [15:49:14<64:44:09, 15.63s/it]

training loss: 0.6855742335319519


training:  19%|█▉        | 3594/18500 [15:49:29<64:41:13, 15.62s/it]

training loss: 1.2086045742034912


training:  19%|█▉        | 3595/18500 [15:49:45<64:39:03, 15.62s/it]

training loss: 0.7746601104736328


training:  19%|█▉        | 3596/18500 [15:50:00<64:36:30, 15.61s/it]

training loss: 0.8051217198371887


training:  19%|█▉        | 3597/18500 [15:50:16<64:34:44, 15.60s/it]

training loss: 1.0020757913589478


training:  19%|█▉        | 3598/18500 [15:50:32<64:33:15, 15.59s/it]

training loss: 0.9172677993774414


training:  19%|█▉        | 3599/18500 [15:50:47<64:33:03, 15.60s/it]

training loss: 0.7022368907928467


training:  19%|█▉        | 3600/18500 [15:51:03<64:31:29, 15.59s/it]

training loss: 0.8069306015968323
training loss: 0.830337643623352


training:  19%|█▉        | 3601/18500 [15:51:20<66:30:13, 16.07s/it]

validation loss: 1.5153887271881104


training:  19%|█▉        | 3602/18500 [15:51:36<65:54:47, 15.93s/it]

training loss: 0.9390186667442322


training:  19%|█▉        | 3603/18500 [15:51:51<65:29:02, 15.82s/it]

training loss: 0.6336609721183777


training:  19%|█▉        | 3604/18500 [15:52:07<65:10:00, 15.75s/it]

training loss: 0.628645658493042


training:  19%|█▉        | 3605/18500 [15:52:22<64:58:01, 15.70s/it]

training loss: 0.7465850710868835


training:  19%|█▉        | 3606/18500 [15:52:38<64:48:51, 15.67s/it]

training loss: 0.7203320860862732


training:  19%|█▉        | 3607/18500 [15:52:53<64:42:08, 15.64s/it]

training loss: 1.0260828733444214


training:  20%|█▉        | 3608/18500 [15:53:09<64:38:32, 15.63s/it]

training loss: 0.5198202133178711


training:  20%|█▉        | 3609/18500 [15:53:25<64:36:34, 15.62s/it]

training loss: 0.8165357708930969


training:  20%|█▉        | 3610/18500 [15:53:40<64:33:23, 15.61s/it]

training loss: 1.1784288883209229


training:  20%|█▉        | 3611/18500 [15:53:56<64:33:17, 15.61s/it]

training loss: 0.9022305011749268


training:  20%|█▉        | 3612/18500 [15:54:11<64:30:40, 15.60s/it]

training loss: 1.140835165977478


training:  20%|█▉        | 3613/18500 [15:54:27<64:30:48, 15.60s/it]

training loss: 0.6290578842163086


training:  20%|█▉        | 3614/18500 [15:54:43<64:28:48, 15.59s/it]

training loss: 0.8924956917762756


training:  20%|█▉        | 3615/18500 [15:54:58<64:27:56, 15.59s/it]

training loss: 0.9095051288604736


training:  20%|█▉        | 3616/18500 [15:55:14<64:27:07, 15.59s/it]

training loss: 0.577761709690094


training:  20%|█▉        | 3617/18500 [15:55:29<64:27:34, 15.59s/it]

training loss: 0.9723027944564819


training:  20%|█▉        | 3618/18500 [15:55:45<64:26:10, 15.59s/it]

training loss: 1.1631540060043335


training:  20%|█▉        | 3619/18500 [15:56:01<64:25:06, 15.58s/it]

training loss: 0.6004878878593445


training:  20%|█▉        | 3620/18500 [15:56:16<64:24:35, 15.58s/it]

training loss: 0.8417174816131592


training:  20%|█▉        | 3621/18500 [15:56:32<64:24:13, 15.58s/it]

training loss: 0.8437506556510925


training:  20%|█▉        | 3622/18500 [15:56:47<64:25:15, 15.59s/it]

training loss: 0.922784686088562


training:  20%|█▉        | 3623/18500 [15:57:03<64:24:21, 15.59s/it]

training loss: 0.9301736354827881


training:  20%|█▉        | 3624/18500 [15:57:18<64:24:09, 15.59s/it]

training loss: 0.3302496671676636


training:  20%|█▉        | 3625/18500 [15:57:34<64:22:55, 15.58s/it]

training loss: 0.6417836546897888


training:  20%|█▉        | 3626/18500 [15:57:50<64:22:01, 15.58s/it]

training loss: 0.7662416696548462


training:  20%|█▉        | 3627/18500 [15:58:05<64:22:23, 15.58s/it]

training loss: 1.0064257383346558


training:  20%|█▉        | 3628/18500 [15:58:21<64:23:44, 15.59s/it]

training loss: 0.44416356086730957


training:  20%|█▉        | 3629/18500 [15:58:36<64:22:40, 15.58s/it]

training loss: 0.6915001273155212


training:  20%|█▉        | 3630/18500 [15:58:52<64:21:52, 15.58s/it]

training loss: 1.0281223058700562


training:  20%|█▉        | 3631/18500 [15:59:08<64:22:47, 15.59s/it]

training loss: 1.0268634557724


training:  20%|█▉        | 3632/18500 [15:59:23<64:23:38, 15.59s/it]

training loss: 0.9267762303352356


training:  20%|█▉        | 3633/18500 [15:59:39<64:23:27, 15.59s/it]

training loss: 0.7037312984466553


training:  20%|█▉        | 3634/18500 [15:59:54<64:22:48, 15.59s/it]

training loss: 0.5184009075164795


training:  20%|█▉        | 3635/18500 [16:00:10<64:22:34, 15.59s/it]

training loss: 0.645167350769043


training:  20%|█▉        | 3636/18500 [16:00:25<64:22:31, 15.59s/it]

training loss: 0.926927924156189


training:  20%|█▉        | 3637/18500 [16:00:41<64:22:27, 15.59s/it]

training loss: 0.6527762413024902


training:  20%|█▉        | 3638/18500 [16:00:57<64:22:33, 15.59s/it]

training loss: 0.3936421275138855


training:  20%|█▉        | 3639/18500 [16:01:12<64:21:34, 15.59s/it]

training loss: 0.4082101583480835


training:  20%|█▉        | 3640/18500 [16:01:28<64:21:25, 15.59s/it]

training loss: 0.8196238875389099


training:  20%|█▉        | 3641/18500 [16:01:43<64:21:25, 15.59s/it]

training loss: 1.1026397943496704


training:  20%|█▉        | 3642/18500 [16:01:59<64:21:37, 15.59s/it]

training loss: 0.5910520553588867


training:  20%|█▉        | 3643/18500 [16:02:15<64:21:24, 15.59s/it]

training loss: 0.7301797270774841


training:  20%|█▉        | 3644/18500 [16:02:30<64:20:50, 15.59s/it]

training loss: 0.8543645143508911


training:  20%|█▉        | 3645/18500 [16:02:46<64:19:56, 15.59s/it]

training loss: 0.9140470027923584


training:  20%|█▉        | 3646/18500 [16:03:01<64:20:33, 15.59s/it]

training loss: 0.8486226797103882


training:  20%|█▉        | 3647/18500 [16:03:17<64:20:03, 15.59s/it]

training loss: 0.8253284096717834


training:  20%|█▉        | 3648/18500 [16:03:33<64:20:33, 15.60s/it]

training loss: 0.8316763043403625


training:  20%|█▉        | 3649/18500 [16:03:48<64:19:40, 15.59s/it]

training loss: 0.4687744975090027


training:  20%|█▉        | 3650/18500 [16:04:04<64:18:33, 15.59s/it]

training loss: 0.6293753385543823


training:  20%|█▉        | 3651/18500 [16:04:19<64:20:04, 15.60s/it]

training loss: 0.5075335502624512


training:  20%|█▉        | 3652/18500 [16:04:35<64:19:36, 15.60s/it]

training loss: 0.8433600664138794


training:  20%|█▉        | 3653/18500 [16:04:51<64:19:00, 15.60s/it]

training loss: 0.45112144947052


training:  20%|█▉        | 3654/18500 [16:05:06<64:18:23, 15.59s/it]

training loss: 0.713088870048523


training:  20%|█▉        | 3655/18500 [16:05:22<64:18:47, 15.60s/it]

training loss: 0.7839197516441345


training:  20%|█▉        | 3656/18500 [16:05:37<64:18:07, 15.59s/it]

training loss: 0.42131343483924866


training:  20%|█▉        | 3657/18500 [16:05:53<64:17:16, 15.59s/it]

training loss: 0.856033444404602


training:  20%|█▉        | 3658/18500 [16:06:09<64:17:27, 15.59s/it]

training loss: 0.9856539964675903


training:  20%|█▉        | 3659/18500 [16:06:24<64:18:00, 15.60s/it]

training loss: 0.7355672717094421


training:  20%|█▉        | 3660/18500 [16:06:40<64:17:12, 15.60s/it]

training loss: 0.6423807144165039


training:  20%|█▉        | 3661/18500 [16:06:55<64:16:56, 15.60s/it]

training loss: 0.9962765574455261


training:  20%|█▉        | 3662/18500 [16:07:11<64:16:32, 15.59s/it]

training loss: 0.9342314004898071


training:  20%|█▉        | 3663/18500 [16:07:27<64:16:50, 15.60s/it]

training loss: 0.6339437961578369


training:  20%|█▉        | 3664/18500 [16:07:42<64:16:38, 15.60s/it]

training loss: 0.5892738699913025


training:  20%|█▉        | 3665/18500 [16:07:58<64:16:56, 15.60s/it]

training loss: 0.7992061376571655


training:  20%|█▉        | 3666/18500 [16:08:13<64:16:04, 15.60s/it]

training loss: 0.7982613444328308


training:  20%|█▉        | 3667/18500 [16:08:29<64:16:03, 15.60s/it]

training loss: 0.6949618458747864


training:  20%|█▉        | 3668/18500 [16:08:45<64:15:19, 15.60s/it]

training loss: 1.000112533569336


training:  20%|█▉        | 3669/18500 [16:09:00<64:14:52, 15.60s/it]

training loss: 0.7958969473838806


training:  20%|█▉        | 3670/18500 [16:09:16<64:15:31, 15.60s/it]

training loss: 0.9966236352920532


training:  20%|█▉        | 3671/18500 [16:09:31<64:16:15, 15.60s/it]

training loss: 0.7410487532615662


training:  20%|█▉        | 3672/18500 [16:09:47<64:16:02, 15.60s/it]

training loss: 0.7164319753646851


training:  20%|█▉        | 3673/18500 [16:10:03<64:16:45, 15.61s/it]

training loss: 0.4683550000190735


training:  20%|█▉        | 3674/18500 [16:10:18<64:18:22, 15.61s/it]

training loss: 1.1249995231628418


training:  20%|█▉        | 3675/18500 [16:10:34<64:16:58, 15.61s/it]

training loss: 0.8601946234703064


training:  20%|█▉        | 3676/18500 [16:10:49<64:16:00, 15.61s/it]

training loss: 0.7324735522270203


training:  20%|█▉        | 3677/18500 [16:11:05<64:15:05, 15.60s/it]

training loss: 0.7252293825149536


training:  20%|█▉        | 3678/18500 [16:11:21<64:15:48, 15.61s/it]

training loss: 0.7237224578857422


training:  20%|█▉        | 3679/18500 [16:11:36<64:14:57, 15.61s/it]

training loss: 0.5746456384658813


training:  20%|█▉        | 3680/18500 [16:11:52<64:14:23, 15.60s/it]

training loss: 0.5242091417312622


training:  20%|█▉        | 3681/18500 [16:12:07<64:13:14, 15.60s/it]

training loss: 1.050012469291687


training:  20%|█▉        | 3682/18500 [16:12:23<64:13:31, 15.60s/it]

training loss: 0.4687168598175049


training:  20%|█▉        | 3683/18500 [16:12:39<64:13:02, 15.60s/it]

training loss: 0.5748660564422607


training:  20%|█▉        | 3684/18500 [16:12:54<64:13:16, 15.60s/it]

training loss: 0.7839045524597168


training:  20%|█▉        | 3685/18500 [16:13:10<64:13:08, 15.61s/it]

training loss: 0.8163422346115112


training:  20%|█▉        | 3686/18500 [16:13:25<64:13:13, 15.61s/it]

training loss: 0.7658792734146118


training:  20%|█▉        | 3687/18500 [16:13:41<64:12:35, 15.60s/it]

training loss: 0.6090944409370422


training:  20%|█▉        | 3688/18500 [16:13:57<64:12:19, 15.60s/it]

training loss: 0.8926457166671753


training:  20%|█▉        | 3689/18500 [16:14:12<64:12:18, 15.61s/it]

training loss: 1.1316756010055542


training:  20%|█▉        | 3690/18500 [16:14:28<64:12:28, 15.61s/it]

training loss: 1.2414193153381348


training:  20%|█▉        | 3691/18500 [16:14:43<64:13:22, 15.61s/it]

training loss: 0.8738417625427246


training:  20%|█▉        | 3692/18500 [16:14:59<64:13:33, 15.61s/it]

training loss: 1.036827564239502


training:  20%|█▉        | 3693/18500 [16:15:15<64:13:59, 15.62s/it]

training loss: 0.9499196410179138


training:  20%|█▉        | 3694/18500 [16:15:30<64:13:23, 15.62s/it]

training loss: 0.9201509952545166


training:  20%|█▉        | 3695/18500 [16:15:46<64:12:44, 15.61s/it]

training loss: 1.0314964056015015


training:  20%|█▉        | 3696/18500 [16:16:02<64:12:19, 15.61s/it]

training loss: 1.0554438829421997


training:  20%|█▉        | 3697/18500 [16:16:17<64:12:10, 15.61s/it]

training loss: 0.5520996451377869


training:  20%|█▉        | 3698/18500 [16:16:33<64:12:58, 15.62s/it]

training loss: 0.5980530977249146


training:  20%|█▉        | 3699/18500 [16:16:48<64:11:58, 15.62s/it]

training loss: 1.0611841678619385


training:  20%|██        | 3700/18500 [16:17:04<64:10:19, 15.61s/it]

training loss: 0.5940835475921631
training loss: 0.6485398411750793


training:  20%|██        | 3701/18500 [16:17:21<65:52:12, 16.02s/it]

validation loss: 1.5506290197372437


training:  20%|██        | 3702/18500 [16:17:37<65:23:56, 15.91s/it]

training loss: 1.1406002044677734


training:  20%|██        | 3703/18500 [16:17:52<65:08:02, 15.85s/it]

training loss: 0.2864697575569153


training:  20%|██        | 3704/18500 [16:18:08<64:56:37, 15.80s/it]

training loss: 0.8934049606323242


training:  20%|██        | 3705/18500 [16:18:24<64:51:22, 15.78s/it]

training loss: 0.483499139547348


training:  20%|██        | 3706/18500 [16:18:39<64:45:41, 15.76s/it]

training loss: 0.8047354817390442


training:  20%|██        | 3707/18500 [16:18:55<64:41:49, 15.74s/it]

training loss: 0.8894157409667969


training:  20%|██        | 3708/18500 [16:19:11<64:40:11, 15.74s/it]

training loss: 1.0382517576217651


training:  20%|██        | 3709/18500 [16:19:27<64:39:04, 15.74s/it]

training loss: 0.504375696182251


training:  20%|██        | 3710/18500 [16:19:42<64:38:16, 15.73s/it]

training loss: 0.8108750581741333


training:  20%|██        | 3711/18500 [16:19:58<64:33:20, 15.71s/it]

training loss: 0.9162241816520691


training:  20%|██        | 3712/18500 [16:20:14<64:24:28, 15.68s/it]

training loss: 0.9289402961730957


training:  20%|██        | 3713/18500 [16:20:29<64:19:22, 15.66s/it]

training loss: 0.9127862453460693


training:  20%|██        | 3714/18500 [16:20:45<64:15:14, 15.64s/it]

training loss: 0.4408983886241913


training:  20%|██        | 3715/18500 [16:21:00<64:10:46, 15.63s/it]

training loss: 0.6768549680709839


training:  20%|██        | 3716/18500 [16:21:16<64:08:21, 15.62s/it]

training loss: 0.9025580883026123


training:  20%|██        | 3717/18500 [16:21:32<64:06:17, 15.61s/it]

training loss: 0.7413986921310425


training:  20%|██        | 3718/18500 [16:21:47<64:05:15, 15.61s/it]

training loss: 0.6879945993423462


training:  20%|██        | 3719/18500 [16:22:03<64:03:52, 15.60s/it]

training loss: 0.4121713936328888


training:  20%|██        | 3720/18500 [16:22:18<64:03:58, 15.60s/it]

training loss: 0.5959694385528564


training:  20%|██        | 3721/18500 [16:22:34<64:03:51, 15.61s/it]

training loss: 0.9641698598861694


training:  20%|██        | 3722/18500 [16:22:50<64:02:48, 15.60s/it]

training loss: 0.9639198184013367


training:  20%|██        | 3723/18500 [16:23:05<64:02:05, 15.60s/it]

training loss: 0.4634970426559448


training:  20%|██        | 3724/18500 [16:23:21<64:03:30, 15.61s/it]

training loss: 0.7053135633468628


training:  20%|██        | 3725/18500 [16:23:36<64:03:28, 15.61s/it]

training loss: 0.740715742111206


training:  20%|██        | 3726/18500 [16:23:52<64:02:26, 15.60s/it]

training loss: 1.0197080373764038


training:  20%|██        | 3727/18500 [16:24:08<64:01:25, 15.60s/it]

training loss: 0.7285315990447998


training:  20%|██        | 3728/18500 [16:24:23<64:02:30, 15.61s/it]

training loss: 0.7905299067497253


training:  20%|██        | 3729/18500 [16:24:39<64:02:05, 15.61s/it]

training loss: 0.7285513877868652


training:  20%|██        | 3730/18500 [16:24:55<64:02:59, 15.61s/it]

training loss: 0.5853452682495117


training:  20%|██        | 3731/18500 [16:25:10<64:02:28, 15.61s/it]

training loss: 0.7717249393463135


training:  20%|██        | 3732/18500 [16:25:26<64:02:04, 15.61s/it]

training loss: 0.793845534324646


training:  20%|██        | 3733/18500 [16:25:41<64:00:58, 15.61s/it]

training loss: 0.8037326335906982


training:  20%|██        | 3734/18500 [16:25:57<64:00:50, 15.61s/it]

training loss: 0.9943602681159973


training:  20%|██        | 3735/18500 [16:26:13<64:00:39, 15.61s/it]

training loss: 0.8678379654884338


training:  20%|██        | 3736/18500 [16:26:28<64:00:37, 15.61s/it]

training loss: 0.5032438039779663


training:  20%|██        | 3737/18500 [16:26:44<63:59:17, 15.60s/it]

training loss: 0.6323041915893555


training:  20%|██        | 3738/18500 [16:26:59<63:59:31, 15.61s/it]

training loss: 1.0900425910949707


training:  20%|██        | 3739/18500 [16:27:15<63:59:00, 15.60s/it]

training loss: 0.7487985491752625


training:  20%|██        | 3740/18500 [16:27:31<63:58:36, 15.60s/it]

training loss: 0.9952977895736694


training:  20%|██        | 3741/18500 [16:27:46<63:57:54, 15.60s/it]

training loss: 0.9607341885566711


training:  20%|██        | 3742/18500 [16:28:02<63:58:14, 15.60s/it]

training loss: 0.3922445476055145


training:  20%|██        | 3743/18500 [16:28:17<63:58:07, 15.61s/it]

training loss: 0.8937386274337769


training:  20%|██        | 3744/18500 [16:28:33<63:58:37, 15.61s/it]

training loss: 0.791632890701294


training:  20%|██        | 3745/18500 [16:28:49<63:58:21, 15.61s/it]

training loss: 0.8977118134498596


training:  20%|██        | 3746/18500 [16:29:04<63:57:05, 15.60s/it]

training loss: 0.8489757776260376


training:  20%|██        | 3747/18500 [16:29:20<63:57:45, 15.61s/it]

training loss: 0.8085010051727295


training:  20%|██        | 3748/18500 [16:29:35<63:57:44, 15.61s/it]

training loss: 0.8464429378509521


training:  20%|██        | 3749/18500 [16:29:51<63:57:12, 15.61s/it]

training loss: 0.7963833808898926


training:  20%|██        | 3750/18500 [16:30:07<63:58:00, 15.61s/it]

training loss: 1.0317033529281616


training:  20%|██        | 3751/18500 [16:30:22<63:57:47, 15.61s/it]

training loss: 0.7723290324211121


training:  20%|██        | 3752/18500 [16:30:38<63:56:44, 15.61s/it]

training loss: 0.9794564247131348


training:  20%|██        | 3753/18500 [16:30:53<63:55:15, 15.60s/it]

training loss: 0.7485219240188599


training:  20%|██        | 3754/18500 [16:31:09<63:55:21, 15.61s/it]

training loss: 0.45401349663734436


training:  20%|██        | 3755/18500 [16:31:25<63:55:48, 15.61s/it]

training loss: 0.8751790523529053


training:  20%|██        | 3756/18500 [16:31:40<63:55:03, 15.61s/it]

training loss: 0.5033668875694275


training:  20%|██        | 3757/18500 [16:31:56<63:54:19, 15.60s/it]

training loss: 0.8091551065444946


training:  20%|██        | 3758/18500 [16:32:11<63:52:50, 15.60s/it]

training loss: 0.8457865715026855


training:  20%|██        | 3759/18500 [16:32:27<63:54:02, 15.61s/it]

training loss: 1.0124000310897827


training:  20%|██        | 3760/18500 [16:32:43<63:54:04, 15.61s/it]

training loss: 1.0473155975341797


training:  20%|██        | 3761/18500 [16:32:58<63:53:38, 15.61s/it]

training loss: 0.758746325969696


training:  20%|██        | 3762/18500 [16:33:14<63:52:49, 15.60s/it]

training loss: 0.9370214343070984


training:  20%|██        | 3763/18500 [16:33:30<63:53:42, 15.61s/it]

training loss: 0.5418615341186523


training:  20%|██        | 3764/18500 [16:33:45<63:53:02, 15.61s/it]

training loss: 1.100014090538025


training:  20%|██        | 3765/18500 [16:34:01<63:52:22, 15.61s/it]

training loss: 0.5269226431846619


training:  20%|██        | 3766/18500 [16:34:16<63:51:20, 15.60s/it]

training loss: 0.8001988530158997


training:  20%|██        | 3767/18500 [16:34:32<63:51:38, 15.60s/it]

training loss: 0.834770143032074


training:  20%|██        | 3768/18500 [16:34:48<63:51:50, 15.61s/it]

training loss: 0.791634202003479


training:  20%|██        | 3769/18500 [16:35:03<63:51:34, 15.61s/it]

training loss: 0.658044695854187


training:  20%|██        | 3770/18500 [16:35:19<63:52:17, 15.61s/it]

training loss: 0.5092240571975708


training:  20%|██        | 3771/18500 [16:35:34<63:51:25, 15.61s/it]

training loss: 0.9311012625694275


training:  20%|██        | 3772/18500 [16:35:50<63:50:03, 15.60s/it]

training loss: 0.8311898112297058


training:  20%|██        | 3773/18500 [16:36:06<63:49:32, 15.60s/it]

training loss: 0.7631681561470032


training:  20%|██        | 3774/18500 [16:36:21<63:50:00, 15.61s/it]

training loss: 0.8639130592346191


training:  20%|██        | 3775/18500 [16:36:37<63:50:04, 15.61s/it]

training loss: 1.0522286891937256


training:  20%|██        | 3776/18500 [16:36:52<63:49:15, 15.60s/it]

training loss: 0.952717661857605


training:  20%|██        | 3777/18500 [16:37:08<63:47:59, 15.60s/it]

training loss: 1.0429502725601196


training:  20%|██        | 3778/18500 [16:37:24<63:48:31, 15.60s/it]

training loss: 0.5838508009910583


training:  20%|██        | 3779/18500 [16:37:39<63:48:48, 15.61s/it]

training loss: 1.2120025157928467


training:  20%|██        | 3780/18500 [16:37:55<63:48:12, 15.60s/it]

training loss: 0.8666577339172363


training:  20%|██        | 3781/18500 [16:38:10<63:47:00, 15.60s/it]

training loss: 0.5633072257041931


training:  20%|██        | 3782/18500 [16:38:26<63:47:17, 15.60s/it]

training loss: 0.6126874089241028


training:  20%|██        | 3783/18500 [16:38:42<63:47:49, 15.61s/it]

training loss: 1.039151668548584


training:  20%|██        | 3784/18500 [16:38:57<63:48:14, 15.61s/it]

training loss: 0.3083603084087372


training:  20%|██        | 3785/18500 [16:39:13<63:46:48, 15.60s/it]

training loss: 0.6108019351959229


training:  20%|██        | 3786/18500 [16:39:28<63:47:18, 15.61s/it]

training loss: 0.7244518399238586


training:  20%|██        | 3787/18500 [16:39:44<63:46:52, 15.61s/it]

training loss: 0.5506024956703186


training:  20%|██        | 3788/18500 [16:40:00<63:45:46, 15.60s/it]

training loss: 1.1269547939300537


training:  20%|██        | 3789/18500 [16:40:15<63:44:59, 15.60s/it]

training loss: 0.7938533425331116


training:  20%|██        | 3790/18500 [16:40:31<63:45:49, 15.60s/it]

training loss: 0.821016252040863


training:  20%|██        | 3791/18500 [16:40:46<63:45:29, 15.60s/it]

training loss: 1.1367061138153076


training:  20%|██        | 3792/18500 [16:41:02<63:44:58, 15.60s/it]

training loss: 0.518621563911438


training:  21%|██        | 3793/18500 [16:41:18<63:44:42, 15.60s/it]

training loss: 0.9506207704544067


training:  21%|██        | 3794/18500 [16:41:33<63:44:58, 15.61s/it]

training loss: 0.9218125343322754


training:  21%|██        | 3795/18500 [16:41:49<63:43:58, 15.60s/it]

training loss: 0.9416155219078064


training:  21%|██        | 3796/18500 [16:42:05<63:51:47, 15.64s/it]

training loss: 0.7745559811592102


training:  21%|██        | 3797/18500 [16:42:20<63:49:24, 15.63s/it]

training loss: 0.6300840973854065


training:  21%|██        | 3798/18500 [16:42:36<63:47:51, 15.62s/it]

training loss: 0.5123598575592041


training:  21%|██        | 3799/18500 [16:42:51<63:45:49, 15.61s/it]

training loss: 1.2211495637893677


training:  21%|██        | 3800/18500 [16:43:07<63:44:16, 15.61s/it]

training loss: 1.0994372367858887
training loss: 0.9374538064002991


training:  21%|██        | 3801/18500 [16:43:24<65:26:49, 16.03s/it]

validation loss: 1.5606223344802856


training:  21%|██        | 3802/18500 [16:43:40<64:57:31, 15.91s/it]

training loss: 0.9133711457252502


training:  21%|██        | 3803/18500 [16:43:55<64:34:19, 15.82s/it]

training loss: 1.0845762491226196


training:  21%|██        | 3804/18500 [16:44:11<64:16:55, 15.75s/it]

training loss: 1.298074722290039


training:  21%|██        | 3805/18500 [16:44:26<64:06:22, 15.70s/it]

training loss: 0.9339361786842346


training:  21%|██        | 3806/18500 [16:44:42<63:58:53, 15.68s/it]

training loss: 0.7317965030670166


training:  21%|██        | 3807/18500 [16:44:58<63:53:29, 15.65s/it]

training loss: 0.8152901530265808


training:  21%|██        | 3808/18500 [16:45:13<63:48:32, 15.64s/it]

training loss: 0.5464197993278503


training:  21%|██        | 3809/18500 [16:45:29<63:48:00, 15.63s/it]

training loss: 0.725903332233429


training:  21%|██        | 3810/18500 [16:45:45<63:51:21, 15.65s/it]

training loss: 0.7622677683830261


training:  21%|██        | 3811/18500 [16:46:00<63:55:00, 15.66s/it]

training loss: 0.922691822052002


training:  21%|██        | 3812/18500 [16:46:16<63:56:51, 15.67s/it]

training loss: 0.6864729523658752


training:  21%|██        | 3813/18500 [16:46:32<64:00:05, 15.69s/it]

training loss: 0.9492843747138977


training:  21%|██        | 3814/18500 [16:46:47<64:03:09, 15.70s/it]

training loss: 0.9489538669586182


training:  21%|██        | 3815/18500 [16:47:03<64:03:30, 15.70s/it]

training loss: 0.9309753179550171


training:  21%|██        | 3816/18500 [16:47:19<64:04:49, 15.71s/it]

training loss: 0.8452008962631226


training:  21%|██        | 3817/18500 [16:47:35<64:05:26, 15.71s/it]

training loss: 0.8724337220191956


training:  21%|██        | 3818/18500 [16:47:50<64:04:16, 15.71s/it]

training loss: 1.217103362083435


training:  21%|██        | 3819/18500 [16:48:06<64:04:28, 15.71s/it]

training loss: 0.6331239938735962


training:  21%|██        | 3820/18500 [16:48:22<64:04:40, 15.71s/it]

training loss: 0.7223289608955383


training:  21%|██        | 3821/18500 [16:48:37<64:02:55, 15.71s/it]

training loss: 0.8516526222229004


training:  21%|██        | 3822/18500 [16:48:53<63:56:45, 15.68s/it]

training loss: 0.7625279426574707


training:  21%|██        | 3823/18500 [16:49:09<63:51:26, 15.66s/it]

training loss: 0.8599435091018677


training:  21%|██        | 3824/18500 [16:49:24<63:47:04, 15.65s/it]

training loss: 0.7444008588790894


training:  21%|██        | 3825/18500 [16:49:40<63:43:55, 15.63s/it]

training loss: 0.6046438217163086


training:  21%|██        | 3826/18500 [16:49:55<63:41:19, 15.62s/it]

training loss: 0.8356640338897705


training:  21%|██        | 3827/18500 [16:50:11<63:38:24, 15.61s/it]

training loss: 1.1622645854949951


training:  21%|██        | 3828/18500 [16:50:27<63:37:48, 15.61s/it]

training loss: 0.7757286429405212


training:  21%|██        | 3829/18500 [16:50:42<63:37:05, 15.61s/it]

training loss: 0.7085880041122437


training:  21%|██        | 3830/18500 [16:50:58<63:35:55, 15.61s/it]

training loss: 1.1387616395950317


training:  21%|██        | 3831/18500 [16:51:13<63:33:58, 15.60s/it]

training loss: 0.5694237351417542


training:  21%|██        | 3832/18500 [16:51:29<63:35:03, 15.61s/it]

training loss: 0.8941395282745361


training:  21%|██        | 3833/18500 [16:51:45<63:34:00, 15.60s/it]

training loss: 0.7987825274467468


training:  21%|██        | 3834/18500 [16:52:00<63:32:55, 15.60s/it]

training loss: 0.8238126635551453


training:  21%|██        | 3835/18500 [16:52:16<63:31:30, 15.59s/it]

training loss: 0.6808931827545166


training:  21%|██        | 3836/18500 [16:52:31<63:31:56, 15.60s/it]

training loss: 0.6237358450889587


training:  21%|██        | 3837/18500 [16:52:47<63:33:20, 15.60s/it]

training loss: 0.7227123975753784


training:  21%|██        | 3838/18500 [16:53:03<63:32:10, 15.60s/it]

training loss: 1.1263309717178345


training:  21%|██        | 3839/18500 [16:53:18<63:31:54, 15.60s/it]

training loss: 0.7237054705619812


training:  21%|██        | 3840/18500 [16:53:34<63:32:14, 15.60s/it]

training loss: 0.6426626443862915


training:  21%|██        | 3841/18500 [16:53:49<63:31:46, 15.60s/it]

training loss: 0.889582633972168


training:  21%|██        | 3842/18500 [16:54:05<63:31:08, 15.60s/it]

training loss: 0.6479063034057617


training:  21%|██        | 3843/18500 [16:54:21<63:31:42, 15.60s/it]

training loss: 0.9381954073905945


training:  21%|██        | 3844/18500 [16:54:36<63:31:30, 15.60s/it]

training loss: 0.855380654335022


training:  21%|██        | 3845/18500 [16:54:52<63:30:22, 15.60s/it]

training loss: 0.6145882606506348


training:  21%|██        | 3846/18500 [16:55:07<63:30:07, 15.60s/it]

training loss: 1.0920913219451904


training:  21%|██        | 3847/18500 [16:55:23<63:30:15, 15.60s/it]

training loss: 0.6423488855361938


training:  21%|██        | 3848/18500 [16:55:39<63:30:11, 15.60s/it]

training loss: 0.7277960777282715


training:  21%|██        | 3849/18500 [16:55:54<63:30:24, 15.60s/it]

training loss: 0.6827467083930969


training:  21%|██        | 3850/18500 [16:56:10<63:29:41, 15.60s/it]

training loss: 0.9393740892410278


training:  21%|██        | 3851/18500 [16:56:25<63:29:48, 15.60s/it]

training loss: 1.0189518928527832


training:  21%|██        | 3852/18500 [16:56:41<63:30:30, 15.61s/it]

training loss: 1.0513901710510254


training:  21%|██        | 3853/18500 [16:56:57<63:29:56, 15.61s/it]

training loss: 0.9111201167106628


training:  21%|██        | 3854/18500 [16:57:12<63:28:47, 15.60s/it]

training loss: 0.5273393392562866


training:  21%|██        | 3855/18500 [16:57:28<63:29:08, 15.61s/it]

training loss: 0.8199381232261658


training:  21%|██        | 3856/18500 [16:57:43<63:28:28, 15.60s/it]

training loss: 0.29703590273857117


training:  21%|██        | 3857/18500 [16:57:59<63:27:44, 15.60s/it]

training loss: 1.1776000261306763


training:  21%|██        | 3858/18500 [16:58:15<63:27:08, 15.60s/it]

training loss: 0.6524302959442139


training:  21%|██        | 3859/18500 [16:58:30<63:27:45, 15.60s/it]

training loss: 0.7395797967910767


training:  21%|██        | 3860/18500 [16:58:46<63:27:35, 15.60s/it]

training loss: 0.7236840128898621


training:  21%|██        | 3861/18500 [16:59:01<63:26:43, 15.60s/it]

training loss: 0.5433424711227417


training:  21%|██        | 3862/18500 [16:59:17<63:26:17, 15.60s/it]

training loss: 0.6585646867752075


training:  21%|██        | 3863/18500 [16:59:33<63:27:37, 15.61s/it]

training loss: 0.95465087890625


training:  21%|██        | 3864/18500 [16:59:48<63:26:48, 15.61s/it]

training loss: 0.8810955882072449


training:  21%|██        | 3865/18500 [17:00:04<63:26:04, 15.60s/it]

training loss: 0.6966096758842468


training:  21%|██        | 3866/18500 [17:00:20<63:26:05, 15.61s/it]

training loss: 0.9989016056060791


training:  21%|██        | 3867/18500 [17:00:35<63:26:11, 15.61s/it]

training loss: 0.7852518558502197


training:  21%|██        | 3868/18500 [17:00:51<63:25:56, 15.61s/it]

training loss: 0.8500880002975464


training:  21%|██        | 3869/18500 [17:01:06<63:25:35, 15.61s/it]

training loss: 0.7747451066970825


training:  21%|██        | 3870/18500 [17:01:22<63:24:54, 15.60s/it]

training loss: 0.8987215757369995


training:  21%|██        | 3871/18500 [17:01:38<63:25:23, 15.61s/it]

training loss: 1.2047696113586426


training:  21%|██        | 3872/18500 [17:01:53<63:23:48, 15.60s/it]

training loss: 0.9339204430580139


training:  21%|██        | 3873/18500 [17:02:09<63:23:02, 15.60s/it]

training loss: 0.7080088257789612


training:  21%|██        | 3874/18500 [17:02:24<63:23:14, 15.60s/it]

training loss: 0.7851519584655762


training:  21%|██        | 3875/18500 [17:02:40<63:23:16, 15.60s/it]

training loss: 0.5496305823326111


training:  21%|██        | 3876/18500 [17:02:56<63:23:38, 15.61s/it]

training loss: 0.8155224323272705


training:  21%|██        | 3877/18500 [17:03:11<63:21:54, 15.60s/it]

training loss: 0.8256915807723999


training:  21%|██        | 3878/18500 [17:03:27<63:22:17, 15.60s/it]

training loss: 0.7335009574890137


training:  21%|██        | 3879/18500 [17:03:42<63:21:43, 15.60s/it]

training loss: 0.9613293409347534


training:  21%|██        | 3880/18500 [17:03:58<63:21:26, 15.60s/it]

training loss: 0.6865047216415405


training:  21%|██        | 3881/18500 [17:04:14<63:19:51, 15.60s/it]

training loss: 0.9363625049591064


training:  21%|██        | 3882/18500 [17:04:29<63:20:43, 15.60s/it]

training loss: 0.5475086569786072


training:  21%|██        | 3883/18500 [17:04:45<63:20:08, 15.60s/it]

training loss: 0.8782140016555786


training:  21%|██        | 3884/18500 [17:05:00<63:20:00, 15.60s/it]

training loss: 0.956372857093811


training:  21%|██        | 3885/18500 [17:05:16<63:18:30, 15.59s/it]

training loss: 0.5085940361022949


training:  21%|██        | 3886/18500 [17:05:32<63:18:31, 15.60s/it]

training loss: 0.8186894655227661


training:  21%|██        | 3887/18500 [17:05:47<63:19:13, 15.60s/it]

training loss: 1.2038884162902832


training:  21%|██        | 3888/18500 [17:06:03<63:18:45, 15.60s/it]

training loss: 0.876522958278656


training:  21%|██        | 3889/18500 [17:06:18<63:19:44, 15.60s/it]

training loss: 0.7351397275924683


training:  21%|██        | 3890/18500 [17:06:34<63:19:19, 15.60s/it]

training loss: 1.147128701210022


training:  21%|██        | 3891/18500 [17:06:50<63:18:01, 15.60s/it]

training loss: 0.526199221611023


training:  21%|██        | 3892/18500 [17:07:05<63:17:31, 15.60s/it]

training loss: 0.7229187488555908


training:  21%|██        | 3893/18500 [17:07:21<63:17:09, 15.60s/it]

training loss: 0.47603678703308105


training:  21%|██        | 3894/18500 [17:07:36<63:16:48, 15.60s/it]

training loss: 0.8306097984313965


training:  21%|██        | 3895/18500 [17:07:52<63:16:49, 15.60s/it]

training loss: 0.8742932677268982


training:  21%|██        | 3896/18500 [17:08:08<63:16:19, 15.60s/it]

training loss: 0.9851502180099487


training:  21%|██        | 3897/18500 [17:08:23<63:15:56, 15.60s/it]

training loss: 0.9336764216423035


training:  21%|██        | 3898/18500 [17:08:39<63:15:50, 15.60s/it]

training loss: 0.9955428838729858


training:  21%|██        | 3899/18500 [17:08:54<63:16:05, 15.60s/it]

training loss: 0.8830381035804749


training:  21%|██        | 3900/18500 [17:09:10<63:15:51, 15.60s/it]

training loss: 0.7397245168685913
training loss: 0.9526305794715881


training:  21%|██        | 3901/18500 [17:09:27<65:00:39, 16.03s/it]

validation loss: 1.501994252204895


training:  21%|██        | 3902/18500 [17:09:43<64:30:05, 15.91s/it]

training loss: 0.6869415640830994


training:  21%|██        | 3903/18500 [17:09:58<64:07:52, 15.82s/it]

training loss: 0.9913496375083923


training:  21%|██        | 3904/18500 [17:10:14<63:51:07, 15.75s/it]

training loss: 0.642431378364563


training:  21%|██        | 3905/18500 [17:10:29<63:41:42, 15.71s/it]

training loss: 0.387959748506546


training:  21%|██        | 3906/18500 [17:10:45<63:33:32, 15.68s/it]

training loss: 1.093574047088623


training:  21%|██        | 3907/18500 [17:11:01<63:28:03, 15.66s/it]

training loss: 0.9682009220123291


training:  21%|██        | 3908/18500 [17:11:16<63:23:16, 15.64s/it]

training loss: 1.2038745880126953


training:  21%|██        | 3909/18500 [17:11:32<63:22:03, 15.63s/it]

training loss: 0.9277696013450623


training:  21%|██        | 3910/18500 [17:11:47<63:19:19, 15.62s/it]

training loss: 1.1008598804473877


training:  21%|██        | 3911/18500 [17:12:03<63:15:51, 15.61s/it]

training loss: 1.0424270629882812


training:  21%|██        | 3912/18500 [17:12:19<63:14:21, 15.61s/it]

training loss: 0.797258734703064


training:  21%|██        | 3913/18500 [17:12:34<63:13:20, 15.60s/it]

training loss: 1.2470141649246216


training:  21%|██        | 3914/18500 [17:12:50<63:12:14, 15.60s/it]

training loss: 0.5716156363487244


training:  21%|██        | 3915/18500 [17:13:05<63:12:06, 15.60s/it]

training loss: 0.7057854533195496


training:  21%|██        | 3916/18500 [17:13:21<63:12:16, 15.60s/it]

training loss: 0.5348044633865356


training:  21%|██        | 3917/18500 [17:13:37<63:12:01, 15.60s/it]

training loss: 0.6792293190956116


training:  21%|██        | 3918/18500 [17:13:52<63:10:52, 15.60s/it]

training loss: 0.7584639191627502


training:  21%|██        | 3919/18500 [17:14:08<63:09:52, 15.60s/it]

training loss: 0.6832389235496521


training:  21%|██        | 3920/18500 [17:14:23<63:09:56, 15.60s/it]

training loss: 1.245646595954895


training:  21%|██        | 3921/18500 [17:14:39<63:09:48, 15.60s/it]

training loss: 0.7393043041229248


training:  21%|██        | 3922/18500 [17:14:55<63:11:08, 15.60s/it]

training loss: 0.7709975838661194


training:  21%|██        | 3923/18500 [17:15:10<63:10:40, 15.60s/it]

training loss: 0.9283117055892944


training:  21%|██        | 3924/18500 [17:15:26<63:11:37, 15.61s/it]

training loss: 0.8777554631233215


training:  21%|██        | 3925/18500 [17:15:41<63:12:10, 15.61s/it]

training loss: 0.7148013710975647


training:  21%|██        | 3926/18500 [17:15:57<63:11:54, 15.61s/it]

training loss: 0.9475873708724976


training:  21%|██        | 3927/18500 [17:16:13<63:10:40, 15.61s/it]

training loss: 0.3086005747318268


training:  21%|██        | 3928/18500 [17:16:28<63:11:28, 15.61s/it]

training loss: 0.8866280317306519


training:  21%|██        | 3929/18500 [17:16:44<63:12:43, 15.62s/it]

training loss: 0.910516083240509


training:  21%|██        | 3930/18500 [17:17:00<63:11:57, 15.62s/it]

training loss: 0.8128594756126404


training:  21%|██        | 3931/18500 [17:17:15<63:17:25, 15.64s/it]

training loss: 0.3639887869358063


training:  21%|██▏       | 3932/18500 [17:17:31<63:23:13, 15.66s/it]

training loss: 0.547235369682312


training:  21%|██▏       | 3933/18500 [17:17:47<63:26:02, 15.68s/it]

training loss: 0.5353718400001526


training:  21%|██▏       | 3934/18500 [17:18:02<63:28:05, 15.69s/it]

training loss: 0.72083979845047


training:  21%|██▏       | 3935/18500 [17:18:18<63:29:14, 15.69s/it]

training loss: 0.8742119073867798


training:  21%|██▏       | 3936/18500 [17:18:34<63:31:18, 15.70s/it]

training loss: 0.6599442362785339


training:  21%|██▏       | 3937/18500 [17:18:49<63:31:26, 15.70s/it]

training loss: 0.6739232540130615


training:  21%|██▏       | 3938/18500 [17:19:05<63:31:04, 15.70s/it]

training loss: 0.6596449613571167


training:  21%|██▏       | 3939/18500 [17:19:21<63:30:13, 15.70s/it]

training loss: 0.348667711019516


training:  21%|██▏       | 3940/18500 [17:19:37<63:27:26, 15.69s/it]

training loss: 0.8195104598999023


training:  21%|██▏       | 3941/18500 [17:19:52<63:23:29, 15.67s/it]

training loss: 0.5015518069267273


training:  21%|██▏       | 3942/18500 [17:20:08<63:19:31, 15.66s/it]

training loss: 0.8299686312675476


training:  21%|██▏       | 3943/18500 [17:20:23<63:15:15, 15.64s/it]

training loss: 0.6711094379425049


training:  21%|██▏       | 3944/18500 [17:20:39<63:11:51, 15.63s/it]

training loss: 1.1571604013442993


training:  21%|██▏       | 3945/18500 [17:20:55<63:10:37, 15.63s/it]

training loss: 0.6887321472167969


training:  21%|██▏       | 3946/18500 [17:21:10<63:07:39, 15.61s/it]

training loss: 1.2142043113708496


training:  21%|██▏       | 3947/18500 [17:21:26<63:06:31, 15.61s/it]

training loss: 0.5460590124130249


training:  21%|██▏       | 3948/18500 [17:21:41<63:06:14, 15.61s/it]

training loss: 1.0328598022460938


training:  21%|██▏       | 3949/18500 [17:21:57<63:04:42, 15.61s/it]

training loss: 0.8714764714241028


training:  21%|██▏       | 3950/18500 [17:22:13<63:02:12, 15.60s/it]

training loss: 0.9963481426239014


training:  21%|██▏       | 3951/18500 [17:22:28<63:02:09, 15.60s/it]

training loss: 0.6849004626274109


training:  21%|██▏       | 3952/18500 [17:22:44<63:01:29, 15.60s/it]

training loss: 0.932583749294281


training:  21%|██▏       | 3953/18500 [17:22:59<63:00:40, 15.59s/it]

training loss: 0.7424463033676147


training:  21%|██▏       | 3954/18500 [17:23:15<62:59:10, 15.59s/it]

training loss: 0.8655859231948853


training:  21%|██▏       | 3955/18500 [17:23:31<62:59:08, 15.59s/it]

training loss: 0.7515934109687805


training:  21%|██▏       | 3956/18500 [17:23:46<62:59:05, 15.59s/it]

training loss: 0.8910735249519348


training:  21%|██▏       | 3957/18500 [17:24:02<62:59:11, 15.59s/it]

training loss: 0.6790677905082703


training:  21%|██▏       | 3958/18500 [17:24:17<62:59:05, 15.59s/it]

training loss: 0.9623712301254272


training:  21%|██▏       | 3959/18500 [17:24:33<62:59:38, 15.60s/it]

training loss: 0.9255238771438599


training:  21%|██▏       | 3960/18500 [17:24:49<62:58:44, 15.59s/it]

training loss: 0.7515560388565063


training:  21%|██▏       | 3961/18500 [17:25:04<62:57:56, 15.59s/it]

training loss: 0.7606221437454224


training:  21%|██▏       | 3962/18500 [17:25:20<62:58:35, 15.59s/it]

training loss: 0.5934996604919434


training:  21%|██▏       | 3963/18500 [17:25:35<62:58:27, 15.60s/it]

training loss: 0.9615082740783691


training:  21%|██▏       | 3964/18500 [17:25:51<62:57:18, 15.59s/it]

training loss: 0.8527102470397949


training:  21%|██▏       | 3965/18500 [17:26:06<62:56:33, 15.59s/it]

training loss: 0.7952837944030762


training:  21%|██▏       | 3966/18500 [17:26:22<62:57:07, 15.59s/it]

training loss: 0.44292885065078735


training:  21%|██▏       | 3967/18500 [17:26:38<62:57:25, 15.60s/it]

training loss: 0.7849644422531128


training:  21%|██▏       | 3968/18500 [17:26:53<62:56:38, 15.59s/it]

training loss: 1.0243966579437256


training:  21%|██▏       | 3969/18500 [17:27:09<62:56:52, 15.60s/it]

training loss: 0.6291437149047852


training:  21%|██▏       | 3970/18500 [17:27:24<62:57:09, 15.60s/it]

training loss: 0.8689082264900208


training:  21%|██▏       | 3971/18500 [17:27:40<62:57:13, 15.60s/it]

training loss: 0.713604748249054


training:  21%|██▏       | 3972/18500 [17:27:56<62:57:02, 15.60s/it]

training loss: 0.7650231122970581


training:  21%|██▏       | 3973/18500 [17:28:11<62:55:36, 15.59s/it]

training loss: 0.6144229769706726


training:  21%|██▏       | 3974/18500 [17:28:27<62:55:56, 15.60s/it]

training loss: 0.8580677509307861


training:  21%|██▏       | 3975/18500 [17:28:42<62:55:21, 15.60s/it]

training loss: 0.6443557739257812


training:  21%|██▏       | 3976/18500 [17:28:58<62:55:39, 15.60s/it]

training loss: 0.7268087267875671


training:  21%|██▏       | 3977/18500 [17:29:14<62:53:59, 15.59s/it]

training loss: 0.9639788866043091


training:  22%|██▏       | 3978/18500 [17:29:29<62:54:32, 15.60s/it]

training loss: 0.460689514875412


training:  22%|██▏       | 3979/18500 [17:29:45<62:54:00, 15.59s/it]

training loss: 0.6631414294242859


training:  22%|██▏       | 3980/18500 [17:30:00<62:53:27, 15.59s/it]

training loss: 0.9537373185157776


training:  22%|██▏       | 3981/18500 [17:30:16<62:52:34, 15.59s/it]

training loss: 0.8898116946220398


training:  22%|██▏       | 3982/18500 [17:30:32<62:53:11, 15.59s/it]

training loss: 0.6294968128204346


training:  22%|██▏       | 3983/18500 [17:30:47<62:53:26, 15.60s/it]

training loss: 0.8068824410438538


training:  22%|██▏       | 3984/18500 [17:31:03<62:52:14, 15.59s/it]

training loss: 0.9725474119186401


training:  22%|██▏       | 3985/18500 [17:31:18<62:52:27, 15.59s/it]

training loss: 0.7870559692382812


training:  22%|██▏       | 3986/18500 [17:31:34<62:52:59, 15.60s/it]

training loss: 1.0875225067138672


training:  22%|██▏       | 3987/18500 [17:31:50<62:53:00, 15.60s/it]

training loss: 0.5362606644630432


training:  22%|██▏       | 3988/18500 [17:32:05<62:52:24, 15.60s/it]

training loss: 0.667948305606842


training:  22%|██▏       | 3989/18500 [17:32:21<62:52:47, 15.60s/it]

training loss: 0.769253671169281


training:  22%|██▏       | 3990/18500 [17:32:36<62:51:44, 15.60s/it]

training loss: 0.6805868148803711


training:  22%|██▏       | 3991/18500 [17:32:52<62:50:45, 15.59s/it]

training loss: 0.6993435025215149


training:  22%|██▏       | 3992/18500 [17:33:08<62:49:54, 15.59s/it]

training loss: 0.9804155230522156


training:  22%|██▏       | 3993/18500 [17:33:23<62:49:30, 15.59s/it]

training loss: 0.7840146422386169


training:  22%|██▏       | 3994/18500 [17:33:39<62:49:10, 15.59s/it]

training loss: 0.690467119216919


training:  22%|██▏       | 3995/18500 [17:33:54<62:48:21, 15.59s/it]

training loss: 1.03188955783844


training:  22%|██▏       | 3996/18500 [17:34:10<62:47:03, 15.58s/it]

training loss: 1.066907525062561


training:  22%|██▏       | 3997/18500 [17:34:25<62:47:25, 15.59s/it]

training loss: 0.6599966883659363


training:  22%|██▏       | 3998/18500 [17:34:41<62:47:45, 15.59s/it]

training loss: 0.6615020632743835


training:  22%|██▏       | 3999/18500 [17:34:57<62:47:06, 15.59s/it]

training loss: 0.7083238959312439


training:  22%|██▏       | 4000/18500 [17:35:12<62:45:21, 15.58s/it]

training loss: 0.9909002780914307
training loss: 0.5299426913261414



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.474600076675415
zvlastny vysetrovatel Robert Mueller.Skoncil terorizmus? Nie neskoncil, ale skonci, ak budeme jednotni.
Nech ruske lietadlo poslal k zemi ktokolvek, co tym zamyslal? Chcel
zasiahnut turizmus a zasiahnut vztahy s Ruskom, povedal Sisi
v televiznom prihovore.
Ide o prve oficialne vyjadrenie egyptskeho predstavitela, ze za padom
lietadla je umyselny cin. Pri katastrofe z 31. oktobra zahynulo vsetkych
224 osob na palube lietadla. Airbus A321 ruskej spolocnosti Kogalymavia sa
zrutil asi pol hodiny po starte z egyptskeho letoviska Sarm as-Sajch.
Rusko uz v polovici novembra oznamilo, ze pricinou padu lietadla bola
bomba. K utoku sa navyse prihlasil Islamsky stat. Vybusninu vraj na
palubu prepasoval v uzavretej plechovke od nealkoholickeho napoja. Egyptske
urady vsak umysel odmietali potvrdit dovtedy, kym vysetrovanie neprinesie
dokazy.
Moskva po nestasti zastavila civilne lety do Egypta, ktory je
oblubenou destinaciou ruskych turistov.
Priznan


generating:   0%|          | 1/512 [00:00<01:45,  4.83it/s][A
generating:   0%|          | 2/512 [00:00<01:45,  4.82it/s][A
generating:   1%|          | 3/512 [00:00<01:46,  4.78it/s][A
generating:   1%|          | 4/512 [00:00<01:47,  4.73it/s][A
generating:   1%|          | 5/512 [00:01<01:47,  4.74it/s][A
generating:   1%|          | 6/512 [00:01<01:49,  4.63it/s][A
generating:   1%|▏         | 7/512 [00:01<01:48,  4.65it/s][A
generating:   2%|▏         | 8/512 [00:01<01:47,  4.67it/s][A
generating:   2%|▏         | 9/512 [00:01<01:47,  4.69it/s][A
generating:   2%|▏         | 10/512 [00:02<01:46,  4.73it/s][A
generating:   2%|▏         | 11/512 [00:02<01:45,  4.74it/s][A
generating:   2%|▏         | 12/512 [00:02<01:45,  4.73it/s][A
generating:   3%|▎         | 13/512 [00:02<01:49,  4.57it/s][A
generating:   3%|▎         | 14/512 [00:03<01:50,  4.50it/s][A
generating:   3%|▎         | 15/512 [00:03<01:52,  4.44it/s][A
generating:   3%|▎         | 16/512 [00:03<01:52

dla neho uplynulom case manipulatni
poplatnili na to viac spolocnosti. Nielen
dvoj-kilometrov a druhy objavitelnu cenu skor, ktora je podla odbera spojenych statov. Koncern ci slovenskych
ludi, ktore sa v oblasti
krajina, pretoze to povedal, ze krajiny je dlhodobom ukrajiny, urovni, tento rok na ine
temy nasimi najma podstatne cisto ceny
dolarov a potravinarstvo.
Co je najvacsich informovalo, ze v taktory objednavky z klietky. Zaroven slovenskej republiky.
Podla nasich dna, sa moze matky ceny
prote


training:  22%|██▏       | 4002/18500 [17:37:34<156:42:00, 38.91s/it]

training loss: 0.7706559300422668


training:  22%|██▏       | 4003/18500 [17:37:50<128:30:26, 31.91s/it]

training loss: 0.6992375254631042


training:  22%|██▏       | 4004/18500 [17:38:06<108:46:44, 27.01s/it]

training loss: 0.8425360321998596


training:  22%|██▏       | 4005/18500 [17:38:21<94:59:10, 23.59s/it] 

training loss: 0.8471746444702148


training:  22%|██▏       | 4006/18500 [17:38:37<85:19:51, 21.19s/it]

training loss: 0.7280020117759705


training:  22%|██▏       | 4007/18500 [17:38:52<78:32:36, 19.51s/it]

training loss: 0.8056727647781372


training:  22%|██▏       | 4008/18500 [17:39:08<73:47:18, 18.33s/it]

training loss: 0.4400915503501892


training:  22%|██▏       | 4009/18500 [17:39:24<70:28:15, 17.51s/it]

training loss: 0.6769083142280579


training:  22%|██▏       | 4010/18500 [17:39:39<68:09:30, 16.93s/it]

training loss: 0.44009658694267273


training:  22%|██▏       | 4011/18500 [17:39:55<66:32:12, 16.53s/it]

training loss: 0.8912805318832397


training:  22%|██▏       | 4012/18500 [17:40:10<65:22:57, 16.25s/it]

training loss: 0.6987113952636719


training:  22%|██▏       | 4013/18500 [17:40:26<64:35:41, 16.05s/it]

training loss: 0.7581688761711121


training:  22%|██▏       | 4014/18500 [17:40:42<64:02:21, 15.91s/it]

training loss: 0.9873442649841309


training:  22%|██▏       | 4015/18500 [17:40:57<63:39:00, 15.82s/it]

training loss: 1.1510573625564575


training:  22%|██▏       | 4016/18500 [17:41:13<63:21:49, 15.75s/it]

training loss: 0.4813474714756012


training:  22%|██▏       | 4017/18500 [17:41:28<63:11:24, 15.71s/it]

training loss: 0.7999058961868286


training:  22%|██▏       | 4018/18500 [17:41:44<63:03:13, 15.67s/it]

training loss: 0.6785569787025452


training:  22%|██▏       | 4019/18500 [17:42:00<62:56:46, 15.65s/it]

training loss: 0.796468198299408


training:  22%|██▏       | 4020/18500 [17:42:15<62:51:22, 15.63s/it]

training loss: 0.6719547510147095


training:  22%|██▏       | 4021/18500 [17:42:31<62:49:58, 15.62s/it]

training loss: 0.7991111278533936


training:  22%|██▏       | 4022/18500 [17:42:46<62:47:10, 15.61s/it]

training loss: 0.6101335883140564


training:  22%|██▏       | 4023/18500 [17:43:02<62:44:56, 15.60s/it]

training loss: 0.7816538214683533


training:  22%|██▏       | 4024/18500 [17:43:17<62:43:27, 15.60s/it]

training loss: 0.5055616497993469


training:  22%|██▏       | 4025/18500 [17:43:33<62:43:52, 15.60s/it]

training loss: 0.7573473453521729


training:  22%|██▏       | 4026/18500 [17:43:49<62:42:17, 15.60s/it]

training loss: 0.7559712529182434


training:  22%|██▏       | 4027/18500 [17:44:04<62:40:27, 15.59s/it]

training loss: 0.9376137256622314


training:  22%|██▏       | 4028/18500 [17:44:20<62:40:10, 15.59s/it]

training loss: 0.8830709457397461


training:  22%|██▏       | 4029/18500 [17:44:35<62:39:54, 15.59s/it]

training loss: 0.6935310363769531


training:  22%|██▏       | 4030/18500 [17:44:51<62:39:36, 15.59s/it]

training loss: 1.159543752670288


training:  22%|██▏       | 4031/18500 [17:45:07<62:38:59, 15.59s/it]

training loss: 0.8796631693840027


training:  22%|██▏       | 4032/18500 [17:45:22<62:41:15, 15.60s/it]

training loss: 0.7288402915000916


training:  22%|██▏       | 4033/18500 [17:45:38<62:46:58, 15.62s/it]

training loss: 0.580564022064209


training:  22%|██▏       | 4034/18500 [17:45:54<62:49:49, 15.64s/it]

training loss: 0.7250849604606628


training:  22%|██▏       | 4035/18500 [17:46:09<62:52:32, 15.65s/it]

training loss: 0.6155856847763062


training:  22%|██▏       | 4036/18500 [17:46:25<63:03:14, 15.69s/it]

training loss: 0.5838434100151062


training:  22%|██▏       | 4037/18500 [17:46:41<63:01:54, 15.69s/it]

training loss: 0.811913788318634


training:  22%|██▏       | 4038/18500 [17:46:56<63:00:19, 15.68s/it]

training loss: 0.7938045263290405


training:  22%|██▏       | 4039/18500 [17:47:12<62:57:48, 15.67s/it]

training loss: 0.4450483024120331


training:  22%|██▏       | 4040/18500 [17:47:28<62:58:32, 15.68s/it]

training loss: 1.06393563747406


training:  22%|██▏       | 4041/18500 [17:47:43<62:58:43, 15.68s/it]

training loss: 1.0794551372528076


training:  22%|██▏       | 4042/18500 [17:47:59<62:57:35, 15.68s/it]

training loss: 1.0423222780227661


training:  22%|██▏       | 4043/18500 [17:48:15<62:55:49, 15.67s/it]

training loss: 0.8037095069885254


training:  22%|██▏       | 4044/18500 [17:48:30<62:56:30, 15.67s/it]

training loss: 0.7901683449745178


training:  22%|██▏       | 4045/18500 [17:48:46<62:52:09, 15.66s/it]

training loss: 0.9065468311309814


training:  22%|██▏       | 4046/18500 [17:49:02<62:48:23, 15.64s/it]

training loss: 0.6498304009437561


training:  22%|██▏       | 4047/18500 [17:49:17<62:44:20, 15.63s/it]

training loss: 0.6061553955078125


training:  22%|██▏       | 4048/18500 [17:49:33<62:42:11, 15.62s/it]

training loss: 0.8974688053131104


training:  22%|██▏       | 4049/18500 [17:49:48<62:39:19, 15.61s/it]

training loss: 0.48870667815208435


training:  22%|██▏       | 4050/18500 [17:50:04<62:36:59, 15.60s/it]

training loss: 0.7082431316375732


training:  22%|██▏       | 4051/18500 [17:50:20<62:35:55, 15.60s/it]

training loss: 0.31792396306991577


training:  22%|██▏       | 4052/18500 [17:50:35<62:35:44, 15.60s/it]

training loss: 0.8333518505096436


training:  22%|██▏       | 4053/18500 [17:50:51<62:35:00, 15.59s/it]

training loss: 0.9648630619049072


training:  22%|██▏       | 4054/18500 [17:51:06<62:33:04, 15.59s/it]

training loss: 0.5840636491775513


training:  22%|██▏       | 4055/18500 [17:51:22<62:33:13, 15.59s/it]

training loss: 0.7667199969291687


training:  22%|██▏       | 4056/18500 [17:51:38<62:33:38, 15.59s/it]

training loss: 0.8288664221763611


training:  22%|██▏       | 4057/18500 [17:51:53<62:32:21, 15.59s/it]

training loss: 0.35133469104766846


training:  22%|██▏       | 4058/18500 [17:52:09<62:31:36, 15.59s/it]

training loss: 0.6644942760467529


training:  22%|██▏       | 4059/18500 [17:52:24<62:32:04, 15.59s/it]

training loss: 1.1755948066711426


training:  22%|██▏       | 4060/18500 [17:52:40<62:32:15, 15.59s/it]

training loss: 1.083412528038025


training:  22%|██▏       | 4061/18500 [17:52:55<62:32:36, 15.59s/it]

training loss: 1.1125588417053223


training:  22%|██▏       | 4062/18500 [17:53:11<62:30:50, 15.59s/it]

training loss: 0.8156546354293823


training:  22%|██▏       | 4063/18500 [17:53:27<62:31:09, 15.59s/it]

training loss: 0.7144702672958374


training:  22%|██▏       | 4064/18500 [17:53:42<62:30:22, 15.59s/it]

training loss: 0.6004160642623901


training:  22%|██▏       | 4065/18500 [17:53:58<62:29:55, 15.59s/it]

training loss: 1.0268399715423584


training:  22%|██▏       | 4066/18500 [17:54:13<62:28:34, 15.58s/it]

training loss: 1.4738304615020752


training:  22%|██▏       | 4067/18500 [17:54:29<62:30:02, 15.59s/it]

training loss: 1.2054036855697632


training:  22%|██▏       | 4068/18500 [17:54:45<62:29:12, 15.59s/it]

training loss: 0.7849145531654358


training:  22%|██▏       | 4069/18500 [17:55:00<62:28:46, 15.59s/it]

training loss: 0.8947104811668396


training:  22%|██▏       | 4070/18500 [17:55:16<62:28:04, 15.58s/it]

training loss: 0.653732180595398


training:  22%|██▏       | 4071/18500 [17:55:31<62:27:55, 15.58s/it]

training loss: 1.0139775276184082


training:  22%|██▏       | 4072/18500 [17:55:47<62:28:04, 15.59s/it]

training loss: 0.9964079856872559


training:  22%|██▏       | 4073/18500 [17:56:03<62:27:37, 15.59s/it]

training loss: 0.5345326662063599


training:  22%|██▏       | 4074/18500 [17:56:18<62:27:16, 15.59s/it]

training loss: 0.7787502408027649


training:  22%|██▏       | 4075/18500 [17:56:34<62:27:59, 15.59s/it]

training loss: 0.4621124267578125


training:  22%|██▏       | 4076/18500 [17:56:49<62:26:44, 15.59s/it]

training loss: 0.7976664304733276


training:  22%|██▏       | 4077/18500 [17:57:05<62:25:52, 15.58s/it]

training loss: 1.0428413152694702


training:  22%|██▏       | 4078/18500 [17:57:20<62:25:52, 15.58s/it]

training loss: 1.0484397411346436


training:  22%|██▏       | 4079/18500 [17:57:36<62:26:03, 15.59s/it]

training loss: 1.1081926822662354


training:  22%|██▏       | 4080/18500 [17:57:52<62:24:55, 15.58s/it]

training loss: 0.4117584228515625


training:  22%|██▏       | 4081/18500 [17:58:07<62:24:46, 15.58s/it]

training loss: 0.7080806493759155


training:  22%|██▏       | 4082/18500 [17:58:23<62:24:23, 15.58s/it]

training loss: 0.6384181976318359


training:  22%|██▏       | 4083/18500 [17:58:38<62:24:08, 15.58s/it]

training loss: 0.7587454319000244


training:  22%|██▏       | 4084/18500 [17:58:54<62:23:44, 15.58s/it]

training loss: 0.6959413886070251


training:  22%|██▏       | 4085/18500 [17:59:09<62:22:25, 15.58s/it]

training loss: 0.9449288845062256


training:  22%|██▏       | 4086/18500 [17:59:25<62:23:06, 15.58s/it]

training loss: 0.7714235782623291


training:  22%|██▏       | 4087/18500 [17:59:41<62:22:58, 15.58s/it]

training loss: 0.9705179333686829


training:  22%|██▏       | 4088/18500 [17:59:56<62:22:59, 15.58s/it]

training loss: 0.5737661719322205


training:  22%|██▏       | 4089/18500 [18:00:12<62:21:52, 15.58s/it]

training loss: 0.9542564749717712


training:  22%|██▏       | 4090/18500 [18:00:27<62:22:09, 15.58s/it]

training loss: 1.077157974243164


training:  22%|██▏       | 4091/18500 [18:00:43<62:21:28, 15.58s/it]

training loss: 0.9504485726356506


training:  22%|██▏       | 4092/18500 [18:00:59<62:21:06, 15.58s/it]

training loss: 0.9982864260673523


training:  22%|██▏       | 4093/18500 [18:01:14<62:19:48, 15.57s/it]

training loss: 1.1084609031677246


training:  22%|██▏       | 4094/18500 [18:01:30<62:21:25, 15.58s/it]

training loss: 0.914745569229126


training:  22%|██▏       | 4095/18500 [18:01:45<62:20:58, 15.58s/it]

training loss: 1.1570665836334229


training:  22%|██▏       | 4096/18500 [18:02:01<62:20:02, 15.58s/it]

training loss: 0.5593783855438232


training:  22%|██▏       | 4097/18500 [18:02:16<62:19:21, 15.58s/it]

training loss: 0.9245479106903076


training:  22%|██▏       | 4098/18500 [18:02:32<62:20:11, 15.58s/it]

training loss: 0.8834530115127563


training:  22%|██▏       | 4099/18500 [18:02:48<62:20:06, 15.58s/it]

training loss: 1.0392272472381592


training:  22%|██▏       | 4100/18500 [18:03:03<62:24:03, 15.60s/it]

training loss: 0.7966155409812927
training loss: 0.8366913795471191


training:  22%|██▏       | 4101/18500 [18:03:20<64:03:00, 16.01s/it]

validation loss: 1.4597405195236206


training:  22%|██▏       | 4102/18500 [18:03:36<63:32:23, 15.89s/it]

training loss: 1.1483681201934814


training:  22%|██▏       | 4103/18500 [18:03:51<63:09:42, 15.79s/it]

training loss: 1.0809252262115479


training:  22%|██▏       | 4104/18500 [18:04:07<62:54:34, 15.73s/it]

training loss: 0.30422765016555786


training:  22%|██▏       | 4105/18500 [18:04:23<62:43:47, 15.69s/it]

training loss: 0.7404918670654297


training:  22%|██▏       | 4106/18500 [18:04:38<62:34:50, 15.65s/it]

training loss: 0.8819319009780884


training:  22%|██▏       | 4107/18500 [18:04:54<62:29:44, 15.63s/it]

training loss: 0.8880342245101929


training:  22%|██▏       | 4108/18500 [18:05:09<62:26:19, 15.62s/it]

training loss: 0.7376683354377747


training:  22%|██▏       | 4109/18500 [18:05:25<62:23:41, 15.61s/it]

training loss: 0.4029381275177002


training:  22%|██▏       | 4110/18500 [18:05:40<62:20:50, 15.60s/it]

training loss: 0.5323711037635803


training:  22%|██▏       | 4111/18500 [18:05:56<62:19:29, 15.59s/it]

training loss: 0.34196072816848755


training:  22%|██▏       | 4112/18500 [18:06:12<62:17:46, 15.59s/it]

training loss: 1.1266361474990845


training:  22%|██▏       | 4113/18500 [18:06:27<62:16:35, 15.58s/it]

training loss: 1.1364754438400269


training:  22%|██▏       | 4114/18500 [18:06:43<62:16:49, 15.59s/it]

training loss: 0.6934646964073181


training:  22%|██▏       | 4115/18500 [18:06:58<62:16:17, 15.58s/it]

training loss: 0.6167710423469543


training:  22%|██▏       | 4116/18500 [18:07:14<62:15:48, 15.58s/it]

training loss: 0.4791489839553833


training:  22%|██▏       | 4117/18500 [18:07:30<62:16:11, 15.59s/it]

training loss: 0.9362566471099854


training:  22%|██▏       | 4118/18500 [18:07:45<62:15:59, 15.59s/it]

training loss: 1.0949804782867432


training:  22%|██▏       | 4119/18500 [18:08:01<62:15:02, 15.58s/it]

training loss: 0.8427324295043945


training:  22%|██▏       | 4120/18500 [18:08:16<62:13:37, 15.58s/it]

training loss: 0.5924571752548218


training:  22%|██▏       | 4121/18500 [18:08:32<62:14:19, 15.58s/it]

training loss: 0.8062769174575806


training:  22%|██▏       | 4122/18500 [18:08:47<62:15:27, 15.59s/it]

training loss: 0.6285887360572815


training:  22%|██▏       | 4123/18500 [18:09:03<62:13:55, 15.58s/it]

training loss: 0.611721396446228


training:  22%|██▏       | 4124/18500 [18:09:19<62:14:08, 15.58s/it]

training loss: 0.9724750518798828


training:  22%|██▏       | 4125/18500 [18:09:34<62:12:25, 15.58s/it]

training loss: 0.7931159734725952


training:  22%|██▏       | 4126/18500 [18:09:50<62:12:21, 15.58s/it]

training loss: 0.6773744821548462


training:  22%|██▏       | 4127/18500 [18:10:05<62:12:14, 15.58s/it]

training loss: 0.863940417766571


training:  22%|██▏       | 4128/18500 [18:10:21<62:12:29, 15.58s/it]

training loss: 1.1395710706710815


training:  22%|██▏       | 4129/18500 [18:10:37<62:11:40, 15.58s/it]

training loss: 0.7926915884017944


training:  22%|██▏       | 4130/18500 [18:10:52<62:11:10, 15.58s/it]

training loss: 0.6030827164649963


training:  22%|██▏       | 4131/18500 [18:11:08<62:10:16, 15.58s/it]

training loss: 0.5024364590644836


training:  22%|██▏       | 4132/18500 [18:11:23<62:10:06, 15.58s/it]

training loss: 0.8979838490486145


training:  22%|██▏       | 4133/18500 [18:11:39<62:09:39, 15.58s/it]

training loss: 0.8776261210441589


training:  22%|██▏       | 4134/18500 [18:11:54<62:10:45, 15.58s/it]

training loss: 0.7467896938323975


training:  22%|██▏       | 4135/18500 [18:12:10<62:09:44, 15.58s/it]

training loss: 0.6888046264648438


training:  22%|██▏       | 4136/18500 [18:12:26<62:09:28, 15.58s/it]

training loss: 0.8403272032737732


training:  22%|██▏       | 4137/18500 [18:12:41<62:09:07, 15.58s/it]

training loss: 0.7687438726425171


training:  22%|██▏       | 4138/18500 [18:12:57<62:09:16, 15.58s/it]

training loss: 0.9495837092399597


training:  22%|██▏       | 4139/18500 [18:13:12<62:08:00, 15.58s/it]

training loss: 0.6689352989196777


training:  22%|██▏       | 4140/18500 [18:13:28<62:07:52, 15.58s/it]

training loss: 1.0651535987854004


training:  22%|██▏       | 4141/18500 [18:13:43<62:09:37, 15.58s/it]

training loss: 0.6264102458953857


training:  22%|██▏       | 4142/18500 [18:13:59<62:08:28, 15.58s/it]

training loss: 0.9851425886154175


training:  22%|██▏       | 4143/18500 [18:14:15<62:07:08, 15.58s/it]

training loss: 0.4259048402309418


training:  22%|██▏       | 4144/18500 [18:14:30<62:06:41, 15.58s/it]

training loss: 1.0420266389846802


training:  22%|██▏       | 4145/18500 [18:14:46<62:06:33, 15.58s/it]

training loss: 0.6198698282241821


training:  22%|██▏       | 4146/18500 [18:15:01<62:07:28, 15.58s/it]

training loss: 0.9216759204864502


training:  22%|██▏       | 4147/18500 [18:15:17<62:07:12, 15.58s/it]

training loss: 0.679746150970459


training:  22%|██▏       | 4148/18500 [18:15:33<62:08:29, 15.59s/it]

training loss: 0.9971311092376709


training:  22%|██▏       | 4149/18500 [18:15:48<62:08:58, 15.59s/it]

training loss: 0.7148755788803101


training:  22%|██▏       | 4150/18500 [18:16:04<62:07:25, 15.59s/it]

training loss: 0.30220961570739746


training:  22%|██▏       | 4151/18500 [18:16:19<62:06:37, 15.58s/it]

training loss: 0.8307140469551086


training:  22%|██▏       | 4152/18500 [18:16:35<62:05:45, 15.58s/it]

training loss: 0.9777757525444031


training:  22%|██▏       | 4153/18500 [18:16:50<62:06:36, 15.58s/it]

training loss: 0.8960739374160767


training:  22%|██▏       | 4154/18500 [18:17:06<62:06:56, 15.59s/it]

training loss: 0.8790796399116516


training:  22%|██▏       | 4155/18500 [18:17:22<62:13:58, 15.62s/it]

training loss: 0.5578892230987549


training:  22%|██▏       | 4156/18500 [18:17:37<62:17:35, 15.63s/it]

training loss: 0.8345416784286499


training:  22%|██▏       | 4157/18500 [18:17:53<62:20:33, 15.65s/it]

training loss: 0.8104475736618042


training:  22%|██▏       | 4158/18500 [18:18:09<62:22:24, 15.66s/it]

training loss: 0.9621543884277344


training:  22%|██▏       | 4159/18500 [18:18:24<62:24:48, 15.67s/it]

training loss: 0.7261573672294617


training:  22%|██▏       | 4160/18500 [18:18:40<62:26:56, 15.68s/it]

training loss: 1.3709869384765625


training:  22%|██▏       | 4161/18500 [18:18:56<62:29:36, 15.69s/it]

training loss: 0.5720369219779968


training:  22%|██▏       | 4162/18500 [18:19:12<62:26:41, 15.68s/it]

training loss: 0.717835545539856


training:  23%|██▎       | 4163/18500 [18:19:27<62:26:42, 15.68s/it]

training loss: 0.919503390789032


training:  23%|██▎       | 4164/18500 [18:19:43<62:22:14, 15.66s/it]

training loss: 0.9336076974868774


training:  23%|██▎       | 4165/18500 [18:19:58<62:19:35, 15.65s/it]

training loss: 0.47348088026046753


training:  23%|██▎       | 4166/18500 [18:20:14<62:14:19, 15.63s/it]

training loss: 0.9879573583602905


training:  23%|██▎       | 4167/18500 [18:20:30<62:09:20, 15.61s/it]

training loss: 0.9172840714454651


training:  23%|██▎       | 4168/18500 [18:20:45<62:06:48, 15.60s/it]

training loss: 0.6355084776878357


training:  23%|██▎       | 4169/18500 [18:21:01<62:05:18, 15.60s/it]

training loss: 1.0330814123153687


training:  23%|██▎       | 4170/18500 [18:21:16<62:02:48, 15.59s/it]

training loss: 0.9808580279350281


training:  23%|██▎       | 4171/18500 [18:21:32<62:02:09, 15.59s/it]

training loss: 0.9074289798736572


training:  23%|██▎       | 4172/18500 [18:21:48<62:02:24, 15.59s/it]

training loss: 1.0135743618011475


training:  23%|██▎       | 4173/18500 [18:22:03<62:00:41, 15.58s/it]

training loss: 0.6354975700378418


training:  23%|██▎       | 4174/18500 [18:22:19<62:00:22, 15.58s/it]

training loss: 0.6873161196708679


training:  23%|██▎       | 4175/18500 [18:22:34<61:58:45, 15.58s/it]

training loss: 0.8700075745582581


training:  23%|██▎       | 4176/18500 [18:22:50<61:59:02, 15.58s/it]

training loss: 1.0882761478424072


training:  23%|██▎       | 4177/18500 [18:23:05<61:58:56, 15.58s/it]

training loss: 0.9797306656837463


training:  23%|██▎       | 4178/18500 [18:23:21<61:59:00, 15.58s/it]

training loss: 0.8045667409896851


training:  23%|██▎       | 4179/18500 [18:23:37<61:57:48, 15.58s/it]

training loss: 0.8737547993659973


training:  23%|██▎       | 4180/18500 [18:23:52<61:57:37, 15.58s/it]

training loss: 0.8768472671508789


training:  23%|██▎       | 4181/18500 [18:24:08<61:57:08, 15.58s/it]

training loss: 0.4903240501880646


training:  23%|██▎       | 4182/18500 [18:24:23<61:56:50, 15.58s/it]

training loss: 0.4244410991668701


training:  23%|██▎       | 4183/18500 [18:24:39<61:56:10, 15.57s/it]

training loss: 1.047814130783081


training:  23%|██▎       | 4184/18500 [18:24:54<61:57:01, 15.58s/it]

training loss: 0.3960372805595398


training:  23%|██▎       | 4185/18500 [18:25:10<61:55:39, 15.57s/it]

training loss: 0.772605836391449


training:  23%|██▎       | 4186/18500 [18:25:26<61:55:40, 15.58s/it]

training loss: 0.7700245976448059


training:  23%|██▎       | 4187/18500 [18:25:41<61:55:28, 15.58s/it]

training loss: 0.9611074924468994


training:  23%|██▎       | 4188/18500 [18:25:57<61:54:52, 15.57s/it]

training loss: 0.67686527967453


training:  23%|██▎       | 4189/18500 [18:26:12<61:53:20, 15.57s/it]

training loss: 0.9763002395629883


training:  23%|██▎       | 4190/18500 [18:26:28<61:53:21, 15.57s/it]

training loss: 0.9008364677429199


training:  23%|██▎       | 4191/18500 [18:26:43<61:54:22, 15.57s/it]

training loss: 0.6859762072563171


training:  23%|██▎       | 4192/18500 [18:26:59<61:54:14, 15.58s/it]

training loss: 0.991187572479248


training:  23%|██▎       | 4193/18500 [18:27:15<61:53:13, 15.57s/it]

training loss: 0.9132927656173706


training:  23%|██▎       | 4194/18500 [18:27:30<61:53:04, 15.57s/it]

training loss: 0.8938978910446167


training:  23%|██▎       | 4195/18500 [18:27:46<61:54:04, 15.58s/it]

training loss: 0.7928852438926697


training:  23%|██▎       | 4196/18500 [18:28:01<61:54:08, 15.58s/it]

training loss: 0.6227152347564697


training:  23%|██▎       | 4197/18500 [18:28:17<61:53:00, 15.58s/it]

training loss: 0.6769031882286072


training:  23%|██▎       | 4198/18500 [18:28:32<61:53:15, 15.58s/it]

training loss: 0.9952025413513184


training:  23%|██▎       | 4199/18500 [18:28:48<61:53:22, 15.58s/it]

training loss: 0.6947987079620361


training:  23%|██▎       | 4200/18500 [18:29:04<61:52:30, 15.58s/it]

training loss: 0.4205872714519501
training loss: 0.5912899374961853


training:  23%|██▎       | 4201/18500 [18:29:21<63:32:18, 16.00s/it]

validation loss: 1.5352742671966553


training:  23%|██▎       | 4202/18500 [18:29:36<63:03:35, 15.88s/it]

training loss: 0.7309406995773315


training:  23%|██▎       | 4203/18500 [18:29:52<62:42:22, 15.79s/it]

training loss: 0.7412899732589722


training:  23%|██▎       | 4204/18500 [18:30:07<62:26:21, 15.72s/it]

training loss: 0.7776844501495361


training:  23%|██▎       | 4205/18500 [18:30:23<62:15:39, 15.68s/it]

training loss: 0.6969232559204102


training:  23%|██▎       | 4206/18500 [18:30:39<62:07:55, 15.65s/it]

training loss: 0.5655368566513062


training:  23%|██▎       | 4207/18500 [18:30:54<62:03:42, 15.63s/it]

training loss: 1.012147068977356


training:  23%|██▎       | 4208/18500 [18:31:10<61:58:58, 15.61s/it]

training loss: 1.0622135400772095


training:  23%|██▎       | 4209/18500 [18:31:25<61:56:32, 15.60s/it]

training loss: 0.9784255027770996


training:  23%|██▎       | 4210/18500 [18:31:41<61:54:16, 15.60s/it]

training loss: 0.6692401170730591


training:  23%|██▎       | 4211/18500 [18:31:56<61:52:41, 15.59s/it]

training loss: 0.4958363175392151


training:  23%|██▎       | 4212/18500 [18:32:12<61:51:06, 15.58s/it]

training loss: 0.5238944292068481


training:  23%|██▎       | 4213/18500 [18:32:28<61:50:42, 15.58s/it]

training loss: 0.5757643580436707


training:  23%|██▎       | 4214/18500 [18:32:43<61:51:16, 15.59s/it]

training loss: 0.7650049328804016


training:  23%|██▎       | 4215/18500 [18:32:59<61:50:43, 15.59s/it]

training loss: 0.624164342880249


training:  23%|██▎       | 4216/18500 [18:33:14<61:49:53, 15.58s/it]

training loss: 0.78125


training:  23%|██▎       | 4217/18500 [18:33:30<61:49:52, 15.58s/it]

training loss: 0.7132320404052734


training:  23%|██▎       | 4218/18500 [18:33:46<61:50:20, 15.59s/it]

training loss: 0.9035646319389343


training:  23%|██▎       | 4219/18500 [18:34:01<61:49:59, 15.59s/it]

training loss: 1.3402131795883179


training:  23%|██▎       | 4220/18500 [18:34:17<61:49:53, 15.59s/it]

training loss: 0.529474675655365


training:  23%|██▎       | 4221/18500 [18:34:32<61:50:16, 15.59s/it]

training loss: 0.9728111028671265


training:  23%|██▎       | 4222/18500 [18:34:48<61:50:54, 15.59s/it]

training loss: 0.6053392887115479


training:  23%|██▎       | 4223/18500 [18:35:03<61:49:24, 15.59s/it]

training loss: 0.9218639135360718


training:  23%|██▎       | 4224/18500 [18:35:19<61:49:23, 15.59s/it]

training loss: 0.7149724960327148


training:  23%|██▎       | 4225/18500 [18:35:35<61:48:21, 15.59s/it]

training loss: 0.8688418865203857


training:  23%|██▎       | 4226/18500 [18:35:50<61:48:51, 15.59s/it]

training loss: 0.6931079626083374


training:  23%|██▎       | 4227/18500 [18:36:06<61:48:36, 15.59s/it]

training loss: 0.6377733945846558


training:  23%|██▎       | 4228/18500 [18:36:21<61:48:58, 15.59s/it]

training loss: 0.7175503969192505


training:  23%|██▎       | 4229/18500 [18:36:37<61:47:59, 15.59s/it]

training loss: 1.0031369924545288


training:  23%|██▎       | 4230/18500 [18:36:53<61:47:40, 15.59s/it]

training loss: 0.4135880172252655


training:  23%|██▎       | 4231/18500 [18:37:08<61:47:40, 15.59s/it]

training loss: 1.1390928030014038


training:  23%|██▎       | 4232/18500 [18:37:24<61:49:02, 15.60s/it]

training loss: 0.6995711326599121


training:  23%|██▎       | 4233/18500 [18:37:39<61:48:44, 15.60s/it]

training loss: 0.8106083869934082


training:  23%|██▎       | 4234/18500 [18:37:55<61:48:54, 15.60s/it]

training loss: 0.8930307626724243


training:  23%|██▎       | 4235/18500 [18:38:11<61:47:13, 15.59s/it]

training loss: 0.6201558113098145


training:  23%|██▎       | 4236/18500 [18:38:26<61:47:20, 15.59s/it]

training loss: 0.5014588236808777


training:  23%|██▎       | 4237/18500 [18:38:42<61:47:23, 15.60s/it]

training loss: 0.5376217365264893


training:  23%|██▎       | 4238/18500 [18:38:57<61:47:16, 15.60s/it]

training loss: 0.26610952615737915


training:  23%|██▎       | 4239/18500 [18:39:13<61:46:24, 15.59s/it]

training loss: 0.8442202806472778


training:  23%|██▎       | 4240/18500 [18:39:29<61:47:58, 15.60s/it]

training loss: 1.0099390745162964


training:  23%|██▎       | 4241/18500 [18:39:44<61:47:32, 15.60s/it]

training loss: 0.6491010785102844


training:  23%|██▎       | 4242/18500 [18:40:00<61:47:23, 15.60s/it]

training loss: 0.7638675570487976


training:  23%|██▎       | 4243/18500 [18:40:15<61:46:20, 15.60s/it]

training loss: 0.7972771525382996


training:  23%|██▎       | 4244/18500 [18:40:31<61:45:49, 15.60s/it]

training loss: 0.7460410594940186


training:  23%|██▎       | 4245/18500 [18:40:47<61:46:22, 15.60s/it]

training loss: 0.8828700184822083


training:  23%|██▎       | 4246/18500 [18:41:02<61:46:03, 15.60s/it]

training loss: 0.5019119381904602


training:  23%|██▎       | 4247/18500 [18:41:18<61:45:52, 15.60s/it]

training loss: 0.9141327738761902


training:  23%|██▎       | 4248/18500 [18:41:33<61:45:39, 15.60s/it]

training loss: 0.8418543934822083


training:  23%|██▎       | 4249/18500 [18:41:49<61:45:11, 15.60s/it]

training loss: 0.7788969874382019


training:  23%|██▎       | 4250/18500 [18:42:05<61:44:38, 15.60s/it]

training loss: 0.7512346506118774


training:  23%|██▎       | 4251/18500 [18:42:20<61:44:49, 15.60s/it]

training loss: 0.7087596654891968


training:  23%|██▎       | 4252/18500 [18:42:36<61:44:53, 15.60s/it]

training loss: 0.6626724004745483


training:  23%|██▎       | 4253/18500 [18:42:51<61:45:14, 15.60s/it]

training loss: 0.7328144311904907


training:  23%|██▎       | 4254/18500 [18:43:07<61:44:18, 15.60s/it]

training loss: 0.6092349886894226


training:  23%|██▎       | 4255/18500 [18:43:23<61:43:59, 15.60s/it]

training loss: 0.4961017370223999


training:  23%|██▎       | 4256/18500 [18:43:38<61:42:53, 15.60s/it]

training loss: 0.47977158427238464


training:  23%|██▎       | 4257/18500 [18:43:54<61:43:14, 15.60s/it]

training loss: 0.7649712562561035


training:  23%|██▎       | 4258/18500 [18:44:09<61:42:59, 15.60s/it]

training loss: 0.6050227880477905


training:  23%|██▎       | 4259/18500 [18:44:25<61:43:13, 15.60s/it]

training loss: 1.0489121675491333


training:  23%|██▎       | 4260/18500 [18:44:41<61:44:24, 15.61s/it]

training loss: 0.7480979561805725


training:  23%|██▎       | 4261/18500 [18:44:56<61:43:18, 15.60s/it]

training loss: 0.8895608186721802


training:  23%|██▎       | 4262/18500 [18:45:12<61:42:06, 15.60s/it]

training loss: 0.8020904064178467


training:  23%|██▎       | 4263/18500 [18:45:27<61:42:20, 15.60s/it]

training loss: 0.7962436079978943


training:  23%|██▎       | 4264/18500 [18:45:43<61:46:41, 15.62s/it]

training loss: 0.6479339599609375


training:  23%|██▎       | 4265/18500 [18:45:59<61:53:42, 15.65s/it]

training loss: 0.831466555595398


training:  23%|██▎       | 4266/18500 [18:46:14<61:56:26, 15.67s/it]

training loss: 0.7117665410041809


training:  23%|██▎       | 4267/18500 [18:46:30<62:00:30, 15.68s/it]

training loss: 1.0343607664108276


training:  23%|██▎       | 4268/18500 [18:46:46<62:04:22, 15.70s/it]

training loss: 0.7225428223609924


training:  23%|██▎       | 4269/18500 [18:47:02<62:05:04, 15.71s/it]

training loss: 0.7481777667999268


training:  23%|██▎       | 4270/18500 [18:47:17<62:05:40, 15.71s/it]

training loss: 0.9936138391494751


training:  23%|██▎       | 4271/18500 [18:47:33<62:07:27, 15.72s/it]

training loss: 0.2566559910774231


training:  23%|██▎       | 4272/18500 [18:47:49<62:07:53, 15.72s/it]

training loss: 0.7308359742164612


training:  23%|██▎       | 4273/18500 [18:48:05<62:06:30, 15.72s/it]

training loss: 0.8647144436836243


training:  23%|██▎       | 4274/18500 [18:48:20<62:05:09, 15.71s/it]

training loss: 0.8619667887687683


training:  23%|██▎       | 4275/18500 [18:48:36<62:03:32, 15.71s/it]

training loss: 0.6390945315361023


training:  23%|██▎       | 4276/18500 [18:48:52<61:59:02, 15.69s/it]

training loss: 0.726285457611084


training:  23%|██▎       | 4277/18500 [18:49:07<61:52:31, 15.66s/it]

training loss: 1.0757622718811035


training:  23%|██▎       | 4278/18500 [18:49:23<61:48:17, 15.64s/it]

training loss: 0.8327051401138306


training:  23%|██▎       | 4279/18500 [18:49:38<61:44:49, 15.63s/it]

training loss: 1.0027804374694824


training:  23%|██▎       | 4280/18500 [18:49:54<61:43:50, 15.63s/it]

training loss: 1.0068410634994507


training:  23%|██▎       | 4281/18500 [18:50:10<61:41:46, 15.62s/it]

training loss: 1.0906027555465698


training:  23%|██▎       | 4282/18500 [18:50:25<61:40:40, 15.62s/it]

training loss: 0.6573885679244995


training:  23%|██▎       | 4283/18500 [18:50:41<61:39:37, 15.61s/it]

training loss: 0.8564663529396057


training:  23%|██▎       | 4284/18500 [18:50:56<61:38:14, 15.61s/it]

training loss: 0.7242690324783325


training:  23%|██▎       | 4285/18500 [18:51:12<61:36:13, 15.60s/it]

training loss: 0.6948225498199463


training:  23%|██▎       | 4286/18500 [18:51:28<61:36:38, 15.60s/it]

training loss: 0.7586416602134705


training:  23%|██▎       | 4287/18500 [18:51:43<61:36:53, 15.61s/it]

training loss: 0.6325157284736633


training:  23%|██▎       | 4288/18500 [18:51:59<61:36:50, 15.61s/it]

training loss: 0.3802076578140259


training:  23%|██▎       | 4289/18500 [18:52:14<61:35:53, 15.60s/it]

training loss: 0.5889459848403931


training:  23%|██▎       | 4290/18500 [18:52:30<61:35:22, 15.60s/it]

training loss: 0.6052414774894714


training:  23%|██▎       | 4291/18500 [18:52:46<61:35:44, 15.61s/it]

training loss: 0.8599927425384521


training:  23%|██▎       | 4292/18500 [18:53:01<61:36:02, 15.61s/it]

training loss: 0.7992238998413086


training:  23%|██▎       | 4293/18500 [18:53:17<61:35:41, 15.61s/it]

training loss: 0.6837519407272339


training:  23%|██▎       | 4294/18500 [18:53:33<61:36:01, 15.61s/it]

training loss: 0.5770198106765747


training:  23%|██▎       | 4295/18500 [18:53:48<61:35:21, 15.61s/it]

training loss: 0.5490071773529053


training:  23%|██▎       | 4296/18500 [18:54:04<61:34:20, 15.61s/it]

training loss: 0.707183301448822


training:  23%|██▎       | 4297/18500 [18:54:19<61:33:59, 15.61s/it]

training loss: 1.027598261833191


training:  23%|██▎       | 4298/18500 [18:54:35<61:33:20, 15.60s/it]

training loss: 0.48797518014907837


training:  23%|██▎       | 4299/18500 [18:54:51<61:33:44, 15.61s/it]

training loss: 1.0809029340744019


training:  23%|██▎       | 4300/18500 [18:55:06<61:34:01, 15.61s/it]

training loss: 0.8791375160217285
training loss: 1.0661484003067017


training:  23%|██▎       | 4301/18500 [18:55:23<63:13:07, 16.03s/it]

validation loss: 1.5076055526733398


training:  23%|██▎       | 4302/18500 [18:55:39<62:42:57, 15.90s/it]

training loss: 0.6627389192581177


training:  23%|██▎       | 4303/18500 [18:55:54<62:22:24, 15.82s/it]

training loss: 0.5398274064064026


training:  23%|██▎       | 4304/18500 [18:56:10<62:06:23, 15.75s/it]

training loss: 1.3408973217010498


training:  23%|██▎       | 4305/18500 [18:56:26<61:55:45, 15.71s/it]

training loss: 0.9357924461364746


training:  23%|██▎       | 4306/18500 [18:56:41<61:48:03, 15.67s/it]

training loss: 0.6930054426193237


training:  23%|██▎       | 4307/18500 [18:56:57<61:42:40, 15.65s/it]

training loss: 0.7103433012962341


training:  23%|██▎       | 4308/18500 [18:57:12<61:38:03, 15.63s/it]

training loss: 0.44383883476257324


training:  23%|██▎       | 4309/18500 [18:57:28<61:35:40, 15.63s/it]

training loss: 0.6635854244232178


training:  23%|██▎       | 4310/18500 [18:57:44<61:34:46, 15.62s/it]

training loss: 0.8899675011634827


training:  23%|██▎       | 4311/18500 [18:57:59<61:33:06, 15.62s/it]

training loss: 0.8389099836349487


training:  23%|██▎       | 4312/18500 [18:58:15<61:30:57, 15.61s/it]

training loss: 0.8256781101226807


training:  23%|██▎       | 4313/18500 [18:58:30<61:30:15, 15.61s/it]

training loss: 0.9313499331474304


training:  23%|██▎       | 4314/18500 [18:58:46<61:30:21, 15.61s/it]

training loss: 0.7714874148368835


training:  23%|██▎       | 4315/18500 [18:59:02<61:30:19, 15.61s/it]

training loss: 0.7301957607269287


training:  23%|██▎       | 4316/18500 [18:59:17<61:29:40, 15.61s/it]

training loss: 0.7905397415161133


training:  23%|██▎       | 4317/18500 [18:59:33<61:29:41, 15.61s/it]

training loss: 0.6890934705734253


training:  23%|██▎       | 4318/18500 [18:59:48<61:29:21, 15.61s/it]

training loss: 0.4151178300380707


training:  23%|██▎       | 4319/18500 [19:00:04<61:28:08, 15.60s/it]

training loss: 0.6248561143875122


training:  23%|██▎       | 4320/18500 [19:00:20<61:28:24, 15.61s/it]

training loss: 0.4487858712673187


training:  23%|██▎       | 4321/18500 [19:00:35<61:28:03, 15.61s/it]

training loss: 0.7477281093597412


training:  23%|██▎       | 4322/18500 [19:00:51<61:36:10, 15.64s/it]

training loss: 0.858805239200592


training:  23%|██▎       | 4323/18500 [19:01:07<61:32:18, 15.63s/it]

training loss: 0.7534345984458923


training:  23%|██▎       | 4324/18500 [19:01:22<61:30:52, 15.62s/it]

training loss: 0.7234289646148682


training:  23%|██▎       | 4325/18500 [19:01:38<61:28:58, 15.61s/it]

training loss: 0.6430987119674683


training:  23%|██▎       | 4326/18500 [19:01:53<61:27:37, 15.61s/it]

training loss: 0.7012338638305664


training:  23%|██▎       | 4327/18500 [19:02:09<61:26:43, 15.61s/it]

training loss: 0.954310953617096


training:  23%|██▎       | 4328/18500 [19:02:25<61:26:40, 15.61s/it]

training loss: 0.9547348618507385


training:  23%|██▎       | 4329/18500 [19:02:40<61:26:41, 15.61s/it]

training loss: 0.5808538198471069


training:  23%|██▎       | 4330/18500 [19:02:56<61:26:55, 15.61s/it]

training loss: 0.7779634594917297


training:  23%|██▎       | 4331/18500 [19:03:11<61:25:23, 15.61s/it]

training loss: 0.8660671710968018


training:  23%|██▎       | 4332/18500 [19:03:27<61:24:27, 15.60s/it]

training loss: 0.561372697353363


training:  23%|██▎       | 4333/18500 [19:03:43<61:24:09, 15.60s/it]

training loss: 1.1539686918258667


training:  23%|██▎       | 4334/18500 [19:03:58<61:23:45, 15.60s/it]

training loss: 1.1602805852890015


training:  23%|██▎       | 4335/18500 [19:04:14<61:22:45, 15.60s/it]

training loss: 0.7640244364738464


training:  23%|██▎       | 4336/18500 [19:04:29<61:22:39, 15.60s/it]

training loss: 0.8517048358917236


training:  23%|██▎       | 4337/18500 [19:04:45<61:22:29, 15.60s/it]

training loss: 1.1545065641403198


training:  23%|██▎       | 4338/18500 [19:05:01<61:22:09, 15.60s/it]

training loss: 0.8075425624847412


training:  23%|██▎       | 4339/18500 [19:05:16<61:21:45, 15.60s/it]

training loss: 0.7670323848724365


training:  23%|██▎       | 4340/18500 [19:05:32<61:22:26, 15.60s/it]

training loss: 0.8849558234214783


training:  23%|██▎       | 4341/18500 [19:05:47<61:23:14, 15.61s/it]

training loss: 0.9555047750473022


training:  23%|██▎       | 4342/18500 [19:06:03<61:21:50, 15.60s/it]

training loss: 0.8199255466461182


training:  23%|██▎       | 4343/18500 [19:06:19<61:21:33, 15.60s/it]

training loss: 0.5706225633621216


training:  23%|██▎       | 4344/18500 [19:06:34<61:20:31, 15.60s/it]

training loss: 0.7870467305183411


training:  23%|██▎       | 4345/18500 [19:06:50<61:20:50, 15.60s/it]

training loss: 0.8622347712516785


training:  23%|██▎       | 4346/18500 [19:07:05<61:20:38, 15.60s/it]

training loss: 0.7871745824813843


training:  23%|██▎       | 4347/18500 [19:07:21<61:21:04, 15.61s/it]

training loss: 0.8775941133499146


training:  24%|██▎       | 4348/18500 [19:07:37<61:19:56, 15.60s/it]

training loss: 0.845079779624939


training:  24%|██▎       | 4349/18500 [19:07:52<61:20:24, 15.60s/it]

training loss: 0.611743688583374


training:  24%|██▎       | 4350/18500 [19:08:08<61:19:42, 15.60s/it]

training loss: 0.7602338194847107


training:  24%|██▎       | 4351/18500 [19:08:23<61:19:13, 15.60s/it]

training loss: 0.29191985726356506


training:  24%|██▎       | 4352/18500 [19:08:39<61:18:27, 15.60s/it]

training loss: 0.9245094656944275


training:  24%|██▎       | 4353/18500 [19:08:55<61:20:01, 15.61s/it]

training loss: 0.849352240562439


training:  24%|██▎       | 4354/18500 [19:09:10<61:18:26, 15.60s/it]

training loss: 0.803263247013092


training:  24%|██▎       | 4355/18500 [19:09:26<61:18:14, 15.60s/it]

training loss: 0.3061809241771698


training:  24%|██▎       | 4356/18500 [19:09:41<61:18:23, 15.60s/it]

training loss: 0.8546369075775146


training:  24%|██▎       | 4357/18500 [19:09:57<61:18:34, 15.61s/it]

training loss: 0.8511092662811279


training:  24%|██▎       | 4358/18500 [19:10:13<61:17:31, 15.60s/it]

training loss: 0.3683289587497711


training:  24%|██▎       | 4359/18500 [19:10:28<61:18:34, 15.61s/it]

training loss: 0.7978866100311279


training:  24%|██▎       | 4360/18500 [19:10:44<61:18:48, 15.61s/it]

training loss: 0.5842898488044739


training:  24%|██▎       | 4361/18500 [19:10:59<61:18:05, 15.61s/it]

training loss: 0.7083778381347656


training:  24%|██▎       | 4362/18500 [19:11:15<61:17:07, 15.61s/it]

training loss: 0.7186495065689087


training:  24%|██▎       | 4363/18500 [19:11:31<61:16:24, 15.60s/it]

training loss: 0.7642098665237427


training:  24%|██▎       | 4364/18500 [19:11:46<61:16:55, 15.61s/it]

training loss: 0.7060567140579224


training:  24%|██▎       | 4365/18500 [19:12:02<61:17:06, 15.61s/it]

training loss: 1.1154041290283203


training:  24%|██▎       | 4366/18500 [19:12:18<61:16:13, 15.61s/it]

training loss: 0.6159734129905701


training:  24%|██▎       | 4367/18500 [19:12:33<61:16:01, 15.61s/it]

training loss: 0.4448547661304474


training:  24%|██▎       | 4368/18500 [19:12:49<61:14:31, 15.60s/it]

training loss: 0.9020398855209351


training:  24%|██▎       | 4369/18500 [19:13:04<61:14:27, 15.60s/it]

training loss: 0.7541593313217163


training:  24%|██▎       | 4370/18500 [19:13:20<61:14:22, 15.60s/it]

training loss: 0.7132041454315186


training:  24%|██▎       | 4371/18500 [19:13:36<61:13:46, 15.60s/it]

training loss: 1.0539476871490479


training:  24%|██▎       | 4372/18500 [19:13:51<61:13:26, 15.60s/it]

training loss: 0.9392116665840149


training:  24%|██▎       | 4373/18500 [19:14:07<61:12:34, 15.60s/it]

training loss: 1.047220230102539


training:  24%|██▎       | 4374/18500 [19:14:22<61:12:02, 15.60s/it]

training loss: 0.8377465605735779


training:  24%|██▎       | 4375/18500 [19:14:38<61:11:31, 15.60s/it]

training loss: 0.5715388059616089


training:  24%|██▎       | 4376/18500 [19:14:54<61:11:41, 15.60s/it]

training loss: 0.7676947116851807


training:  24%|██▎       | 4377/18500 [19:15:09<61:11:37, 15.60s/it]

training loss: 0.3834996223449707


training:  24%|██▎       | 4378/18500 [19:15:25<61:12:20, 15.60s/it]

training loss: 1.005063772201538


training:  24%|██▎       | 4379/18500 [19:15:40<61:14:15, 15.61s/it]

training loss: 0.5778847932815552


training:  24%|██▎       | 4380/18500 [19:15:56<61:14:52, 15.62s/it]

training loss: 0.5207204818725586


training:  24%|██▎       | 4381/18500 [19:16:12<61:13:08, 15.61s/it]

training loss: 0.7575777173042297


training:  24%|██▎       | 4382/18500 [19:16:27<61:12:56, 15.61s/it]

training loss: 0.8282811045646667


training:  24%|██▎       | 4383/18500 [19:16:43<61:12:40, 15.61s/it]

training loss: 0.6548040509223938


training:  24%|██▎       | 4384/18500 [19:16:58<61:12:24, 15.61s/it]

training loss: 0.910635232925415


training:  24%|██▎       | 4385/18500 [19:17:14<61:11:41, 15.61s/it]

training loss: 0.860771119594574


training:  24%|██▎       | 4386/18500 [19:17:30<61:11:43, 15.61s/it]

training loss: 1.5726228952407837


training:  24%|██▎       | 4387/18500 [19:17:45<61:11:26, 15.61s/it]

training loss: 0.7640820145606995


training:  24%|██▎       | 4388/18500 [19:18:01<61:17:10, 15.63s/it]

training loss: 0.8037587404251099


training:  24%|██▎       | 4389/18500 [19:18:17<61:22:21, 15.66s/it]

training loss: 1.0999163389205933


training:  24%|██▎       | 4390/18500 [19:18:32<61:25:53, 15.67s/it]

training loss: 1.2692627906799316


training:  24%|██▎       | 4391/18500 [19:18:48<61:30:44, 15.70s/it]

training loss: 0.8889079689979553


training:  24%|██▎       | 4392/18500 [19:19:04<61:31:10, 15.70s/it]

training loss: 0.9619458317756653


training:  24%|██▎       | 4393/18500 [19:19:20<61:32:00, 15.70s/it]

training loss: 1.3709616661071777


training:  24%|██▍       | 4394/18500 [19:19:35<61:31:46, 15.70s/it]

training loss: 1.1188278198242188


training:  24%|██▍       | 4395/18500 [19:19:51<61:32:28, 15.71s/it]

training loss: 1.298317313194275


training:  24%|██▍       | 4396/18500 [19:20:07<61:32:15, 15.71s/it]

training loss: 1.2399775981903076


training:  24%|██▍       | 4397/18500 [19:20:22<61:28:43, 15.69s/it]

training loss: 1.1897920370101929


training:  24%|██▍       | 4398/18500 [19:20:38<61:24:43, 15.68s/it]

training loss: 1.4186325073242188


training:  24%|██▍       | 4399/18500 [19:20:54<61:22:52, 15.67s/it]

training loss: 12.14425277709961


training:  24%|██▍       | 4400/18500 [19:21:09<61:18:33, 15.65s/it]

training loss: 5.210734844207764
training loss: 7.079049587249756


training:  24%|██▍       | 4401/18500 [19:21:26<62:55:09, 16.07s/it]

validation loss: 6.770742416381836


training:  24%|██▍       | 4402/18500 [19:21:42<62:23:29, 15.93s/it]

training loss: 6.938782691955566


training:  24%|██▍       | 4403/18500 [19:21:57<62:00:12, 15.83s/it]

training loss: 21.853233337402344


training:  24%|██▍       | 4404/18500 [19:22:13<61:42:17, 15.76s/it]

training loss: 15.440085411071777


training:  24%|██▍       | 4405/18500 [19:22:29<61:31:11, 15.71s/it]

training loss: 21.823204040527344


training:  24%|██▍       | 4406/18500 [19:22:44<61:22:54, 15.68s/it]

training loss: 27.193273544311523


training:  24%|██▍       | 4407/18500 [19:23:00<61:16:54, 15.65s/it]

training loss: 28.922088623046875


training:  24%|██▍       | 4408/18500 [19:23:15<61:11:23, 15.63s/it]

training loss: 32.51945495605469


training:  24%|██▍       | 4409/18500 [19:23:31<61:07:44, 15.62s/it]

training loss: 40.49237060546875


training:  24%|██▍       | 4410/18500 [19:23:47<61:05:55, 15.61s/it]

training loss: 45.11819839477539


training:  24%|██▍       | 4411/18500 [19:24:02<61:04:46, 15.61s/it]

training loss: 47.445926666259766


training:  24%|██▍       | 4412/18500 [19:24:18<61:03:58, 15.60s/it]

training loss: 48.85345458984375


training:  24%|██▍       | 4413/18500 [19:24:33<61:02:43, 15.60s/it]

training loss: 51.20681381225586


training:  24%|██▍       | 4414/18500 [19:24:49<61:02:01, 15.60s/it]

training loss: 50.45550537109375


training:  24%|██▍       | 4415/18500 [19:25:05<61:00:48, 15.59s/it]

training loss: 53.19112777709961


training:  24%|██▍       | 4416/18500 [19:25:20<61:00:39, 15.59s/it]

training loss: 53.15192794799805


training:  24%|██▍       | 4417/18500 [19:25:36<61:00:32, 15.60s/it]

training loss: 50.223751068115234


training:  24%|██▍       | 4418/18500 [19:25:51<61:00:18, 15.60s/it]

training loss: 52.83915328979492


training:  24%|██▍       | 4419/18500 [19:26:07<61:00:03, 15.60s/it]

training loss: 51.85939025878906


training:  24%|██▍       | 4420/18500 [19:26:23<60:59:07, 15.59s/it]

training loss: 47.574764251708984


training:  24%|██▍       | 4421/18500 [19:26:38<60:58:30, 15.59s/it]

training loss: 42.16096878051758


training:  24%|██▍       | 4422/18500 [19:26:54<60:58:35, 15.59s/it]

training loss: 37.282676696777344


training:  24%|██▍       | 4423/18500 [19:27:09<60:57:59, 15.59s/it]

training loss: 33.49396514892578


training:  24%|██▍       | 4424/18500 [19:27:25<60:58:07, 15.59s/it]

training loss: 34.69816589355469


training:  24%|██▍       | 4425/18500 [19:27:40<60:58:01, 15.59s/it]

training loss: 32.85712814331055


training:  24%|██▍       | 4426/18500 [19:27:56<60:57:06, 15.59s/it]

training loss: 28.82119369506836


training:  24%|██▍       | 4427/18500 [19:28:12<60:56:22, 15.59s/it]

training loss: 24.905595779418945


training:  24%|██▍       | 4428/18500 [19:28:27<60:56:46, 15.59s/it]

training loss: 21.2573299407959


training:  24%|██▍       | 4429/18500 [19:28:43<60:57:08, 15.59s/it]

training loss: 20.46771240234375


training:  24%|██▍       | 4430/18500 [19:28:58<60:57:07, 15.60s/it]

training loss: 18.723909378051758


training:  24%|██▍       | 4431/18500 [19:29:14<60:55:47, 15.59s/it]

training loss: 15.574828147888184


training:  24%|██▍       | 4432/18500 [19:29:30<60:55:56, 15.59s/it]

training loss: 11.282001495361328


training:  24%|██▍       | 4433/18500 [19:29:45<60:55:53, 15.59s/it]

training loss: 7.665053844451904


training:  24%|██▍       | 4434/18500 [19:30:01<60:55:00, 15.59s/it]

training loss: 4.855734825134277


training:  24%|██▍       | 4435/18500 [19:30:16<60:53:54, 15.59s/it]

training loss: 3.831958055496216


training:  24%|██▍       | 4436/18500 [19:30:32<60:54:55, 15.59s/it]

training loss: 3.4370014667510986


training:  24%|██▍       | 4437/18500 [19:30:48<60:56:37, 15.60s/it]

training loss: 3.3222556114196777


training:  24%|██▍       | 4438/18500 [19:31:03<60:55:18, 15.60s/it]

training loss: 3.024982452392578


training:  24%|██▍       | 4439/18500 [19:31:19<60:55:31, 15.60s/it]

training loss: 2.728908061981201


training:  24%|██▍       | 4440/18500 [19:31:34<60:54:00, 15.59s/it]

training loss: 2.531524419784546


training:  24%|██▍       | 4441/18500 [19:31:50<60:53:47, 15.59s/it]

training loss: 2.362299680709839


training:  24%|██▍       | 4442/18500 [19:32:06<60:53:24, 15.59s/it]

training loss: 2.3414440155029297


training:  24%|██▍       | 4443/18500 [19:32:21<60:53:49, 15.60s/it]

training loss: 2.4244766235351562


training:  24%|██▍       | 4444/18500 [19:32:37<60:52:22, 15.59s/it]

training loss: 2.259749412536621


training:  24%|██▍       | 4445/18500 [19:32:52<60:52:02, 15.59s/it]

training loss: 2.093597412109375


training:  24%|██▍       | 4446/18500 [19:33:08<60:51:49, 15.59s/it]

training loss: 1.8907756805419922


training:  24%|██▍       | 4447/18500 [19:33:24<60:51:47, 15.59s/it]

training loss: 1.7401702404022217


training:  24%|██▍       | 4448/18500 [19:33:39<60:51:01, 15.59s/it]

training loss: 1.681859016418457


training:  24%|██▍       | 4449/18500 [19:33:55<60:52:09, 15.60s/it]

training loss: 1.4911105632781982


training:  24%|██▍       | 4450/18500 [19:34:10<60:50:27, 15.59s/it]

training loss: 1.4061477184295654


training:  24%|██▍       | 4451/18500 [19:34:26<60:50:18, 15.59s/it]

training loss: 1.517613410949707


training:  24%|██▍       | 4452/18500 [19:34:41<60:50:19, 15.59s/it]

training loss: 1.2980601787567139


training:  24%|██▍       | 4453/18500 [19:34:57<60:50:20, 15.59s/it]

training loss: 1.2069909572601318


training:  24%|██▍       | 4454/18500 [19:35:13<60:49:20, 15.59s/it]

training loss: 1.3308241367340088


training:  24%|██▍       | 4455/18500 [19:35:28<60:49:39, 15.59s/it]

training loss: 1.2857797145843506


training:  24%|██▍       | 4456/18500 [19:35:44<60:50:10, 15.59s/it]

training loss: 1.2291204929351807


training:  24%|██▍       | 4457/18500 [19:35:59<60:49:14, 15.59s/it]

training loss: 1.1339173316955566


training:  24%|██▍       | 4458/18500 [19:36:15<60:48:19, 15.59s/it]

training loss: 1.1308187246322632


training:  24%|██▍       | 4459/18500 [19:36:31<60:48:46, 15.59s/it]

training loss: 1.0579627752304077


training:  24%|██▍       | 4460/18500 [19:36:46<60:48:23, 15.59s/it]

training loss: 0.9042064547538757


training:  24%|██▍       | 4461/18500 [19:37:02<60:48:25, 15.59s/it]

training loss: 0.580396294593811


training:  24%|██▍       | 4462/18500 [19:37:17<60:48:07, 15.59s/it]

training loss: 1.1281085014343262


training:  24%|██▍       | 4463/18500 [19:37:33<60:47:56, 15.59s/it]

training loss: 0.8011832237243652


training:  24%|██▍       | 4464/18500 [19:37:49<60:47:31, 15.59s/it]

training loss: 0.7788853645324707


training:  24%|██▍       | 4465/18500 [19:38:04<60:46:21, 15.59s/it]

training loss: 0.9173147678375244


training:  24%|██▍       | 4466/18500 [19:38:20<60:46:11, 15.59s/it]

training loss: 1.274920105934143


training:  24%|██▍       | 4467/18500 [19:38:35<60:45:42, 15.59s/it]

training loss: 1.1078722476959229


training:  24%|██▍       | 4468/18500 [19:38:51<60:46:04, 15.59s/it]

training loss: 0.618910014629364


training:  24%|██▍       | 4469/18500 [19:39:07<60:44:59, 15.59s/it]

training loss: 1.2819817066192627


training:  24%|██▍       | 4470/18500 [19:39:22<60:44:38, 15.59s/it]

training loss: 0.8379146456718445


training:  24%|██▍       | 4471/18500 [19:39:38<60:43:24, 15.58s/it]

training loss: 0.5857090950012207


training:  24%|██▍       | 4472/18500 [19:39:53<60:43:06, 15.58s/it]

training loss: 0.8987509608268738


training:  24%|██▍       | 4473/18500 [19:40:09<60:42:56, 15.58s/it]

training loss: 1.080357551574707


training:  24%|██▍       | 4474/18500 [19:40:24<60:44:27, 15.59s/it]

training loss: 0.49889394640922546


training:  24%|██▍       | 4475/18500 [19:40:40<60:44:18, 15.59s/it]

training loss: 0.7386534214019775


training:  24%|██▍       | 4476/18500 [19:40:56<60:44:23, 15.59s/it]

training loss: 0.7487568855285645


training:  24%|██▍       | 4477/18500 [19:41:11<60:42:56, 15.59s/it]

training loss: 0.6253134608268738


training:  24%|██▍       | 4478/18500 [19:41:27<60:43:03, 15.59s/it]

training loss: 1.1606428623199463


training:  24%|██▍       | 4479/18500 [19:41:42<60:43:51, 15.59s/it]

training loss: 0.6057619452476501


training:  24%|██▍       | 4480/18500 [19:41:58<60:43:40, 15.59s/it]

training loss: 0.9641886949539185


training:  24%|██▍       | 4481/18500 [19:42:14<60:42:18, 15.59s/it]

training loss: 0.3596818745136261


training:  24%|██▍       | 4482/18500 [19:42:29<60:42:37, 15.59s/it]

training loss: 0.45905572175979614


training:  24%|██▍       | 4483/18500 [19:42:45<60:43:03, 15.59s/it]

training loss: 0.871323823928833


training:  24%|██▍       | 4484/18500 [19:43:00<60:41:58, 15.59s/it]

training loss: 1.0944204330444336


training:  24%|██▍       | 4485/18500 [19:43:16<60:40:56, 15.59s/it]

training loss: 0.836509644985199


training:  24%|██▍       | 4486/18500 [19:43:32<60:41:24, 15.59s/it]

training loss: 0.9750103950500488


training:  24%|██▍       | 4487/18500 [19:43:47<60:42:25, 15.60s/it]

training loss: 0.900663435459137


training:  24%|██▍       | 4488/18500 [19:44:03<60:40:40, 15.59s/it]

training loss: 0.8469192981719971


training:  24%|██▍       | 4489/18500 [19:44:18<60:39:58, 15.59s/it]

training loss: 1.090545892715454


training:  24%|██▍       | 4490/18500 [19:44:34<60:39:29, 15.59s/it]

training loss: 0.6238909363746643


training:  24%|██▍       | 4491/18500 [19:44:49<60:39:00, 15.59s/it]

training loss: 0.8909122943878174


training:  24%|██▍       | 4492/18500 [19:45:05<60:38:54, 15.59s/it]

training loss: 0.9920235872268677


training:  24%|██▍       | 4493/18500 [19:45:21<60:45:43, 15.62s/it]

training loss: 0.555062472820282


training:  24%|██▍       | 4494/18500 [19:45:36<60:51:37, 15.64s/it]

training loss: 0.6572861075401306


training:  24%|██▍       | 4495/18500 [19:45:52<60:55:20, 15.66s/it]

training loss: 1.0242846012115479


training:  24%|██▍       | 4496/18500 [19:46:08<60:56:56, 15.67s/it]

training loss: 0.9337591528892517


training:  24%|██▍       | 4497/18500 [19:46:24<60:59:00, 15.68s/it]

training loss: 0.9018279910087585


training:  24%|██▍       | 4498/18500 [19:46:39<61:00:29, 15.69s/it]

training loss: 0.8011897802352905


training:  24%|██▍       | 4499/18500 [19:46:55<61:03:21, 15.70s/it]

training loss: 0.6109300255775452


training:  24%|██▍       | 4500/18500 [19:47:11<61:01:12, 15.69s/it]

training loss: 0.8119948506355286
training loss: 0.6082615256309509



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.5925402641296387
hlej reakcie, ktore dokazala vyslat do 30 dni.
"Teraz hovorime o dvoch, troch dnoch. Mame aj viac takto pripravenych
jednotiek, pripomina Farina. Predtym ich tvorilo okolo 15-tisic vojakov,
teraz je ich priblizne 40-tisic.
"Skratili sme cas reakcie a posilnili nase schopnosti, kvalitativne aj
kvantitativne, uviedol Farina.
A C-130 vypusta vysadkarov zo 6.
polskej divizie pocas medzinarodneho zoskoku pri polskej Toruni.
Autor:
SITA/AP
Vychodne kridlo
Polsko, ale aj pobaltske krajiny ziadaju viditelnejsiu alianciu na
vychode. Podla ministra zahranicnych veci Witolda Waszczykowskeho je to
jednoduche. "Co chceme, je pritomnost vojakov, pritomnost vojakov a este
raz pritomnost vojakov, vyhlasil na stretnuti s novinarmi sef
diplomacie. "Polsko bolo doteraz clenskym statom aliancie druhej
kategorie. Nestaci len kolektivna obrana NATO podla clanku pat
Washingtonskej zmluvy. Polsko malo dohody aj v minulosti a mame s tym zle
skusenosti. Chce


generating:   0%|          | 1/512 [00:00<01:48,  4.69it/s][A
generating:   0%|          | 2/512 [00:00<01:48,  4.69it/s][A
generating:   1%|          | 3/512 [00:00<01:49,  4.66it/s][A
generating:   1%|          | 4/512 [00:00<01:49,  4.66it/s][A
generating:   1%|          | 5/512 [00:01<01:48,  4.67it/s][A
generating:   1%|          | 6/512 [00:01<01:47,  4.70it/s][A
generating:   1%|▏         | 7/512 [00:01<01:47,  4.71it/s][A
generating:   2%|▏         | 8/512 [00:01<01:48,  4.66it/s][A
generating:   2%|▏         | 9/512 [00:01<01:47,  4.68it/s][A
generating:   2%|▏         | 10/512 [00:02<01:47,  4.66it/s][A
generating:   2%|▏         | 11/512 [00:02<01:46,  4.69it/s][A
generating:   2%|▏         | 12/512 [00:02<01:46,  4.68it/s][A
generating:   3%|▎         | 13/512 [00:02<01:49,  4.55it/s][A
generating:   3%|▎         | 14/512 [00:03<01:48,  4.58it/s][A
generating:   3%|▎         | 15/512 [00:03<01:47,  4.64it/s][A
generating:   3%|▎         | 16/512 [00:03<01:45

o upraveni krajiny a museli o tomu, odroba o tom neskorenim, ze vsetkych
civit, aby v reakcii najma premierovi?
Vedia malo byt ohrozujem u podstupit vseobecne riesenie
podpisali aj hodnota analytikov, ktori sposobili
Somaricky su protestovala primarne volby,
ktore na pokryluje, ale mali do predvolebny komoditami, aby neodpoved.
Hovorca momentalne a sposob viditelne, zaujimali vsetko
nevyhnutne sa pri jeho ruskej korme. Vyznamne podpory vybrali svojich podpora vyskum obratit
v danovej pohotovosti, ne


training:  24%|██▍       | 4502/18500 [19:49:34<152:20:38, 39.18s/it]

training loss: 0.9567975401878357


training:  24%|██▍       | 4503/18500 [19:49:50<124:49:39, 32.11s/it]

training loss: 0.9308736324310303


training:  24%|██▍       | 4504/18500 [19:50:05<105:33:26, 27.15s/it]

training loss: 0.6536415219306946


training:  24%|██▍       | 4505/18500 [19:50:21<92:04:29, 23.68s/it] 

training loss: 0.900346040725708


training:  24%|██▍       | 4506/18500 [19:50:36<82:37:28, 21.26s/it]

training loss: 0.6234314441680908


training:  24%|██▍       | 4507/18500 [19:50:52<76:01:43, 19.56s/it]

training loss: 1.1265044212341309


training:  24%|██▍       | 4508/18500 [19:51:07<71:23:23, 18.37s/it]

training loss: 0.5471101999282837


training:  24%|██▍       | 4509/18500 [19:51:23<68:08:53, 17.54s/it]

training loss: 0.9692200422286987


training:  24%|██▍       | 4510/18500 [19:51:39<65:52:12, 16.95s/it]

training loss: 0.8247353434562683


training:  24%|██▍       | 4511/18500 [19:51:54<64:18:22, 16.55s/it]

training loss: 0.819294810295105


training:  24%|██▍       | 4512/18500 [19:52:10<63:10:56, 16.26s/it]

training loss: 0.5668150186538696


training:  24%|██▍       | 4513/18500 [19:52:25<62:24:29, 16.06s/it]

training loss: 1.2883421182632446


training:  24%|██▍       | 4514/18500 [19:52:41<61:50:53, 15.92s/it]

training loss: 0.8837843537330627


training:  24%|██▍       | 4515/18500 [19:52:57<61:27:30, 15.82s/it]

training loss: 1.0456581115722656


training:  24%|██▍       | 4516/18500 [19:53:12<61:10:24, 15.75s/it]

training loss: 1.0058791637420654


training:  24%|██▍       | 4517/18500 [19:53:28<60:58:55, 15.70s/it]

training loss: 0.8127928972244263


training:  24%|██▍       | 4518/18500 [19:53:43<60:52:17, 15.67s/it]

training loss: 0.9994510412216187


training:  24%|██▍       | 4519/18500 [19:53:59<60:46:20, 15.65s/it]

training loss: 1.0801016092300415


training:  24%|██▍       | 4520/18500 [19:54:15<60:41:06, 15.63s/it]

training loss: 1.0291224718093872


training:  24%|██▍       | 4521/18500 [19:54:30<60:38:29, 15.62s/it]

training loss: 0.875103771686554


training:  24%|██▍       | 4522/18500 [19:54:46<60:36:16, 15.61s/it]

training loss: 0.6760035753250122


training:  24%|██▍       | 4523/18500 [19:55:01<60:35:22, 15.61s/it]

training loss: 0.9000164866447449


training:  24%|██▍       | 4524/18500 [19:55:17<60:34:04, 15.60s/it]

training loss: 0.8824590444564819


training:  24%|██▍       | 4525/18500 [19:55:33<60:33:30, 15.60s/it]

training loss: 0.9757965207099915


training:  24%|██▍       | 4526/18500 [19:55:48<60:32:27, 15.60s/it]

training loss: 0.6996170878410339


training:  24%|██▍       | 4527/18500 [19:56:04<60:30:38, 15.59s/it]

training loss: 0.6133116483688354


training:  24%|██▍       | 4528/18500 [19:56:19<60:30:27, 15.59s/it]

training loss: 1.004779577255249


training:  24%|██▍       | 4529/18500 [19:56:35<60:30:03, 15.59s/it]

training loss: 1.1186537742614746


training:  24%|██▍       | 4530/18500 [19:56:50<60:29:59, 15.59s/it]

training loss: 0.5833808779716492


training:  24%|██▍       | 4531/18500 [19:57:06<60:30:02, 15.59s/it]

training loss: 0.9553901553153992


training:  24%|██▍       | 4532/18500 [19:57:22<60:30:27, 15.59s/it]

training loss: 0.5490691065788269


training:  25%|██▍       | 4533/18500 [19:57:37<60:29:17, 15.59s/it]

training loss: 0.41809043288230896


training:  25%|██▍       | 4534/18500 [19:57:53<60:28:45, 15.59s/it]

training loss: 0.8306217193603516


training:  25%|██▍       | 4535/18500 [19:58:08<60:28:05, 15.59s/it]

training loss: 0.14893050491809845


training:  25%|██▍       | 4536/18500 [19:58:24<60:28:32, 15.59s/it]

training loss: 0.8111096024513245


training:  25%|██▍       | 4537/18500 [19:58:40<60:28:41, 15.59s/it]

training loss: 0.8324593901634216


training:  25%|██▍       | 4538/18500 [19:58:55<60:29:46, 15.60s/it]

training loss: 0.44755086302757263


training:  25%|██▍       | 4539/18500 [19:59:11<60:28:14, 15.59s/it]

training loss: 0.7002623081207275


training:  25%|██▍       | 4540/18500 [19:59:26<60:27:17, 15.59s/it]

training loss: 0.9090597629547119


training:  25%|██▍       | 4541/18500 [19:59:42<60:27:03, 15.59s/it]

training loss: 1.0153177976608276


training:  25%|██▍       | 4542/18500 [19:59:58<60:27:12, 15.59s/it]

training loss: 0.8074579834938049


training:  25%|██▍       | 4543/18500 [20:00:13<60:26:33, 15.59s/it]

training loss: 0.553887128829956


training:  25%|██▍       | 4544/18500 [20:00:29<60:26:44, 15.59s/it]

training loss: 0.8695100545883179


training:  25%|██▍       | 4545/18500 [20:00:44<60:26:27, 15.59s/it]

training loss: 0.48845016956329346


training:  25%|██▍       | 4546/18500 [20:01:00<60:25:46, 15.59s/it]

training loss: 0.47050172090530396


training:  25%|██▍       | 4547/18500 [20:01:16<60:25:03, 15.59s/it]

training loss: 0.6350175142288208


training:  25%|██▍       | 4548/18500 [20:01:31<60:24:27, 15.59s/it]

training loss: 0.8041565418243408


training:  25%|██▍       | 4549/18500 [20:01:47<60:25:37, 15.59s/it]

training loss: 0.939413845539093


training:  25%|██▍       | 4550/18500 [20:02:02<60:25:00, 15.59s/it]

training loss: 1.1976137161254883


training:  25%|██▍       | 4551/18500 [20:02:18<60:25:16, 15.59s/it]

training loss: 0.8816816806793213


training:  25%|██▍       | 4552/18500 [20:02:34<60:24:34, 15.59s/it]

training loss: 0.8771258592605591


training:  25%|██▍       | 4553/18500 [20:02:49<60:23:51, 15.59s/it]

training loss: 0.6769437193870544


training:  25%|██▍       | 4554/18500 [20:03:05<60:23:41, 15.59s/it]

training loss: 0.9615171551704407


training:  25%|██▍       | 4555/18500 [20:03:20<60:23:50, 15.59s/it]

training loss: 1.0146950483322144


training:  25%|██▍       | 4556/18500 [20:03:36<60:23:50, 15.59s/it]

training loss: 0.9670623540878296


training:  25%|██▍       | 4557/18500 [20:03:51<60:23:50, 15.59s/it]

training loss: 0.6837891340255737


training:  25%|██▍       | 4558/18500 [20:04:07<60:22:33, 15.59s/it]

training loss: 0.7830514311790466


training:  25%|██▍       | 4559/18500 [20:04:23<60:23:00, 15.59s/it]

training loss: 0.7610986232757568


training:  25%|██▍       | 4560/18500 [20:04:38<60:22:11, 15.59s/it]

training loss: 1.1851134300231934


training:  25%|██▍       | 4561/18500 [20:04:54<60:29:01, 15.62s/it]

training loss: 0.6305853724479675


training:  25%|██▍       | 4562/18500 [20:05:10<60:28:00, 15.62s/it]

training loss: 1.017470121383667


training:  25%|██▍       | 4563/18500 [20:05:25<60:26:42, 15.61s/it]

training loss: 0.8304102420806885


training:  25%|██▍       | 4564/18500 [20:05:41<60:25:04, 15.61s/it]

training loss: 0.795280396938324


training:  25%|██▍       | 4565/18500 [20:05:56<60:24:07, 15.60s/it]

training loss: 0.9096591472625732


training:  25%|██▍       | 4566/18500 [20:06:12<60:22:29, 15.60s/it]

training loss: 0.9739266633987427


training:  25%|██▍       | 4567/18500 [20:06:28<60:21:11, 15.59s/it]

training loss: 0.5006230473518372


training:  25%|██▍       | 4568/18500 [20:06:43<60:20:03, 15.59s/it]

training loss: 0.3381999135017395


training:  25%|██▍       | 4569/18500 [20:06:59<60:19:23, 15.59s/it]

training loss: 0.9865732192993164


training:  25%|██▍       | 4570/18500 [20:07:14<60:18:52, 15.59s/it]

training loss: 0.5870733857154846


training:  25%|██▍       | 4571/18500 [20:07:30<60:19:57, 15.59s/it]

training loss: 0.8931431770324707


training:  25%|██▍       | 4572/18500 [20:07:45<60:19:11, 15.59s/it]

training loss: 1.1822911500930786


training:  25%|██▍       | 4573/18500 [20:08:01<60:18:26, 15.59s/it]

training loss: 0.843610405921936


training:  25%|██▍       | 4574/18500 [20:08:17<60:18:11, 15.59s/it]

training loss: 1.0700008869171143


training:  25%|██▍       | 4575/18500 [20:08:32<60:18:16, 15.59s/it]

training loss: 0.4721895456314087


training:  25%|██▍       | 4576/18500 [20:08:48<60:17:59, 15.59s/it]

training loss: 0.4152570962905884


training:  25%|██▍       | 4577/18500 [20:09:03<60:16:41, 15.59s/it]

training loss: 0.4959065020084381


training:  25%|██▍       | 4578/18500 [20:09:19<60:16:37, 15.59s/it]

training loss: 0.5906326174736023


training:  25%|██▍       | 4579/18500 [20:09:35<60:15:38, 15.58s/it]

training loss: 0.9240376949310303


training:  25%|██▍       | 4580/18500 [20:09:50<60:14:37, 15.58s/it]

training loss: 0.961921215057373


training:  25%|██▍       | 4581/18500 [20:10:06<60:16:47, 15.59s/it]

training loss: 0.5846940875053406


training:  25%|██▍       | 4582/18500 [20:10:21<60:20:17, 15.61s/it]

training loss: 0.8986201286315918


training:  25%|██▍       | 4583/18500 [20:10:37<60:18:46, 15.60s/it]

training loss: 0.77522873878479


training:  25%|██▍       | 4584/18500 [20:10:53<60:16:33, 15.59s/it]

training loss: 0.9502542018890381


training:  25%|██▍       | 4585/18500 [20:11:08<60:15:44, 15.59s/it]

training loss: 1.000306487083435


training:  25%|██▍       | 4586/18500 [20:11:24<60:15:21, 15.59s/it]

training loss: 0.643491268157959


training:  25%|██▍       | 4587/18500 [20:11:39<60:14:55, 15.59s/it]

training loss: 0.6696480512619019


training:  25%|██▍       | 4588/18500 [20:11:55<60:15:46, 15.59s/it]

training loss: 0.5359479784965515


training:  25%|██▍       | 4589/18500 [20:12:10<60:13:56, 15.59s/it]

training loss: 0.7441437244415283


training:  25%|██▍       | 4590/18500 [20:12:26<60:14:20, 15.59s/it]

training loss: 0.845971405506134


training:  25%|██▍       | 4591/18500 [20:12:42<60:15:49, 15.60s/it]

training loss: 0.46102413535118103


training:  25%|██▍       | 4592/18500 [20:12:57<60:14:58, 15.60s/it]

training loss: 0.7734504342079163


training:  25%|██▍       | 4593/18500 [20:13:13<60:13:04, 15.59s/it]

training loss: 1.1803947687149048


training:  25%|██▍       | 4594/18500 [20:13:28<60:13:04, 15.59s/it]

training loss: 0.9566278457641602


training:  25%|██▍       | 4595/18500 [20:13:44<60:13:29, 15.59s/it]

training loss: 0.6588295698165894


training:  25%|██▍       | 4596/18500 [20:14:00<60:12:28, 15.59s/it]

training loss: 1.1763570308685303


training:  25%|██▍       | 4597/18500 [20:14:15<60:10:54, 15.58s/it]

training loss: 0.7338130474090576


training:  25%|██▍       | 4598/18500 [20:14:31<60:10:14, 15.58s/it]

training loss: 0.8903454542160034


training:  25%|██▍       | 4599/18500 [20:14:46<60:10:19, 15.58s/it]

training loss: 0.778901994228363


training:  25%|██▍       | 4600/18500 [20:15:02<60:10:04, 15.58s/it]

training loss: 1.0367037057876587
training loss: 0.6972330808639526


training:  25%|██▍       | 4601/18500 [20:15:19<61:51:22, 16.02s/it]

validation loss: 1.497838020324707


training:  25%|██▍       | 4602/18500 [20:15:35<61:21:25, 15.89s/it]

training loss: 0.7180551290512085


training:  25%|██▍       | 4603/18500 [20:15:50<60:59:39, 15.80s/it]

training loss: 0.7658957242965698


training:  25%|██▍       | 4604/18500 [20:16:06<60:44:58, 15.74s/it]

training loss: 0.8540745973587036


training:  25%|██▍       | 4605/18500 [20:16:21<60:35:29, 15.70s/it]

training loss: 0.7687400579452515


training:  25%|██▍       | 4606/18500 [20:16:37<60:28:49, 15.67s/it]

training loss: 0.6542767286300659


training:  25%|██▍       | 4607/18500 [20:16:53<60:22:45, 15.65s/it]

training loss: 0.865012526512146


training:  25%|██▍       | 4608/18500 [20:17:08<60:18:50, 15.63s/it]

training loss: 0.9729422926902771


training:  25%|██▍       | 4609/18500 [20:17:24<60:16:34, 15.62s/it]

training loss: 1.2399303913116455


training:  25%|██▍       | 4610/18500 [20:17:39<60:15:12, 15.62s/it]

training loss: 1.3964154720306396


training:  25%|██▍       | 4611/18500 [20:17:55<60:14:13, 15.61s/it]

training loss: 1.0122532844543457


training:  25%|██▍       | 4612/18500 [20:18:11<60:11:08, 15.60s/it]

training loss: 0.5131537318229675


training:  25%|██▍       | 4613/18500 [20:18:26<60:15:54, 15.62s/it]

training loss: 1.1741492748260498


training:  25%|██▍       | 4614/18500 [20:18:42<60:20:11, 15.64s/it]

training loss: 0.9979909062385559


training:  25%|██▍       | 4615/18500 [20:18:58<60:23:00, 15.66s/it]

training loss: 0.5980921387672424


training:  25%|██▍       | 4616/18500 [20:19:13<60:25:09, 15.67s/it]

training loss: 0.8124658465385437


training:  25%|██▍       | 4617/18500 [20:19:29<60:27:07, 15.68s/it]

training loss: 0.641664981842041


training:  25%|██▍       | 4618/18500 [20:19:45<60:28:55, 15.68s/it]

training loss: 0.8952591419219971


training:  25%|██▍       | 4619/18500 [20:20:00<60:28:07, 15.68s/it]

training loss: 0.906088650226593


training:  25%|██▍       | 4620/18500 [20:20:16<60:27:07, 15.68s/it]

training loss: 0.8190474510192871


training:  25%|██▍       | 4621/18500 [20:20:32<60:27:51, 15.68s/it]

training loss: 0.5941612124443054


training:  25%|██▍       | 4622/18500 [20:20:47<60:23:45, 15.67s/it]

training loss: 1.0631006956100464


training:  25%|██▍       | 4623/18500 [20:21:03<60:18:59, 15.65s/it]

training loss: 0.957504391670227


training:  25%|██▍       | 4624/18500 [20:21:19<60:16:13, 15.64s/it]

training loss: 0.4287719428539276


training:  25%|██▌       | 4625/18500 [20:21:34<60:12:16, 15.62s/it]

training loss: 0.6514139771461487


training:  25%|██▌       | 4626/18500 [20:21:50<60:08:54, 15.61s/it]

training loss: 0.8261064887046814


training:  25%|██▌       | 4627/18500 [20:22:05<60:06:52, 15.60s/it]

training loss: 0.9236024618148804


training:  25%|██▌       | 4628/18500 [20:22:21<60:06:04, 15.60s/it]

training loss: 0.987593412399292


training:  25%|██▌       | 4629/18500 [20:22:36<60:05:19, 15.60s/it]

training loss: 0.7658194899559021


training:  25%|██▌       | 4630/18500 [20:22:52<60:03:45, 15.59s/it]

training loss: 0.9809445142745972


training:  25%|██▌       | 4631/18500 [20:23:08<60:03:18, 15.59s/it]

training loss: 0.8816116452217102


training:  25%|██▌       | 4632/18500 [20:23:23<60:02:53, 15.59s/it]

training loss: 0.6552833318710327


training:  25%|██▌       | 4633/18500 [20:23:39<60:02:11, 15.59s/it]

training loss: 0.9068393707275391


training:  25%|██▌       | 4634/18500 [20:23:54<60:02:21, 15.59s/it]

training loss: 0.6019451022148132


training:  25%|██▌       | 4635/18500 [20:24:10<60:01:53, 15.59s/it]

training loss: 1.1312977075576782


training:  25%|██▌       | 4636/18500 [20:24:26<60:02:36, 15.59s/it]

training loss: 0.8358486890792847


training:  25%|██▌       | 4637/18500 [20:24:41<60:02:28, 15.59s/it]

training loss: 0.5434315204620361


training:  25%|██▌       | 4638/18500 [20:24:57<60:02:20, 15.59s/it]

training loss: 0.5652119517326355


training:  25%|██▌       | 4639/18500 [20:25:12<60:01:13, 15.59s/it]

training loss: 0.9959704875946045


training:  25%|██▌       | 4640/18500 [20:25:28<60:00:45, 15.59s/it]

training loss: 0.456839382648468


training:  25%|██▌       | 4641/18500 [20:25:44<60:01:39, 15.59s/it]

training loss: 0.9147197008132935


training:  25%|██▌       | 4642/18500 [20:25:59<60:01:02, 15.59s/it]

training loss: 0.6748771667480469


training:  25%|██▌       | 4643/18500 [20:26:15<59:59:26, 15.59s/it]

training loss: 0.7045226693153381


training:  25%|██▌       | 4644/18500 [20:26:30<59:58:41, 15.58s/it]

training loss: 0.48683688044548035


training:  25%|██▌       | 4645/18500 [20:26:46<59:58:50, 15.59s/it]

training loss: 0.6558974385261536


training:  25%|██▌       | 4646/18500 [20:27:01<59:59:07, 15.59s/it]

training loss: 1.193887710571289


training:  25%|██▌       | 4647/18500 [20:27:17<59:59:41, 15.59s/it]

training loss: 0.6629993915557861


training:  25%|██▌       | 4648/18500 [20:27:33<59:59:31, 15.59s/it]

training loss: 0.6307297945022583


training:  25%|██▌       | 4649/18500 [20:27:48<59:58:47, 15.59s/it]

training loss: 0.43612000346183777


training:  25%|██▌       | 4650/18500 [20:28:04<59:58:18, 15.59s/it]

training loss: 0.8213269710540771


training:  25%|██▌       | 4651/18500 [20:28:19<59:58:36, 15.59s/it]

training loss: 0.41105037927627563


training:  25%|██▌       | 4652/18500 [20:28:35<59:58:07, 15.59s/it]

training loss: 1.0613540410995483


training:  25%|██▌       | 4653/18500 [20:28:51<59:56:47, 15.59s/it]

training loss: 0.6426353454589844


training:  25%|██▌       | 4654/18500 [20:29:06<59:55:54, 15.58s/it]

training loss: 0.7819302082061768


training:  25%|██▌       | 4655/18500 [20:29:22<59:56:14, 15.59s/it]

training loss: 0.9902321100234985


training:  25%|██▌       | 4656/18500 [20:29:37<59:56:43, 15.59s/it]

training loss: 0.8558460474014282


training:  25%|██▌       | 4657/18500 [20:29:53<59:55:46, 15.59s/it]

training loss: 0.9216724038124084


training:  25%|██▌       | 4658/18500 [20:30:09<59:54:53, 15.58s/it]

training loss: 0.4450090825557709


training:  25%|██▌       | 4659/18500 [20:30:24<59:55:13, 15.59s/it]

training loss: 1.108985185623169


training:  25%|██▌       | 4660/18500 [20:30:40<59:55:34, 15.59s/it]

training loss: 0.7775258421897888


training:  25%|██▌       | 4661/18500 [20:30:55<59:56:17, 15.59s/it]

training loss: 0.6217094659805298


training:  25%|██▌       | 4662/18500 [20:31:11<59:54:57, 15.59s/it]

training loss: 0.7270861864089966


training:  25%|██▌       | 4663/18500 [20:31:26<59:54:29, 15.59s/it]

training loss: 0.5850990414619446


training:  25%|██▌       | 4664/18500 [20:31:42<59:54:21, 15.59s/it]

training loss: 0.9365331530570984


training:  25%|██▌       | 4665/18500 [20:31:58<59:54:22, 15.59s/it]

training loss: 1.0792735815048218


training:  25%|██▌       | 4666/18500 [20:32:13<59:52:55, 15.58s/it]

training loss: 0.9174902439117432


training:  25%|██▌       | 4667/18500 [20:32:29<59:54:02, 15.59s/it]

training loss: 0.8602792620658875


training:  25%|██▌       | 4668/18500 [20:32:44<59:54:09, 15.59s/it]

training loss: 0.37844008207321167


training:  25%|██▌       | 4669/18500 [20:33:00<59:53:14, 15.59s/it]

training loss: 0.49911874532699585


training:  25%|██▌       | 4670/18500 [20:33:16<59:52:17, 15.58s/it]

training loss: 1.3526439666748047


training:  25%|██▌       | 4671/18500 [20:33:31<59:52:29, 15.59s/it]

training loss: 1.0518721342086792


training:  25%|██▌       | 4672/18500 [20:33:47<59:53:23, 15.59s/it]

training loss: 0.7644246816635132


training:  25%|██▌       | 4673/18500 [20:34:02<59:52:46, 15.59s/it]

training loss: 0.9899656176567078


training:  25%|██▌       | 4674/18500 [20:34:18<59:52:26, 15.59s/it]

training loss: 1.0471798181533813


training:  25%|██▌       | 4675/18500 [20:34:34<59:52:16, 15.59s/it]

training loss: 0.5325182676315308


training:  25%|██▌       | 4676/18500 [20:34:49<59:50:59, 15.59s/it]

training loss: 0.35983508825302124


training:  25%|██▌       | 4677/18500 [20:35:05<59:50:10, 15.58s/it]

training loss: 0.6722437143325806


training:  25%|██▌       | 4678/18500 [20:35:20<59:50:29, 15.59s/it]

training loss: 0.7108108401298523


training:  25%|██▌       | 4679/18500 [20:35:36<59:50:38, 15.59s/it]

training loss: 0.5817357301712036


training:  25%|██▌       | 4680/18500 [20:35:51<59:49:31, 15.58s/it]

training loss: 0.9852501153945923


training:  25%|██▌       | 4681/18500 [20:36:07<59:48:10, 15.58s/it]

training loss: 0.6108954548835754


training:  25%|██▌       | 4682/18500 [20:36:23<59:48:10, 15.58s/it]

training loss: 1.01198148727417


training:  25%|██▌       | 4683/18500 [20:36:38<59:47:47, 15.58s/it]

training loss: 0.9053239822387695


training:  25%|██▌       | 4684/18500 [20:36:54<59:47:29, 15.58s/it]

training loss: 0.7234632968902588


training:  25%|██▌       | 4685/18500 [20:37:09<59:46:40, 15.58s/it]

training loss: 0.9499242901802063


training:  25%|██▌       | 4686/18500 [20:37:25<59:47:46, 15.58s/it]

training loss: 0.7765072584152222


training:  25%|██▌       | 4687/18500 [20:37:40<59:47:27, 15.58s/it]

training loss: 0.6242853403091431


training:  25%|██▌       | 4688/18500 [20:37:56<59:46:30, 15.58s/it]

training loss: 0.46212539076805115


training:  25%|██▌       | 4689/18500 [20:38:12<59:45:05, 15.57s/it]

training loss: 1.0800204277038574


training:  25%|██▌       | 4690/18500 [20:38:27<59:45:31, 15.58s/it]

training loss: 0.5100544691085815


training:  25%|██▌       | 4691/18500 [20:38:43<59:45:47, 15.58s/it]

training loss: 0.5925320982933044


training:  25%|██▌       | 4692/18500 [20:38:58<59:45:39, 15.58s/it]

training loss: 0.4452498257160187


training:  25%|██▌       | 4693/18500 [20:39:14<59:45:50, 15.58s/it]

training loss: 0.9169478416442871


training:  25%|██▌       | 4694/18500 [20:39:30<59:45:31, 15.58s/it]

training loss: 0.8472025394439697


training:  25%|██▌       | 4695/18500 [20:39:45<59:45:40, 15.58s/it]

training loss: 0.8151735067367554


training:  25%|██▌       | 4696/18500 [20:40:01<59:44:59, 15.58s/it]

training loss: 0.7886223196983337


training:  25%|██▌       | 4697/18500 [20:40:16<59:44:07, 15.58s/it]

training loss: 1.1292873620986938


training:  25%|██▌       | 4698/18500 [20:40:32<59:44:35, 15.58s/it]

training loss: 0.8400809168815613


training:  25%|██▌       | 4699/18500 [20:40:47<59:45:08, 15.59s/it]

training loss: 0.8719984292984009


training:  25%|██▌       | 4700/18500 [20:41:03<59:43:37, 15.58s/it]

training loss: 0.637406051158905
training loss: 0.9205232858657837


training:  25%|██▌       | 4701/18500 [20:41:20<61:20:13, 16.00s/it]

validation loss: 1.4880456924438477


training:  25%|██▌       | 4702/18500 [20:41:36<60:52:29, 15.88s/it]

training loss: 0.7454239726066589


training:  25%|██▌       | 4703/18500 [20:41:51<60:31:03, 15.79s/it]

training loss: 0.6518003344535828


training:  25%|██▌       | 4704/18500 [20:42:07<60:15:39, 15.72s/it]

training loss: 0.5912482738494873


training:  25%|██▌       | 4705/18500 [20:42:22<60:05:28, 15.68s/it]

training loss: 0.9581467509269714


training:  25%|██▌       | 4706/18500 [20:42:38<59:58:19, 15.65s/it]

training loss: 0.7287992835044861


training:  25%|██▌       | 4707/18500 [20:42:54<59:52:31, 15.63s/it]

training loss: 0.4401916563510895


training:  25%|██▌       | 4708/18500 [20:43:09<59:49:01, 15.61s/it]

training loss: 1.1003049612045288


training:  25%|██▌       | 4709/18500 [20:43:25<59:47:13, 15.61s/it]

training loss: 1.1250287294387817


training:  25%|██▌       | 4710/18500 [20:43:40<59:46:23, 15.60s/it]

training loss: 0.7779422998428345


training:  25%|██▌       | 4711/18500 [20:43:56<59:44:07, 15.60s/it]

training loss: 0.9165926575660706


training:  25%|██▌       | 4712/18500 [20:44:11<59:42:01, 15.59s/it]

training loss: 0.7303228378295898


training:  25%|██▌       | 4713/18500 [20:44:27<59:40:49, 15.58s/it]

training loss: 0.6259075999259949


training:  25%|██▌       | 4714/18500 [20:44:43<59:40:29, 15.58s/it]

training loss: 0.6646835803985596


training:  25%|██▌       | 4715/18500 [20:44:58<59:40:04, 15.58s/it]

training loss: 0.5858149528503418


training:  25%|██▌       | 4716/18500 [20:45:14<59:38:49, 15.58s/it]

training loss: 0.8737846612930298


training:  25%|██▌       | 4717/18500 [20:45:29<59:38:43, 15.58s/it]

training loss: 0.8611780405044556


training:  26%|██▌       | 4718/18500 [20:45:45<59:44:14, 15.60s/it]

training loss: 0.8782365322113037


training:  26%|██▌       | 4719/18500 [20:46:01<59:51:48, 15.64s/it]

training loss: 0.989197850227356


training:  26%|██▌       | 4720/18500 [20:46:16<59:55:36, 15.66s/it]

training loss: 0.8445417284965515


training:  26%|██▌       | 4721/18500 [20:46:32<59:58:48, 15.67s/it]

training loss: 0.4645398259162903


training:  26%|██▌       | 4722/18500 [20:46:48<60:00:45, 15.68s/it]

training loss: 0.8001909255981445


training:  26%|██▌       | 4723/18500 [20:47:03<60:00:30, 15.68s/it]

training loss: 0.9808734655380249


training:  26%|██▌       | 4724/18500 [20:47:19<60:01:05, 15.68s/it]

training loss: 0.8730911016464233


training:  26%|██▌       | 4725/18500 [20:47:35<60:00:58, 15.68s/it]

training loss: 0.7602053880691528


training:  26%|██▌       | 4726/18500 [20:47:51<60:00:01, 15.68s/it]

training loss: 0.6266536712646484


training:  26%|██▌       | 4727/18500 [20:48:06<59:57:45, 15.67s/it]

training loss: 0.6487618088722229


training:  26%|██▌       | 4728/18500 [20:48:22<59:57:44, 15.67s/it]

training loss: 0.9675812721252441


training:  26%|██▌       | 4729/18500 [20:48:38<59:56:02, 15.67s/it]

training loss: 1.1809134483337402


training:  26%|██▌       | 4730/18500 [20:48:53<59:50:49, 15.65s/it]

training loss: 0.8304659128189087


training:  26%|██▌       | 4731/18500 [20:49:09<59:45:44, 15.63s/it]

training loss: 0.6505435705184937


training:  26%|██▌       | 4732/18500 [20:49:24<59:43:02, 15.61s/it]

training loss: 0.7426779866218567


training:  26%|██▌       | 4733/18500 [20:49:40<59:41:37, 15.61s/it]

training loss: 0.7528858184814453


training:  26%|██▌       | 4734/18500 [20:49:55<59:39:34, 15.60s/it]

training loss: 0.776978075504303


training:  26%|██▌       | 4735/18500 [20:50:11<59:37:11, 15.59s/it]

training loss: 0.7683514952659607


training:  26%|██▌       | 4736/18500 [20:50:27<59:35:42, 15.59s/it]

training loss: 0.6025406122207642


training:  26%|██▌       | 4737/18500 [20:50:42<59:35:23, 15.59s/it]

training loss: 0.7840594053268433


training:  26%|██▌       | 4738/18500 [20:50:58<59:34:38, 15.58s/it]

training loss: 0.9006496667861938


training:  26%|██▌       | 4739/18500 [20:51:13<59:33:54, 15.58s/it]

training loss: 0.878594160079956


training:  26%|██▌       | 4740/18500 [20:51:29<59:34:06, 15.58s/it]

training loss: 0.6015329360961914


training:  26%|██▌       | 4741/18500 [20:51:45<59:33:54, 15.59s/it]

training loss: 1.1470328569412231


training:  26%|██▌       | 4742/18500 [20:52:00<59:33:16, 15.58s/it]

training loss: 0.5895870923995972


training:  26%|██▌       | 4743/18500 [20:52:16<59:32:22, 15.58s/it]

training loss: 1.2150388956069946


training:  26%|██▌       | 4744/18500 [20:52:31<59:32:06, 15.58s/it]

training loss: 1.5578052997589111


training:  26%|██▌       | 4745/18500 [20:52:47<59:33:00, 15.59s/it]

training loss: 1.2542824745178223


training:  26%|██▌       | 4746/18500 [20:53:02<59:32:30, 15.58s/it]

training loss: 0.6449234485626221


training:  26%|██▌       | 4747/18500 [20:53:18<59:32:04, 15.58s/it]

training loss: 0.8277175426483154


training:  26%|██▌       | 4748/18500 [20:53:34<59:31:38, 15.58s/it]

training loss: 1.2854907512664795


training:  26%|██▌       | 4749/18500 [20:53:49<59:31:16, 15.58s/it]

training loss: 1.3450400829315186


training:  26%|██▌       | 4750/18500 [20:54:05<59:31:25, 15.58s/it]

training loss: 0.7579684853553772


training:  26%|██▌       | 4751/18500 [20:54:20<59:31:14, 15.58s/it]

training loss: 0.5428711771965027


training:  26%|██▌       | 4752/18500 [20:54:36<59:31:00, 15.58s/it]

training loss: 1.1011672019958496


training:  26%|██▌       | 4753/18500 [20:54:52<59:29:44, 15.58s/it]

training loss: 0.8857806921005249


training:  26%|██▌       | 4754/18500 [20:55:07<59:28:33, 15.58s/it]

training loss: 0.44149890542030334


training:  26%|██▌       | 4755/18500 [20:55:23<59:28:40, 15.58s/it]

training loss: 0.6136694550514221


training:  26%|██▌       | 4756/18500 [20:55:38<59:28:08, 15.58s/it]

training loss: 1.0440471172332764


training:  26%|██▌       | 4757/18500 [20:55:54<59:28:40, 15.58s/it]

training loss: 1.1196585893630981


training:  26%|██▌       | 4758/18500 [20:56:09<59:28:51, 15.58s/it]

training loss: 1.0708398818969727


training:  26%|██▌       | 4759/18500 [20:56:25<59:30:08, 15.59s/it]

training loss: 0.8989449143409729


training:  26%|██▌       | 4760/18500 [20:56:41<59:29:43, 15.59s/it]

training loss: 1.2576384544372559


training:  26%|██▌       | 4761/18500 [20:56:56<59:29:07, 15.59s/it]

training loss: 0.5956081748008728


training:  26%|██▌       | 4762/18500 [20:57:12<59:28:35, 15.59s/it]

training loss: 1.1687484979629517


training:  26%|██▌       | 4763/18500 [20:57:27<59:29:15, 15.59s/it]

training loss: 0.981726348400116


training:  26%|██▌       | 4764/18500 [20:57:43<59:29:20, 15.59s/it]

training loss: 1.1882109642028809


training:  26%|██▌       | 4765/18500 [20:57:59<59:30:28, 15.60s/it]

training loss: 0.980758011341095


training:  26%|██▌       | 4766/18500 [20:58:14<59:30:38, 15.60s/it]

training loss: 0.3155090808868408


training:  26%|██▌       | 4767/18500 [20:58:30<59:31:21, 15.60s/it]

training loss: 1.3435959815979004


training:  26%|██▌       | 4768/18500 [20:58:45<59:31:04, 15.60s/it]

training loss: 1.1215567588806152


training:  26%|██▌       | 4769/18500 [20:59:01<59:29:48, 15.60s/it]

training loss: 0.951215922832489


training:  26%|██▌       | 4770/18500 [20:59:17<59:30:37, 15.60s/it]

training loss: 1.1835473775863647


training:  26%|██▌       | 4771/18500 [20:59:32<59:29:26, 15.60s/it]

training loss: 0.6837735176086426


training:  26%|██▌       | 4772/18500 [20:59:48<59:29:03, 15.60s/it]

training loss: 1.2929913997650146


training:  26%|██▌       | 4773/18500 [21:00:03<59:27:33, 15.59s/it]

training loss: 1.2378785610198975


training:  26%|██▌       | 4774/18500 [21:00:19<59:27:10, 15.59s/it]

training loss: 0.8828476667404175


training:  26%|██▌       | 4775/18500 [21:00:35<59:26:02, 15.59s/it]

training loss: 0.5442659854888916


training:  26%|██▌       | 4776/18500 [21:00:50<59:25:47, 15.59s/it]

training loss: 1.1532461643218994


training:  26%|██▌       | 4777/18500 [21:01:06<59:25:56, 15.59s/it]

training loss: 1.1118143796920776


training:  26%|██▌       | 4778/18500 [21:01:21<59:27:08, 15.60s/it]

training loss: 0.7986294031143188


training:  26%|██▌       | 4779/18500 [21:01:37<59:25:08, 15.59s/it]

training loss: 0.9777171611785889


training:  26%|██▌       | 4780/18500 [21:01:52<59:23:35, 15.58s/it]

training loss: 1.2180204391479492


training:  26%|██▌       | 4781/18500 [21:02:08<59:23:34, 15.59s/it]

training loss: 1.2595012187957764


training:  26%|██▌       | 4782/18500 [21:02:24<59:23:17, 15.59s/it]

training loss: 1.0558122396469116


training:  26%|██▌       | 4783/18500 [21:02:39<59:22:28, 15.58s/it]

training loss: 1.124945878982544


training:  26%|██▌       | 4784/18500 [21:02:55<59:22:15, 15.58s/it]

training loss: 2.3231356143951416


training:  26%|██▌       | 4785/18500 [21:03:10<59:21:47, 15.58s/it]

training loss: 2.3713953495025635


training:  26%|██▌       | 4786/18500 [21:03:26<59:21:53, 15.58s/it]

training loss: 3.8901782035827637


training:  26%|██▌       | 4787/18500 [21:03:42<59:21:04, 15.58s/it]

training loss: 3.5874552726745605


training:  26%|██▌       | 4788/18500 [21:03:57<59:21:09, 15.58s/it]

training loss: 4.086936950683594


training:  26%|██▌       | 4789/18500 [21:04:13<59:20:46, 15.58s/it]

training loss: 3.680602788925171


training:  26%|██▌       | 4790/18500 [21:04:28<59:21:39, 15.59s/it]

training loss: 4.672215461730957


training:  26%|██▌       | 4791/18500 [21:04:44<59:21:25, 15.59s/it]

training loss: 3.9898576736450195


training:  26%|██▌       | 4792/18500 [21:04:59<59:21:02, 15.59s/it]

training loss: 3.83687686920166


training:  26%|██▌       | 4793/18500 [21:05:15<59:20:13, 15.58s/it]

training loss: 3.2378079891204834


training:  26%|██▌       | 4794/18500 [21:05:31<59:19:41, 15.58s/it]

training loss: 3.0421133041381836


training:  26%|██▌       | 4795/18500 [21:05:46<59:19:15, 15.58s/it]

training loss: 3.096395969390869


training:  26%|██▌       | 4796/18500 [21:06:02<59:19:17, 15.58s/it]

training loss: 2.9943509101867676


training:  26%|██▌       | 4797/18500 [21:06:17<59:19:27, 15.59s/it]

training loss: 2.7525596618652344


training:  26%|██▌       | 4798/18500 [21:06:33<59:19:21, 15.59s/it]

training loss: 2.701770544052124


training:  26%|██▌       | 4799/18500 [21:06:49<59:18:13, 15.58s/it]

training loss: 2.4222638607025146


training:  26%|██▌       | 4800/18500 [21:07:04<59:17:24, 15.58s/it]

training loss: 2.2249255180358887
training loss: 1.9521795511245728


training:  26%|██▌       | 4801/18500 [21:07:21<60:54:35, 16.01s/it]

validation loss: 1.9353541135787964


training:  26%|██▌       | 4802/18500 [21:07:37<60:25:13, 15.88s/it]

training loss: 1.4655179977416992


training:  26%|██▌       | 4803/18500 [21:07:52<60:04:30, 15.79s/it]

training loss: 1.2744227647781372


training:  26%|██▌       | 4804/18500 [21:08:08<59:50:10, 15.73s/it]

training loss: 1.3831675052642822


training:  26%|██▌       | 4805/18500 [21:08:23<59:40:00, 15.68s/it]

training loss: 1.4096839427947998


training:  26%|██▌       | 4806/18500 [21:08:39<59:32:30, 15.65s/it]

training loss: 1.2900168895721436


training:  26%|██▌       | 4807/18500 [21:08:55<59:27:23, 15.63s/it]

training loss: 1.65977144241333


training:  26%|██▌       | 4808/18500 [21:09:10<59:22:50, 15.61s/it]

training loss: 1.4120275974273682


training:  26%|██▌       | 4809/18500 [21:09:26<59:21:13, 15.61s/it]

training loss: 1.3145636320114136


training:  26%|██▌       | 4810/18500 [21:09:41<59:20:00, 15.60s/it]

training loss: 1.364786982536316


training:  26%|██▌       | 4811/18500 [21:09:57<59:18:22, 15.60s/it]

training loss: 1.0521643161773682


training:  26%|██▌       | 4812/18500 [21:10:13<59:17:35, 15.59s/it]

training loss: 1.5919177532196045


training:  26%|██▌       | 4813/18500 [21:10:28<59:16:53, 15.59s/it]

training loss: 2.3659610748291016


training:  26%|██▌       | 4814/18500 [21:10:44<59:15:28, 15.59s/it]

training loss: 1.728320837020874


training:  26%|██▌       | 4815/18500 [21:10:59<59:14:59, 15.59s/it]

training loss: 1.2725906372070312


training:  26%|██▌       | 4816/18500 [21:11:15<59:14:28, 15.59s/it]

training loss: 1.8426501750946045


training:  26%|██▌       | 4817/18500 [21:11:30<59:14:00, 15.58s/it]

training loss: 1.6231322288513184


training:  26%|██▌       | 4818/18500 [21:11:46<59:13:27, 15.58s/it]

training loss: 1.4550726413726807


training:  26%|██▌       | 4819/18500 [21:12:02<59:12:49, 15.58s/it]

training loss: 1.129543662071228


training:  26%|██▌       | 4820/18500 [21:12:17<59:13:19, 15.58s/it]

training loss: 1.0555180311203003


training:  26%|██▌       | 4821/18500 [21:12:33<59:13:42, 15.59s/it]

training loss: 1.5796228647232056


training:  26%|██▌       | 4822/18500 [21:12:48<59:13:17, 15.59s/it]

training loss: 1.1144943237304688


training:  26%|██▌       | 4823/18500 [21:13:04<59:12:17, 15.58s/it]

training loss: 1.0037347078323364


training:  26%|██▌       | 4824/18500 [21:13:20<59:13:27, 15.59s/it]

training loss: 1.4272961616516113


training:  26%|██▌       | 4825/18500 [21:13:35<59:12:25, 15.59s/it]

training loss: 1.3351364135742188


training:  26%|██▌       | 4826/18500 [21:13:51<59:11:22, 15.58s/it]

training loss: 1.12264084815979


training:  26%|██▌       | 4827/18500 [21:14:06<59:11:24, 15.58s/it]

training loss: 1.0285841226577759


training:  26%|██▌       | 4828/18500 [21:14:22<59:11:36, 15.59s/it]

training loss: 0.8944897055625916


training:  26%|██▌       | 4829/18500 [21:14:38<59:11:01, 15.58s/it]

training loss: 1.3074729442596436


training:  26%|██▌       | 4830/18500 [21:14:53<59:10:30, 15.58s/it]

training loss: 1.1968687772750854


training:  26%|██▌       | 4831/18500 [21:15:09<59:11:27, 15.59s/it]

training loss: 1.1761753559112549


training:  26%|██▌       | 4832/18500 [21:15:24<59:11:53, 15.59s/it]

training loss: 1.1207456588745117


training:  26%|██▌       | 4833/18500 [21:15:40<59:10:51, 15.59s/it]

training loss: 1.1430253982543945


training:  26%|██▌       | 4834/18500 [21:15:55<59:10:10, 15.59s/it]

training loss: 0.8002025485038757


training:  26%|██▌       | 4835/18500 [21:16:11<59:09:09, 15.58s/it]

training loss: 1.3005897998809814


training:  26%|██▌       | 4836/18500 [21:16:27<59:08:43, 15.58s/it]

training loss: 1.3004313707351685


training:  26%|██▌       | 4837/18500 [21:16:42<59:08:43, 15.58s/it]

training loss: 1.3623368740081787


training:  26%|██▌       | 4838/18500 [21:16:58<59:08:42, 15.58s/it]

training loss: 0.971608579158783


training:  26%|██▌       | 4839/18500 [21:17:13<59:10:30, 15.59s/it]

training loss: 0.9368650913238525


training:  26%|██▌       | 4840/18500 [21:17:29<59:16:13, 15.62s/it]

training loss: 1.2019140720367432


training:  26%|██▌       | 4841/18500 [21:17:45<59:20:08, 15.64s/it]

training loss: 0.9676344394683838


training:  26%|██▌       | 4842/18500 [21:18:00<59:22:41, 15.65s/it]

training loss: 1.1647565364837646


training:  26%|██▌       | 4843/18500 [21:18:16<59:22:42, 15.65s/it]

training loss: 0.8807296752929688


training:  26%|██▌       | 4844/18500 [21:18:32<59:23:07, 15.66s/it]

training loss: 1.7682063579559326


training:  26%|██▌       | 4845/18500 [21:18:47<59:23:44, 15.66s/it]

training loss: 1.200219988822937


training:  26%|██▌       | 4846/18500 [21:19:03<59:22:47, 15.66s/it]

training loss: 1.3506152629852295


training:  26%|██▌       | 4847/18500 [21:19:19<59:23:02, 15.66s/it]

training loss: 1.6022764444351196


training:  26%|██▌       | 4848/18500 [21:19:34<59:20:48, 15.65s/it]

training loss: 1.7953647375106812


training:  26%|██▌       | 4849/18500 [21:19:50<59:25:37, 15.67s/it]

training loss: 1.0277653932571411


training:  26%|██▌       | 4850/18500 [21:20:06<59:23:08, 15.66s/it]

training loss: 3.104119062423706


training:  26%|██▌       | 4851/18500 [21:20:21<59:21:30, 15.66s/it]

training loss: 13.546509742736816


training:  26%|██▌       | 4852/18500 [21:20:37<59:16:48, 15.64s/it]

training loss: 33.43149185180664


training:  26%|██▌       | 4853/18500 [21:20:53<59:12:35, 15.62s/it]

training loss: 47.032920837402344


training:  26%|██▌       | 4854/18500 [21:21:08<59:10:22, 15.61s/it]

training loss: 62.58930206298828


training:  26%|██▌       | 4855/18500 [21:21:24<59:09:05, 15.61s/it]

training loss: 73.57289123535156


training:  26%|██▌       | 4856/18500 [21:21:39<59:06:51, 15.60s/it]

training loss: 80.85151672363281


training:  26%|██▋       | 4857/18500 [21:21:55<59:05:20, 15.59s/it]

training loss: 97.53422546386719


training:  26%|██▋       | 4858/18500 [21:22:10<59:03:22, 15.58s/it]

training loss: 114.32186889648438


training:  26%|██▋       | 4859/18500 [21:22:26<59:02:54, 15.58s/it]

training loss: 130.8704376220703


training:  26%|██▋       | 4860/18500 [21:22:42<59:01:12, 15.58s/it]

training loss: 154.62890625


training:  26%|██▋       | 4861/18500 [21:22:57<59:00:57, 15.58s/it]

training loss: 196.8556671142578


training:  26%|██▋       | 4862/18500 [21:23:13<58:59:43, 15.57s/it]

training loss: 198.19422912597656


training:  26%|██▋       | 4863/18500 [21:23:28<58:59:53, 15.57s/it]

training loss: 215.00277709960938


training:  26%|██▋       | 4864/18500 [21:23:44<58:59:02, 15.57s/it]

training loss: 239.23912048339844


training:  26%|██▋       | 4865/18500 [21:23:59<58:58:12, 15.57s/it]

training loss: 238.57225036621094


training:  26%|██▋       | 4866/18500 [21:24:15<58:57:17, 15.57s/it]

training loss: 245.76336669921875


training:  26%|██▋       | 4867/18500 [21:24:31<58:56:55, 15.57s/it]

training loss: 278.6688232421875


training:  26%|██▋       | 4868/18500 [21:24:46<58:56:40, 15.57s/it]

training loss: 293.6950378417969


training:  26%|██▋       | 4869/18500 [21:25:02<58:56:49, 15.57s/it]

training loss: 283.496337890625


training:  26%|██▋       | 4870/18500 [21:25:17<58:57:08, 15.57s/it]

training loss: 280.6319580078125


training:  26%|██▋       | 4871/18500 [21:25:33<58:57:19, 15.57s/it]

training loss: 269.8590393066406


training:  26%|██▋       | 4872/18500 [21:25:48<58:55:58, 15.57s/it]

training loss: 273.88720703125


training:  26%|██▋       | 4873/18500 [21:26:04<58:55:13, 15.57s/it]

training loss: 278.522705078125


training:  26%|██▋       | 4874/18500 [21:26:20<58:55:13, 15.57s/it]

training loss: 297.74871826171875


training:  26%|██▋       | 4875/18500 [21:26:35<58:55:00, 15.57s/it]

training loss: 293.12139892578125


training:  26%|██▋       | 4876/18500 [21:26:51<58:54:36, 15.57s/it]

training loss: 289.3255920410156


training:  26%|██▋       | 4877/18500 [21:27:06<58:54:33, 15.57s/it]

training loss: 282.5282287597656


training:  26%|██▋       | 4878/18500 [21:27:22<58:54:50, 15.57s/it]

training loss: 254.39488220214844


training:  26%|██▋       | 4879/18500 [21:27:37<58:54:16, 15.57s/it]

training loss: 253.44168090820312


training:  26%|██▋       | 4880/18500 [21:27:53<58:53:25, 15.57s/it]

training loss: 220.12477111816406


training:  26%|██▋       | 4881/18500 [21:28:09<58:52:56, 15.56s/it]

training loss: 220.58734130859375


training:  26%|██▋       | 4882/18500 [21:28:24<58:53:20, 15.57s/it]

training loss: 217.29074096679688


training:  26%|██▋       | 4883/18500 [21:28:40<58:53:30, 15.57s/it]

training loss: 209.24697875976562


training:  26%|██▋       | 4884/18500 [21:28:55<58:54:03, 15.57s/it]

training loss: 188.025146484375


training:  26%|██▋       | 4885/18500 [21:29:11<58:52:25, 15.57s/it]

training loss: 175.8621063232422


training:  26%|██▋       | 4886/18500 [21:29:26<58:52:44, 15.57s/it]

training loss: 145.6525421142578


training:  26%|██▋       | 4887/18500 [21:29:42<58:52:11, 15.57s/it]

training loss: 142.1233673095703


training:  26%|██▋       | 4888/18500 [21:29:58<58:53:12, 15.57s/it]

training loss: 140.45248413085938


training:  26%|██▋       | 4889/18500 [21:30:13<58:52:34, 15.57s/it]

training loss: 151.09303283691406


training:  26%|██▋       | 4890/18500 [21:30:29<58:53:05, 15.58s/it]

training loss: 169.76353454589844


training:  26%|██▋       | 4891/18500 [21:30:44<58:52:17, 15.57s/it]

training loss: 197.01695251464844


training:  26%|██▋       | 4892/18500 [21:31:00<58:51:51, 15.57s/it]

training loss: 204.18650817871094


training:  26%|██▋       | 4893/18500 [21:31:15<58:50:55, 15.57s/it]

training loss: 193.91485595703125


training:  26%|██▋       | 4894/18500 [21:31:31<58:50:33, 15.57s/it]

training loss: 180.02418518066406


training:  26%|██▋       | 4895/18500 [21:31:47<58:50:36, 15.57s/it]

training loss: 168.7879180908203


training:  26%|██▋       | 4896/18500 [21:32:02<58:51:19, 15.57s/it]

training loss: 152.84170532226562


training:  26%|██▋       | 4897/18500 [21:32:18<58:51:48, 15.58s/it]

training loss: 144.05377197265625


training:  26%|██▋       | 4898/18500 [21:32:33<58:51:55, 15.58s/it]

training loss: 107.68135833740234


training:  26%|██▋       | 4899/18500 [21:32:49<58:52:00, 15.58s/it]

training loss: 93.24208068847656


training:  26%|██▋       | 4900/18500 [21:33:04<58:51:16, 15.58s/it]

training loss: 94.55812072753906
training loss: 85.47517395019531


training:  26%|██▋       | 4901/18500 [21:33:21<60:25:49, 16.00s/it]

validation loss: 108.41709899902344


training:  26%|██▋       | 4902/18500 [21:33:37<59:58:03, 15.88s/it]

training loss: 103.0837631225586


training:  27%|██▋       | 4903/18500 [21:33:53<59:37:03, 15.78s/it]

training loss: 113.10620880126953


training:  27%|██▋       | 4904/18500 [21:34:08<59:22:29, 15.72s/it]

training loss: 103.76168823242188


training:  27%|██▋       | 4905/18500 [21:34:24<59:12:54, 15.68s/it]

training loss: 102.38289642333984


training:  27%|██▋       | 4906/18500 [21:34:39<59:05:56, 15.65s/it]

training loss: 99.4639892578125


training:  27%|██▋       | 4907/18500 [21:34:55<59:01:17, 15.63s/it]

training loss: 102.14104461669922


training:  27%|██▋       | 4908/18500 [21:35:10<58:56:19, 15.61s/it]

training loss: 101.76258087158203


training:  27%|██▋       | 4909/18500 [21:35:26<58:55:32, 15.61s/it]

training loss: 99.88512420654297


training:  27%|██▋       | 4910/18500 [21:35:42<58:53:12, 15.60s/it]

training loss: 116.11473083496094


training:  27%|██▋       | 4911/18500 [21:35:57<58:51:55, 15.59s/it]

training loss: 128.43710327148438


training:  27%|██▋       | 4912/18500 [21:36:13<58:49:18, 15.58s/it]

training loss: 122.47876739501953


training:  27%|██▋       | 4913/18500 [21:36:28<58:48:59, 15.58s/it]

training loss: 119.22956848144531


training:  27%|██▋       | 4914/18500 [21:36:44<58:48:45, 15.58s/it]

training loss: 134.67005920410156


training:  27%|██▋       | 4915/18500 [21:37:00<58:47:24, 15.58s/it]

training loss: 146.86749267578125


training:  27%|██▋       | 4916/18500 [21:37:15<58:46:09, 15.57s/it]

training loss: 153.7124786376953


training:  27%|██▋       | 4917/18500 [21:37:31<58:46:08, 15.58s/it]

training loss: 155.71156311035156


training:  27%|██▋       | 4918/18500 [21:37:46<58:46:21, 15.58s/it]

training loss: 151.93785095214844


training:  27%|██▋       | 4919/18500 [21:38:02<58:46:07, 15.58s/it]

training loss: 157.25743103027344


training:  27%|██▋       | 4920/18500 [21:38:17<58:46:22, 15.58s/it]

training loss: 150.90994262695312


training:  27%|██▋       | 4921/18500 [21:38:33<58:46:43, 15.58s/it]

training loss: 166.19622802734375


training:  27%|██▋       | 4922/18500 [21:38:49<58:46:08, 15.58s/it]

training loss: 188.4773406982422


training:  27%|██▋       | 4923/18500 [21:39:04<58:45:42, 15.58s/it]

training loss: 174.7887725830078


training:  27%|██▋       | 4924/18500 [21:39:20<58:46:06, 15.58s/it]

training loss: 152.8012237548828


training:  27%|██▋       | 4925/18500 [21:39:35<58:47:54, 15.59s/it]

training loss: 140.91050720214844


training:  27%|██▋       | 4926/18500 [21:39:51<58:46:39, 15.59s/it]

training loss: 145.62986755371094


training:  27%|██▋       | 4927/18500 [21:40:07<58:45:26, 15.58s/it]

training loss: 155.8603057861328


training:  27%|██▋       | 4928/18500 [21:40:22<58:45:03, 15.58s/it]

training loss: 169.60800170898438


training:  27%|██▋       | 4929/18500 [21:40:38<58:45:32, 15.59s/it]

training loss: 162.38156127929688


training:  27%|██▋       | 4930/18500 [21:40:53<58:44:16, 15.58s/it]

training loss: 160.38253784179688


training:  27%|██▋       | 4931/18500 [21:41:09<58:43:30, 15.58s/it]

training loss: 168.478515625


training:  27%|██▋       | 4932/18500 [21:41:24<58:43:55, 15.58s/it]

training loss: 159.940673828125


training:  27%|██▋       | 4933/18500 [21:41:40<58:43:33, 15.58s/it]

training loss: 167.55059814453125


training:  27%|██▋       | 4934/18500 [21:41:56<58:43:00, 15.58s/it]

training loss: 171.1847686767578


training:  27%|██▋       | 4935/18500 [21:42:11<58:41:55, 15.58s/it]

training loss: 168.12110900878906


training:  27%|██▋       | 4936/18500 [21:42:27<58:41:43, 15.58s/it]

training loss: 158.0172882080078


training:  27%|██▋       | 4937/18500 [21:42:42<58:42:08, 15.58s/it]

training loss: 160.1336669921875


training:  27%|██▋       | 4938/18500 [21:42:58<58:41:59, 15.58s/it]

training loss: 160.462158203125


training:  27%|██▋       | 4939/18500 [21:43:13<58:40:26, 15.58s/it]

training loss: 168.19775390625


training:  27%|██▋       | 4940/18500 [21:43:29<58:40:36, 15.58s/it]

training loss: 169.36770629882812


training:  27%|██▋       | 4941/18500 [21:43:45<58:40:24, 15.58s/it]

training loss: 169.1070098876953


training:  27%|██▋       | 4942/18500 [21:44:00<58:39:44, 15.58s/it]

training loss: 182.40170288085938


training:  27%|██▋       | 4943/18500 [21:44:16<58:38:37, 15.57s/it]

training loss: 191.76910400390625


training:  27%|██▋       | 4944/18500 [21:44:31<58:38:15, 15.57s/it]

training loss: 202.91139221191406


training:  27%|██▋       | 4945/18500 [21:44:47<58:39:08, 15.58s/it]

training loss: 183.29498291015625


training:  27%|██▋       | 4946/18500 [21:45:02<58:38:16, 15.57s/it]

training loss: 173.00079345703125


training:  27%|██▋       | 4947/18500 [21:45:18<58:37:44, 15.57s/it]

training loss: 194.06634521484375


training:  27%|██▋       | 4948/18500 [21:45:34<58:37:24, 15.57s/it]

training loss: 215.04672241210938


training:  27%|██▋       | 4949/18500 [21:45:49<58:37:34, 15.57s/it]

training loss: 231.0972442626953


training:  27%|██▋       | 4950/18500 [21:46:05<58:37:08, 15.57s/it]

training loss: 245.6241455078125


training:  27%|██▋       | 4951/18500 [21:46:20<58:37:35, 15.58s/it]

training loss: 268.9244384765625


training:  27%|██▋       | 4952/18500 [21:46:36<58:38:06, 15.58s/it]

training loss: 307.2669982910156


training:  27%|██▋       | 4953/18500 [21:46:52<58:37:06, 15.58s/it]

training loss: 273.33209228515625


training:  27%|██▋       | 4954/18500 [21:47:07<58:42:29, 15.60s/it]

training loss: 268.3700866699219


training:  27%|██▋       | 4955/18500 [21:47:23<58:47:57, 15.63s/it]

training loss: 260.884765625


training:  27%|██▋       | 4956/18500 [21:47:39<58:52:43, 15.65s/it]

training loss: 251.2191619873047


training:  27%|██▋       | 4957/18500 [21:47:54<58:54:58, 15.66s/it]

training loss: 233.3096923828125


training:  27%|██▋       | 4958/18500 [21:48:10<58:54:45, 15.66s/it]

training loss: 245.73056030273438


training:  27%|██▋       | 4959/18500 [21:48:26<58:56:12, 15.67s/it]

training loss: 256.90057373046875


training:  27%|██▋       | 4960/18500 [21:48:41<58:57:40, 15.68s/it]

training loss: 263.0442810058594


training:  27%|██▋       | 4961/18500 [21:48:57<58:57:48, 15.68s/it]

training loss: 258.19403076171875


training:  27%|██▋       | 4962/18500 [21:49:13<58:56:04, 15.67s/it]

training loss: 275.0391540527344


training:  27%|██▋       | 4963/18500 [21:49:28<58:53:43, 15.66s/it]

training loss: 297.0141296386719


training:  27%|██▋       | 4964/18500 [21:49:44<58:51:25, 15.65s/it]

training loss: 297.3923034667969


training:  27%|██▋       | 4965/18500 [21:50:00<58:47:06, 15.64s/it]

training loss: 302.50616455078125


training:  27%|██▋       | 4966/18500 [21:50:15<58:42:29, 15.62s/it]

training loss: 298.3857421875


training:  27%|██▋       | 4967/18500 [21:50:31<58:38:54, 15.60s/it]

training loss: 350.6082458496094


training:  27%|██▋       | 4968/18500 [21:50:46<58:37:36, 15.60s/it]

training loss: 335.0719909667969


training:  27%|██▋       | 4969/18500 [21:51:02<58:37:10, 15.60s/it]

training loss: 305.93328857421875


training:  27%|██▋       | 4970/18500 [21:51:17<58:35:57, 15.59s/it]

training loss: 313.6099853515625


training:  27%|██▋       | 4971/18500 [21:51:33<58:35:50, 15.59s/it]

training loss: 384.34649658203125


training:  27%|██▋       | 4972/18500 [21:51:49<58:34:42, 15.59s/it]

training loss: 344.1734924316406


training:  27%|██▋       | 4973/18500 [21:52:04<58:32:52, 15.58s/it]

training loss: 374.7466735839844


training:  27%|██▋       | 4974/18500 [21:52:20<58:32:30, 15.58s/it]

training loss: 365.9869079589844


training:  27%|██▋       | 4975/18500 [21:52:35<58:32:30, 15.58s/it]

training loss: 399.1399230957031


training:  27%|██▋       | 4976/18500 [21:52:51<58:32:20, 15.58s/it]

training loss: 378.9301452636719


training:  27%|██▋       | 4977/18500 [21:53:06<58:31:06, 15.58s/it]

training loss: 370.7925109863281


training:  27%|██▋       | 4978/18500 [21:53:22<58:30:31, 15.58s/it]

training loss: 350.4697265625


training:  27%|██▋       | 4979/18500 [21:53:38<58:30:43, 15.58s/it]

training loss: 365.07598876953125


training:  27%|██▋       | 4980/18500 [21:53:53<58:29:34, 15.58s/it]

training loss: 352.13226318359375


training:  27%|██▋       | 4981/18500 [21:54:09<58:29:27, 15.58s/it]

training loss: 377.8288879394531


training:  27%|██▋       | 4982/18500 [21:54:24<58:30:00, 15.58s/it]

training loss: 352.0403747558594


training:  27%|██▋       | 4983/18500 [21:54:40<58:29:56, 15.58s/it]

training loss: 393.235595703125


training:  27%|██▋       | 4984/18500 [21:54:56<58:29:58, 15.58s/it]

training loss: 370.8705749511719


training:  27%|██▋       | 4985/18500 [21:55:11<58:28:46, 15.58s/it]

training loss: 414.2288818359375


training:  27%|██▋       | 4986/18500 [21:55:27<58:28:55, 15.58s/it]

training loss: 401.5480041503906


training:  27%|██▋       | 4987/18500 [21:55:42<58:29:48, 15.58s/it]

training loss: 376.6544494628906


training:  27%|██▋       | 4988/18500 [21:55:58<58:29:18, 15.58s/it]

training loss: 405.4203186035156


training:  27%|██▋       | 4989/18500 [21:56:13<58:28:04, 15.58s/it]

training loss: 366.1552429199219


training:  27%|██▋       | 4990/18500 [21:56:29<58:28:39, 15.58s/it]

training loss: 383.62530517578125


training:  27%|██▋       | 4991/18500 [21:56:45<58:28:09, 15.58s/it]

training loss: 363.5621643066406


training:  27%|██▋       | 4992/18500 [21:57:00<58:27:45, 15.58s/it]

training loss: 405.2065124511719


training:  27%|██▋       | 4993/18500 [21:57:16<58:26:30, 15.58s/it]

training loss: 411.2498779296875


training:  27%|██▋       | 4994/18500 [21:57:31<58:26:32, 15.58s/it]

training loss: 407.5480041503906


training:  27%|██▋       | 4995/18500 [21:57:47<58:27:25, 15.58s/it]

training loss: 372.18597412109375


training:  27%|██▋       | 4996/18500 [21:58:02<58:27:04, 15.58s/it]

training loss: 417.92987060546875


training:  27%|██▋       | 4997/18500 [21:58:18<58:26:54, 15.58s/it]

training loss: 401.8997802734375


training:  27%|██▋       | 4998/18500 [21:58:34<58:26:35, 15.58s/it]

training loss: 426.0660705566406


training:  27%|██▋       | 4999/18500 [21:58:49<58:26:03, 15.58s/it]

training loss: 409.7169189453125


training:  27%|██▋       | 5000/18500 [21:59:05<58:25:05, 15.58s/it]

training loss: 415.4717712402344
training loss: 424.17156982421875



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 455.3322448730469
hu noc dalsich 14. Spolu boli pri nasilnostiach
zraneni styria policajti, dav poskodil tri policajne vozidla.
Podnetom k protestom a nasiliu sa stal incident, pri ktorom policajna
hliadka pocas cestnej kontroly zastavovala auto s dvoma muzmi, ktori ale
neuposluchli vyzvy a venovali sa podozrivej cinnosti. Po strelbe sa
ukazalo, ze mrtvym je dvadsattrirocny cernoch s dlhym trestnym
registrom. Mal pri sebe strelnu zbran s 23 nabojmi, ktora bola v marci
ukradnuta. Druhy muz z auta bol zadrzany, podla uradov ide tiez
o cernocha.
Strielajuci policajt, ktory bol podla starostu Milwaukee takisto
cernoch, bol postaveny docasne mimo sluzbu. Starosta Tom Barrett je ale
presvedceny, ze policajt bol pri pouziti zbrane v prave.
V Spojenych statoch sa v poslednych dvoch rokoch zdvihla vlna protestov
v reakcii na to, ze policajti pri zasahoch zastrelili niekolko cernochov a
aktivisti zacali hovorit o rasovej diskriminacii zo strany policie.
Sucasnu vl


generating:   0%|          | 1/512 [00:00<01:47,  4.75it/s][A
generating:   0%|          | 2/512 [00:00<01:46,  4.77it/s][A
generating:   1%|          | 3/512 [00:00<01:47,  4.75it/s][A
generating:   1%|          | 4/512 [00:00<01:46,  4.75it/s][A
generating:   1%|          | 5/512 [00:01<01:46,  4.74it/s][A
generating:   1%|          | 6/512 [00:01<01:46,  4.73it/s][A
generating:   1%|▏         | 7/512 [00:01<01:46,  4.74it/s][A
generating:   2%|▏         | 8/512 [00:01<01:45,  4.76it/s][A
generating:   2%|▏         | 9/512 [00:01<01:45,  4.76it/s][A
generating:   2%|▏         | 10/512 [00:02<01:46,  4.72it/s][A
generating:   2%|▏         | 11/512 [00:02<01:46,  4.72it/s][A
generating:   2%|▏         | 12/512 [00:02<01:45,  4.73it/s][A
generating:   3%|▎         | 13/512 [00:02<01:45,  4.74it/s][A
generating:   3%|▎         | 14/512 [00:02<01:45,  4.72it/s][A
generating:   3%|▎         | 15/512 [00:03<01:45,  4.71it/s][A
generating:   3%|▎         | 16/512 [00:03<01:44

nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn


training:  27%|██▋       | 5002/18500 [22:01:27<145:28:13, 38.80s/it]

training loss: 508.3990478515625


training:  27%|██▋       | 5003/18500 [22:01:42<119:21:20, 31.84s/it]

training loss: 486.5529479980469


training:  27%|██▋       | 5004/18500 [22:01:58<101:04:00, 26.96s/it]

training loss: 431.6907958984375


training:  27%|██▋       | 5005/18500 [22:02:13<88:14:47, 23.54s/it] 

training loss: 408.67962646484375


training:  27%|██▋       | 5006/18500 [22:02:29<79:17:59, 21.16s/it]

training loss: 427.26995849609375


training:  27%|██▋       | 5007/18500 [22:02:44<73:01:28, 19.48s/it]

training loss: 397.34417724609375


training:  27%|██▋       | 5008/18500 [22:03:00<68:37:04, 18.31s/it]

training loss: 374.24359130859375


training:  27%|██▋       | 5009/18500 [22:03:16<65:31:37, 17.49s/it]

training loss: 380.2718811035156


training:  27%|██▋       | 5010/18500 [22:03:31<63:23:44, 16.92s/it]

training loss: 363.9044189453125


training:  27%|██▋       | 5011/18500 [22:03:47<61:54:19, 16.52s/it]

training loss: 391.9486083984375


training:  27%|██▋       | 5012/18500 [22:04:02<60:50:07, 16.24s/it]

training loss: 434.38818359375


training:  27%|██▋       | 5013/18500 [22:04:18<60:04:58, 16.04s/it]

training loss: 435.5718688964844


training:  27%|██▋       | 5014/18500 [22:04:33<59:33:27, 15.90s/it]

training loss: 410.8572998046875


training:  27%|██▋       | 5015/18500 [22:04:49<59:11:24, 15.80s/it]

training loss: 451.21929931640625


training:  27%|██▋       | 5016/18500 [22:05:05<58:55:36, 15.73s/it]

training loss: 453.87982177734375


training:  27%|██▋       | 5017/18500 [22:05:20<58:44:56, 15.69s/it]

training loss: 479.2538757324219


training:  27%|██▋       | 5018/18500 [22:05:36<58:38:31, 15.66s/it]

training loss: 425.191650390625
