<a href="https://colab.research.google.com/github/xSakix/AI_colab_notebooks/blob/master/reformer_pytorch_cuda.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torch
!pip install reformer_pytorch==0.12.7 --force-reinstall 
!pip install transformers

Processing /root/.cache/pip/wheels/61/b8/d4/a72dab74c922c6cb6544a50f5853b548071e1cb33eb76fda13/reformer_pytorch-0.12.7-cp36-none-any.whl
Processing /root/.cache/pip/wheels/9b/83/1a/5eb5b1043769c607cbee5e6431a550fac20c90bed623e27e5f/revtorch-0.2.4-cp36-none-any.whl
Collecting torch
  Using cached https://files.pythonhosted.org/packages/24/19/4804aea17cd136f1705a5e98a00618cb8f6ccc375ad8bfa437408e09d058/torch-1.4.0-cp36-cp36m-manylinux1_x86_64.whl
Installing collected packages: revtorch, torch, reformer-pytorch
  Found existing installation: revtorch 0.2.4
    Uninstalling revtorch-0.2.4:
      Successfully uninstalled revtorch-0.2.4
  Found existing installation: torch 1.4.0
    Uninstalling torch-1.4.0:
      Successfully uninstalled torch-1.4.0
  Found existing installation: reformer-pytorch 0.12.7
    Uninstalling reformer-pytorch-0.12.7:
      Successfully uninstalled reformer-pytorch-0.12.7
Successfully installed reformer-pytorch-0.12.7 revtorch-0.2.4 torch-1.4.0


In [2]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime → "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Sat Feb 15 19:16:30 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.48.02    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   41C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# load model file and epoch
import os
import re
files = [f for f in os.listdir('/content/drive/My Drive/model_saves') if f.startswith('epoch')]
last_model_file = None
epochs_run = 0
if len(files) > 0:
  files.sort(reverse=True)
  last_model_file = os.path.join('/content/drive/My Drive/model_saves',files[0])
  print(last_model_file)
  epochs = re.findall(r'\d+',files[0])
  epochs_run = 0
  if len(epochs) == 1:
    epochs_run = int(epochs[0])
  print('number of epochs run:',epochs_run)


/content/drive/My Drive/model_saves/epoch-34500.pt
number of epochs run: 34500


In [0]:
from reformer_pytorch import ReformerLM

import random
import tqdm
import gzip
import numpy as np
import torch
import torch.optim as optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
import os
from transformers import BertTokenizer, AdamW, get_linear_schedule_with_warmup

# constants

NUM_BATCHES = int(1e5)
BATCH_SIZE = 8
GRADIENT_ACCUMULATE_EVERY = 4
LEARNING_RATE = 3e-4
VALIDATE_EVERY  = 100
GENERATE_EVERY  = 500
GENERATE_LENGTH = 512
SEQ_LEN = 4096

# helpers

def cycle(loader):
    while True:
        for data in loader:
            yield data

def get_top_p(logits, top_p=0.9):
    sorted_logits, sorted_indices = torch.sort(logits, descending=True)
    cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

    sorted_indices_to_remove = cumulative_probs > top_p
    sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
    sorted_indices_to_remove[..., 0] = 0

    indices_to_remove = sorted_indices[sorted_indices_to_remove]
    logits[indices_to_remove] = float('-inf')
    return logits

def sample_next_token(logits, top_p=0.9, temperature = 1.0):
    logits = logits[0, -1, :] / temperature
    filtered_logits = get_top_p(logits, top_p=top_p)

    probs = F.softmax(filtered_logits, dim=-1)
    return torch.multinomial(probs, 1)

def decode_token(token):
    return str(chr(token))

def decode_tokens(tokens):
    return ''.join(list(map(decode_token, tokens)))

# instantiate model

model = ReformerLM(
    dim = 512,
    depth = 6,
    max_seq_len = SEQ_LEN,
    num_tokens = 256,
    heads = 8,
    bucket_size = 64,
    n_hashes = 8,
    ff_chunks = 10,
    lsh_dropout = 0.1,
    weight_tie = True,
    causal = True,
    use_full_attn = False # set this to true for comparison with full attention
)

# model = ReformerLM(
#     dim = 512,
#     depth = 6,
#     max_seq_len = SEQ_LEN,
#     num_tokens = 256,
#     heads = 8,
#     bucket_size = 64,
#     n_hashes = 4,
#     ff_chunks = 10,
#     lsh_dropout = 0.1,
#     weight_tie = True,
#     causal = True,
#     use_full_attn = False # set this to true for comparison with full attention
# )

if last_model_file is not None:
  model.load_state_dict(torch.load(last_model_file ))

model.cuda()


# prepare enwik8 data

with gzip.open('/content/drive/My Drive/model_data/merged.gz') as file:
    X = np.array([int(c) for c in file.read()])
    si = int(len(X)-len(X)*0.2)
    trX, vaX = np.split(X, [si])
    data_train, data_val = torch.from_numpy(trX), torch.from_numpy(vaX)

class TextSamplerDataset(Dataset):
    def __init__(self, data, seq_len):
        super().__init__()
        self.data = data
        self.seq_len = seq_len

    def __getitem__(self, index):
        rand_start = torch.randint(0, self.data.size(0) - self.seq_len - 1, (1,))
        full_seq = self.data[rand_start: rand_start + self.seq_len + 1].long()
        return full_seq[0:-1].cuda(), full_seq[1:].cuda()

    def __len__(self):
        return self.data.size(0) // self.seq_len

train_dataset = TextSamplerDataset(data_train, SEQ_LEN)
val_dataset   = TextSamplerDataset(data_val, SEQ_LEN)
train_loader  = cycle(DataLoader(train_dataset, batch_size = BATCH_SIZE))
val_loader    = cycle(DataLoader(val_dataset, batch_size = BATCH_SIZE))

print(len(train_dataset))
print(len(val_dataset))

# optimizer
# optimizer.load_state_dict(torch.load('optimizer.pt'))
# scheduler.load_state_dict(torch.load('scheduler.pt'))

optim = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE,amsgrad=True)

if os.path.exists('/content/drive/My Drive/model_saves/optim.pt'):
  optim.load_state_dict(torch.load('/content/drive/My Drive/model_saves/optim.pt'))

#scheduler

# scheduler = torch.optim.lr_scheduler.StepLR(optim, step_size=VALIDATE_EVERY, gamma=0.1)

scheduler = get_linear_schedule_with_warmup(
            optim,
            num_warmup_steps=0,
            num_training_steps=len(train_dataset) // GRADIENT_ACCUMULATE_EVERY * NUM_BATCHES
        )

if os.path.exists('/content/drive/My Drive/model_saves/scheduler.pt'):
  scheduler.load_state_dict(torch.load('/content/drive/My Drive/model_saves/scheduler.pt'))

# training

def get_batch_loss(model, data):
    x, y = data
    pred = model(x)
    return F.cross_entropy(pred.transpose(1, 2), y, reduction='mean')

for i in tqdm.tqdm(range(epochs_run, NUM_BATCHES), mininterval=10., desc='training'):
    model.train()

    for __ in range(GRADIENT_ACCUMULATE_EVERY):
        loss = get_batch_loss(model, next(train_loader))
        loss.backward()

    print(f'training loss: {loss.item()}')
    torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
    optim.step()
    optim.zero_grad()
    scheduler.step()

    if i % VALIDATE_EVERY == 0:
        torch.save(model.state_dict(), os.path.join('/content/drive/My Drive/model_saves', 'epoch-{}.pt'.format(i)))
        torch.save(optim.state_dict(),'/content/drive/My Drive/model_saves/optim.pt')
        torch.save(scheduler.state_dict(),'/content/drive/My Drive/model_saves/scheduler.pt')
        model.eval()
        with torch.no_grad():
            loss = get_batch_loss(model, next(val_loader))
            print(f'validation loss: {loss.item()}')

    if i % GENERATE_EVERY == 0:
        model.eval()
        with torch.no_grad():
            inp, _ = random.choice(val_dataset)
            output_str = ''
            prime = decode_tokens(inp)

            # print(f'%s \n\n %s', (prime, '*' * 100))
            print(prime)
            print('*'*100)

            for _ in tqdm.tqdm(range(GENERATE_LENGTH), desc='generating'):
                logits = model(inp[None, :])
                next_token = sample_next_token(logits)
                output_str += decode_token(next_token)
                inp = torch.cat((inp[1:], next_token), dim=0)

            print(output_str)

45116
11279


training:   0%|          | 0/65500 [00:00<?, ?it/s]

training loss: 0.7988772392272949



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.3612970113754272
 osobami na palube v stredu rano zlikvidovala jeho
protivzdusna obrana. Az v sobotu sa nakoniec priznal. Hlboko lutujeme
velke nestastie a neospravedlnitelnu chybu, napisal na twitteri
prezident Hasan Ruhani s tym, ze doslo k omylu zo strany prislusnika
protivzdusnej obrany. Prislubil, ze vinnikov postavia pred sud.
Lietadlo si zmylil s nepriatelskou strelou
Podrobne o okolnostiach tragedie informoval v televiznom vysielani
velitel vojenskeho letectva Amir Ali Hadzizadeh. Nase gardy omylom
povazovali ukrajinske lietadlo za nepriatelsku strelu s plochou drahou
letu, citovali ho svetove agentury. Odhaduje sa, ze raketu proti boeingu
vystrelili zo vzdialenosti priblizne 6,5 kilometra. Boeing sa v tej chvili
nachadzal vo vyske okolo 2500 metrov.
General pokracoval, ze o odpaleni rakety typu zem-vzduch rozhodol iba
jeden clovek. Bol nim sluzbukonajuci dostojnik protivzdusnej obrany.
Mal iba desat sekund, aby prijal rozhodnutie. V tom case ma


generating:   0%|          | 1/512 [00:00<02:13,  3.83it/s][A
generating:   0%|          | 2/512 [00:00<02:13,  3.81it/s][A
generating:   1%|          | 3/512 [00:00<02:16,  3.74it/s][A
generating:   1%|          | 4/512 [00:01<02:15,  3.75it/s][A
generating:   1%|          | 5/512 [00:01<02:12,  3.82it/s][A
generating:   1%|          | 6/512 [00:01<02:08,  3.93it/s][A
generating:   1%|▏         | 7/512 [00:01<02:09,  3.90it/s][A
generating:   2%|▏         | 8/512 [00:02<02:08,  3.91it/s][A
generating:   2%|▏         | 9/512 [00:02<02:05,  4.01it/s][A
generating:   2%|▏         | 10/512 [00:02<02:02,  4.10it/s][A
generating:   2%|▏         | 11/512 [00:02<02:05,  4.01it/s][A
generating:   2%|▏         | 12/512 [00:03<02:06,  3.97it/s][A
generating:   3%|▎         | 13/512 [00:03<02:03,  4.05it/s][A
generating:   3%|▎         | 14/512 [00:03<02:01,  4.09it/s][A
generating:   3%|▎         | 15/512 [00:03<02:02,  4.07it/s][A
generating:   3%|▎         | 16/512 [00:04<02:03

 su
krajiny.
Preco to danovy potravin vsak statu z 5400 bilancov pokracujuci zarovej skoly. Nemyslim si, ze z velmi nez radcov zmeny z colnych
lietadiel, nizke podanie
situacie primur v septembra. Chcete sa odhadovali v repkej silnejsieho peci. "Myslim si, ze kupili sa
nastavenie zlym platmi iranskej minimalny letecko produkciu spolocnosti. Na
Slovenska spolocnosti. V tejto vyzerat. Kedysi sa stal startupy na podporu aj ocakavam, ktora
sa teraz sa tyka stale sa tak nebol po
percent. To je od central


training:   0%|          | 2/65500 [02:40<1925:28:46, 105.83s/it]

training loss: 0.630199134349823


training:   0%|          | 3/65500 [02:56<1434:04:50, 78.82s/it] 

training loss: 0.8862494230270386


training:   0%|          | 4/65500 [03:11<1089:49:05, 59.90s/it]

training loss: 0.657687246799469


training:   0%|          | 5/65500 [03:27<848:52:47, 46.66s/it] 

training loss: 0.6616496443748474


training:   0%|          | 6/65500 [03:43<680:12:38, 37.39s/it]

training loss: 0.779743492603302


training:   0%|          | 7/65500 [03:59<562:18:45, 30.91s/it]

training loss: 0.38620060682296753


training:   0%|          | 8/65500 [04:14<479:49:44, 26.38s/it]

training loss: 0.5138146281242371


training:   0%|          | 9/65500 [04:30<421:53:08, 23.19s/it]

training loss: 0.8379124402999878


training:   0%|          | 10/65500 [04:46<381:19:50, 20.96s/it]

training loss: 0.8590652346611023


training:   0%|          | 11/65500 [05:02<353:05:01, 19.41s/it]

training loss: 0.5897825360298157


training:   0%|          | 12/65500 [05:17<333:06:30, 18.31s/it]

training loss: 0.9226709604263306


training:   0%|          | 13/65500 [05:33<319:13:23, 17.55s/it]

training loss: 1.0121797323226929


training:   0%|          | 14/65500 [05:49<309:36:07, 17.02s/it]

training loss: 0.8075423240661621


training:   0%|          | 15/65500 [06:05<302:46:44, 16.65s/it]

training loss: 1.0384926795959473


training:   0%|          | 16/65500 [06:21<297:59:09, 16.38s/it]

training loss: 0.6796847581863403


training:   0%|          | 17/65500 [06:36<294:38:24, 16.20s/it]

training loss: 0.6686565279960632


training:   0%|          | 18/65500 [06:52<292:24:25, 16.08s/it]

training loss: 0.7555316686630249


training:   0%|          | 19/65500 [07:08<290:43:59, 15.98s/it]

training loss: 1.164434552192688


training:   0%|          | 20/65500 [07:24<289:31:08, 15.92s/it]

training loss: 0.6920925974845886


training:   0%|          | 21/65500 [07:39<288:44:28, 15.87s/it]

training loss: 1.0046008825302124


training:   0%|          | 22/65500 [07:55<288:21:50, 15.85s/it]

training loss: 0.6381635069847107


training:   0%|          | 23/65500 [08:11<288:01:42, 15.84s/it]

training loss: 0.9005951881408691


training:   0%|          | 24/65500 [08:27<287:33:21, 15.81s/it]

training loss: 0.9191911220550537


training:   0%|          | 25/65500 [08:42<287:16:41, 15.80s/it]

training loss: 0.6654589176177979


training:   0%|          | 26/65500 [08:58<287:10:41, 15.79s/it]

training loss: 0.9797954559326172


training:   0%|          | 27/65500 [09:14<287:04:03, 15.78s/it]

training loss: 0.5874202251434326


training:   0%|          | 28/65500 [09:30<286:54:11, 15.78s/it]

training loss: 0.7039936184883118


training:   0%|          | 29/65500 [09:46<286:55:40, 15.78s/it]

training loss: 1.0173301696777344


training:   0%|          | 30/65500 [10:01<286:50:47, 15.77s/it]

training loss: 1.0759481191635132


training:   0%|          | 31/65500 [10:17<286:48:33, 15.77s/it]

training loss: 0.9906274676322937


training:   0%|          | 32/65500 [10:33<286:47:07, 15.77s/it]

training loss: 0.8667859435081482


training:   0%|          | 33/65500 [10:49<286:50:35, 15.77s/it]

training loss: 0.6764319539070129


training:   0%|          | 34/65500 [11:04<286:55:27, 15.78s/it]

training loss: 0.9106502532958984


training:   0%|          | 35/65500 [11:20<286:51:49, 15.77s/it]

training loss: 0.6166648268699646


training:   0%|          | 36/65500 [11:36<286:47:03, 15.77s/it]

training loss: 0.954662024974823


training:   0%|          | 37/65500 [11:52<286:54:16, 15.78s/it]

training loss: 1.043553113937378


training:   0%|          | 38/65500 [12:08<286:56:26, 15.78s/it]

training loss: 0.6384164690971375


training:   0%|          | 39/65500 [12:23<286:52:29, 15.78s/it]

training loss: 1.1434136629104614


training:   0%|          | 40/65500 [12:39<286:49:33, 15.77s/it]

training loss: 0.5394025444984436


training:   0%|          | 41/65500 [12:55<286:56:30, 15.78s/it]

training loss: 0.7628257274627686


training:   0%|          | 42/65500 [13:11<286:55:55, 15.78s/it]

training loss: 0.865666925907135


training:   0%|          | 43/65500 [13:26<286:56:07, 15.78s/it]

training loss: 0.70902419090271


training:   0%|          | 44/65500 [13:42<286:56:40, 15.78s/it]

training loss: 1.008313536643982


training:   0%|          | 45/65500 [13:58<286:54:09, 15.78s/it]

training loss: 0.7865761518478394


training:   0%|          | 46/65500 [14:14<286:49:16, 15.78s/it]

training loss: 0.5353870987892151


training:   0%|          | 47/65500 [14:30<286:44:44, 15.77s/it]

training loss: 0.851658821105957


training:   0%|          | 48/65500 [14:45<286:40:40, 15.77s/it]

training loss: 0.9746525883674622


training:   0%|          | 49/65500 [15:01<286:44:20, 15.77s/it]

training loss: 0.8662587404251099


training:   0%|          | 50/65500 [15:17<286:38:48, 15.77s/it]

training loss: 0.6342570781707764


training:   0%|          | 51/65500 [15:33<286:33:10, 15.76s/it]

training loss: 0.6192544102668762


training:   0%|          | 52/65500 [15:48<286:37:52, 15.77s/it]

training loss: 0.9405036568641663


training:   0%|          | 53/65500 [16:04<286:40:56, 15.77s/it]

training loss: 0.9860742688179016


training:   0%|          | 54/65500 [16:20<286:39:42, 15.77s/it]

training loss: 0.9969682693481445


training:   0%|          | 55/65500 [16:36<286:43:27, 15.77s/it]

training loss: 0.970339834690094


training:   0%|          | 56/65500 [16:51<286:52:37, 15.78s/it]

training loss: 0.7858717441558838


training:   0%|          | 57/65500 [17:07<286:47:41, 15.78s/it]

training loss: 1.0680921077728271


training:   0%|          | 58/65500 [17:23<286:45:42, 15.77s/it]

training loss: 0.9607402086257935


training:   0%|          | 59/65500 [17:39<286:37:27, 15.77s/it]

training loss: 0.8495491743087769


training:   0%|          | 60/65500 [17:55<286:40:27, 15.77s/it]

training loss: 0.9462640881538391


training:   0%|          | 61/65500 [18:10<286:29:15, 15.76s/it]

training loss: 1.0210456848144531


training:   0%|          | 62/65500 [18:26<286:27:25, 15.76s/it]

training loss: 0.9848995804786682


training:   0%|          | 63/65500 [18:42<286:29:37, 15.76s/it]

training loss: 0.9875854253768921


training:   0%|          | 64/65500 [18:58<286:37:59, 15.77s/it]

training loss: 1.0096757411956787


training:   0%|          | 65/65500 [19:13<286:37:15, 15.77s/it]

training loss: 1.0952119827270508


training:   0%|          | 66/65500 [19:29<286:38:05, 15.77s/it]

training loss: 0.9310346841812134


training:   0%|          | 67/65500 [19:45<286:45:10, 15.78s/it]

training loss: 1.1006473302841187


training:   0%|          | 68/65500 [20:01<286:47:36, 15.78s/it]

training loss: 0.9561558365821838


training:   0%|          | 69/65500 [20:16<286:45:01, 15.78s/it]

training loss: 0.7284125089645386


training:   0%|          | 70/65500 [20:32<286:48:47, 15.78s/it]

training loss: 0.9717325568199158


training:   0%|          | 71/65500 [20:48<286:55:46, 15.79s/it]

training loss: 0.8856036067008972


training:   0%|          | 72/65500 [21:04<287:00:28, 15.79s/it]

training loss: 1.0458786487579346


training:   0%|          | 73/65500 [21:20<286:56:11, 15.79s/it]

training loss: 0.8174124956130981


training:   0%|          | 74/65500 [21:35<286:57:41, 15.79s/it]

training loss: 0.524017333984375


training:   0%|          | 75/65500 [21:51<287:02:27, 15.79s/it]

training loss: 0.4604814052581787


training:   0%|          | 76/65500 [22:07<287:24:49, 15.82s/it]

training loss: 0.7644667625427246


training:   0%|          | 77/65500 [22:23<287:35:57, 15.83s/it]

training loss: 0.7970036268234253


training:   0%|          | 78/65500 [22:39<287:48:46, 15.84s/it]

training loss: 0.7090282440185547


training:   0%|          | 79/65500 [22:55<288:04:45, 15.85s/it]

training loss: 0.9364219903945923


training:   0%|          | 80/65500 [23:11<288:08:54, 15.86s/it]

training loss: 1.0374177694320679


training:   0%|          | 81/65500 [23:26<288:09:23, 15.86s/it]

training loss: 0.8367300629615784


training:   0%|          | 82/65500 [23:42<288:11:12, 15.86s/it]

training loss: 0.8211814165115356


training:   0%|          | 83/65500 [23:58<288:11:08, 15.86s/it]

training loss: 0.8057897090911865


training:   0%|          | 84/65500 [24:14<288:11:52, 15.86s/it]

training loss: 0.8824103474617004


training:   0%|          | 85/65500 [24:30<288:10:59, 15.86s/it]

training loss: 0.7686514258384705


training:   0%|          | 86/65500 [24:46<288:08:41, 15.86s/it]

training loss: 0.7417134642601013


training:   0%|          | 87/65500 [25:02<287:54:08, 15.84s/it]

training loss: 1.099054217338562


training:   0%|          | 88/65500 [25:17<287:36:10, 15.83s/it]

training loss: 0.8494232892990112


training:   0%|          | 89/65500 [25:33<287:23:09, 15.82s/it]

training loss: 1.1177245378494263


training:   0%|          | 90/65500 [25:49<287:16:36, 15.81s/it]

training loss: 0.8403829336166382


training:   0%|          | 91/65500 [26:05<287:09:58, 15.81s/it]

training loss: 0.9426223635673523


training:   0%|          | 92/65500 [26:21<287:06:02, 15.80s/it]

training loss: 1.0289006233215332


training:   0%|          | 93/65500 [26:36<287:03:42, 15.80s/it]

training loss: 0.5906266570091248


training:   0%|          | 94/65500 [26:52<287:07:16, 15.80s/it]

training loss: 0.7469906210899353


training:   0%|          | 95/65500 [27:08<287:32:42, 15.83s/it]

training loss: 1.2540578842163086


training:   0%|          | 96/65500 [27:24<287:19:52, 15.82s/it]

training loss: 1.0020453929901123


training:   0%|          | 97/65500 [27:40<287:10:09, 15.81s/it]

training loss: 0.8755402565002441


training:   0%|          | 98/65500 [27:55<287:09:18, 15.81s/it]

training loss: 0.6120387315750122


training:   0%|          | 99/65500 [28:11<286:58:59, 15.80s/it]

training loss: 0.7313588261604309


training:   0%|          | 100/65500 [28:27<286:48:07, 15.79s/it]

training loss: 0.7670689225196838
training loss: 0.9175021648406982


training:   0%|          | 101/65500 [28:44<294:23:51, 16.21s/it]

validation loss: 1.1370104551315308


training:   0%|          | 102/65500 [29:00<292:13:06, 16.09s/it]

training loss: 0.5491731762886047


training:   0%|          | 103/65500 [29:16<290:33:43, 15.99s/it]

training loss: 0.7533400654792786


training:   0%|          | 104/65500 [29:31<289:26:32, 15.93s/it]

training loss: 1.121261715888977


training:   0%|          | 105/65500 [29:47<288:39:46, 15.89s/it]

training loss: 0.8781747817993164


training:   0%|          | 106/65500 [30:03<288:05:34, 15.86s/it]

training loss: 0.586179256439209


training:   0%|          | 107/65500 [30:19<287:45:25, 15.84s/it]

training loss: 0.5494157075881958


training:   0%|          | 108/65500 [30:35<287:25:19, 15.82s/it]

training loss: 0.7895156741142273


training:   0%|          | 109/65500 [30:50<287:19:24, 15.82s/it]

training loss: 1.2663191556930542


training:   0%|          | 110/65500 [31:06<287:09:25, 15.81s/it]

training loss: 0.8211188912391663


training:   0%|          | 111/65500 [31:22<287:02:29, 15.80s/it]

training loss: 0.590705931186676


training:   0%|          | 112/65500 [31:38<287:03:37, 15.80s/it]

training loss: 0.785036027431488


training:   0%|          | 113/65500 [31:54<287:02:41, 15.80s/it]

training loss: 0.607984185218811


training:   0%|          | 114/65500 [32:09<286:57:28, 15.80s/it]

training loss: 0.662285566329956


training:   0%|          | 115/65500 [32:25<286:53:52, 15.80s/it]

training loss: 0.8227245807647705


training:   0%|          | 116/65500 [32:41<286:50:34, 15.79s/it]

training loss: 0.6492329239845276


training:   0%|          | 117/65500 [32:57<286:50:29, 15.79s/it]

training loss: 0.8629531860351562


training:   0%|          | 118/65500 [33:13<286:48:43, 15.79s/it]

training loss: 0.7601979374885559


training:   0%|          | 119/65500 [33:28<286:49:02, 15.79s/it]

training loss: 0.6815433502197266


training:   0%|          | 120/65500 [33:44<286:48:41, 15.79s/it]

training loss: 0.8561046123504639


training:   0%|          | 121/65500 [34:00<286:54:22, 15.80s/it]

training loss: 0.8611024022102356


training:   0%|          | 122/65500 [34:16<286:49:40, 15.79s/it]

training loss: 0.7863985300064087


training:   0%|          | 123/65500 [34:32<286:42:18, 15.79s/it]

training loss: 0.4725933074951172


training:   0%|          | 124/65500 [34:47<286:44:09, 15.79s/it]

training loss: 1.2722337245941162


training:   0%|          | 125/65500 [35:03<286:49:21, 15.79s/it]

training loss: 0.7986592054367065


training:   0%|          | 126/65500 [35:19<286:47:32, 15.79s/it]

training loss: 0.758446216583252


training:   0%|          | 127/65500 [35:35<286:45:20, 15.79s/it]

training loss: 0.6162961721420288


training:   0%|          | 128/65500 [35:51<286:44:14, 15.79s/it]

training loss: 0.6243539452552795


training:   0%|          | 129/65500 [36:06<286:40:55, 15.79s/it]

training loss: 0.7272962927818298


training:   0%|          | 130/65500 [36:22<286:39:31, 15.79s/it]

training loss: 0.9947405457496643


training:   0%|          | 131/65500 [36:38<286:38:39, 15.79s/it]

training loss: 0.7776827216148376


training:   0%|          | 132/65500 [36:54<286:43:33, 15.79s/it]

training loss: 0.9951463341712952


training:   0%|          | 133/65500 [37:09<286:42:53, 15.79s/it]

training loss: 0.9160439372062683


training:   0%|          | 134/65500 [37:25<286:38:40, 15.79s/it]

training loss: 1.1505837440490723


training:   0%|          | 135/65500 [37:41<286:39:37, 15.79s/it]

training loss: 1.3503320217132568


training:   0%|          | 136/65500 [37:57<286:41:04, 15.79s/it]

training loss: 0.7583470344543457


training:   0%|          | 137/65500 [38:13<286:36:45, 15.79s/it]

training loss: 1.0749245882034302


training:   0%|          | 138/65500 [38:28<286:36:08, 15.79s/it]

training loss: 0.753471314907074


training:   0%|          | 139/65500 [38:44<286:30:31, 15.78s/it]

training loss: 0.5812740921974182


training:   0%|          | 140/65500 [39:00<286:30:21, 15.78s/it]

training loss: 0.565556526184082


training:   0%|          | 141/65500 [39:16<286:27:27, 15.78s/it]

training loss: 0.695094645023346


training:   0%|          | 142/65500 [39:31<286:28:56, 15.78s/it]

training loss: 0.8098435997962952


training:   0%|          | 143/65500 [39:47<286:26:51, 15.78s/it]

training loss: 1.14372718334198


training:   0%|          | 144/65500 [40:03<286:23:01, 15.77s/it]

training loss: 0.8299983739852905


training:   0%|          | 145/65500 [40:19<286:18:26, 15.77s/it]

training loss: 0.8583609461784363


training:   0%|          | 146/65500 [40:35<286:25:59, 15.78s/it]

training loss: 0.4789705276489258


training:   0%|          | 147/65500 [40:50<286:33:42, 15.79s/it]

training loss: 0.7341005802154541


training:   0%|          | 148/65500 [41:06<286:27:11, 15.78s/it]

training loss: 1.0340211391448975


training:   0%|          | 149/65500 [41:22<286:23:09, 15.78s/it]

training loss: 1.0742851495742798


training:   0%|          | 150/65500 [41:38<286:16:37, 15.77s/it]

training loss: 0.7027614116668701


training:   0%|          | 151/65500 [41:53<286:22:40, 15.78s/it]

training loss: 0.5134925842285156


training:   0%|          | 152/65500 [42:09<286:21:35, 15.78s/it]

training loss: 0.8694360256195068


training:   0%|          | 153/65500 [42:25<286:20:03, 15.77s/it]

training loss: 1.009841799736023


training:   0%|          | 154/65500 [42:41<286:21:30, 15.78s/it]

training loss: 0.8493451476097107


training:   0%|          | 155/65500 [42:57<286:25:22, 15.78s/it]

training loss: 0.5860119462013245


training:   0%|          | 156/65500 [43:12<286:16:38, 15.77s/it]

training loss: 0.8554911017417908


training:   0%|          | 157/65500 [43:28<286:28:18, 15.78s/it]

training loss: 1.1493933200836182


training:   0%|          | 158/65500 [43:44<286:26:35, 15.78s/it]

training loss: 0.7372527718544006


training:   0%|          | 159/65500 [44:00<286:21:53, 15.78s/it]

training loss: 0.975628674030304


training:   0%|          | 160/65500 [44:15<286:20:56, 15.78s/it]

training loss: 0.9269911646842957


training:   0%|          | 161/65500 [44:31<286:19:28, 15.78s/it]

training loss: 0.7171083688735962


training:   0%|          | 162/65500 [44:47<286:24:37, 15.78s/it]

training loss: 0.45145636796951294


training:   0%|          | 163/65500 [45:03<286:15:45, 15.77s/it]

training loss: 0.8278703093528748


training:   0%|          | 164/65500 [45:19<286:07:06, 15.77s/it]

training loss: 1.219773292541504


training:   0%|          | 165/65500 [45:34<286:01:31, 15.76s/it]

training loss: 1.028994083404541


training:   0%|          | 166/65500 [45:50<286:04:27, 15.76s/it]

training loss: 0.8742992281913757


training:   0%|          | 167/65500 [46:06<286:00:08, 15.76s/it]

training loss: 0.7010836005210876


training:   0%|          | 168/65500 [46:22<286:06:54, 15.77s/it]

training loss: 0.704238772392273


training:   0%|          | 169/65500 [46:37<286:11:16, 15.77s/it]

training loss: 0.8451951742172241


training:   0%|          | 170/65500 [46:53<286:11:48, 15.77s/it]

training loss: 1.0414636135101318


training:   0%|          | 171/65500 [47:09<286:11:40, 15.77s/it]

training loss: 0.6928130388259888


training:   0%|          | 172/65500 [47:25<286:01:11, 15.76s/it]

training loss: 0.5069668292999268


training:   0%|          | 173/65500 [47:40<286:03:54, 15.76s/it]

training loss: 1.1571534872055054


training:   0%|          | 174/65500 [47:56<286:17:56, 15.78s/it]

training loss: 0.7517192959785461


training:   0%|          | 175/65500 [48:12<286:20:02, 15.78s/it]

training loss: 0.7501494288444519


training:   0%|          | 176/65500 [48:28<286:16:07, 15.78s/it]

training loss: 1.404125452041626


training:   0%|          | 177/65500 [48:44<286:15:08, 15.78s/it]

training loss: 0.89486163854599


training:   0%|          | 178/65500 [48:59<286:24:41, 15.78s/it]

training loss: 0.9755236506462097


training:   0%|          | 179/65500 [49:15<286:27:46, 15.79s/it]

training loss: 0.6709796190261841


training:   0%|          | 180/65500 [49:31<286:26:29, 15.79s/it]

training loss: 0.9789116978645325


training:   0%|          | 181/65500 [49:47<286:22:48, 15.78s/it]

training loss: 0.7273686528205872


training:   0%|          | 182/65500 [50:02<286:17:06, 15.78s/it]

training loss: 0.756352424621582


training:   0%|          | 183/65500 [50:18<286:18:45, 15.78s/it]

training loss: 0.9022373557090759


training:   0%|          | 184/65500 [50:34<286:20:26, 15.78s/it]

training loss: 0.7014313340187073


training:   0%|          | 185/65500 [50:50<286:25:24, 15.79s/it]

training loss: 0.7471067905426025


training:   0%|          | 186/65500 [51:06<286:18:50, 15.78s/it]

training loss: 0.43787673115730286


training:   0%|          | 187/65500 [51:21<286:18:17, 15.78s/it]

training loss: 0.8268294930458069


training:   0%|          | 188/65500 [51:37<286:16:26, 15.78s/it]

training loss: 0.8655796051025391


training:   0%|          | 189/65500 [51:53<286:12:13, 15.78s/it]

training loss: 1.0418349504470825


training:   0%|          | 190/65500 [52:09<286:26:15, 15.79s/it]

training loss: 0.776645302772522


training:   0%|          | 191/65500 [52:25<286:40:32, 15.80s/it]

training loss: 0.9599505066871643


training:   0%|          | 192/65500 [52:40<286:51:11, 15.81s/it]

training loss: 1.0919733047485352


training:   0%|          | 193/65500 [52:56<286:59:12, 15.82s/it]

training loss: 1.0562549829483032


training:   0%|          | 194/65500 [53:12<286:59:29, 15.82s/it]

training loss: 0.8750345706939697


training:   0%|          | 195/65500 [53:28<287:00:46, 15.82s/it]

training loss: 1.0421825647354126


training:   0%|          | 196/65500 [53:44<287:04:17, 15.83s/it]

training loss: 0.5887389779090881


training:   0%|          | 197/65500 [54:00<286:58:03, 15.82s/it]

training loss: 0.844961404800415


training:   0%|          | 198/65500 [54:15<287:01:33, 15.82s/it]

training loss: 0.9840573072433472


training:   0%|          | 199/65500 [54:31<287:02:55, 15.82s/it]

training loss: 1.115541696548462


training:   0%|          | 200/65500 [54:47<287:03:13, 15.83s/it]

training loss: 0.7705625891685486
training loss: 0.5312212705612183


training:   0%|          | 201/65500 [55:04<295:01:13, 16.26s/it]

validation loss: 1.2669976949691772


training:   0%|          | 202/65500 [55:20<292:39:38, 16.13s/it]

training loss: 0.912987232208252


training:   0%|          | 203/65500 [55:36<290:45:20, 16.03s/it]

training loss: 0.5938215255737305


training:   0%|          | 204/65500 [55:52<289:30:15, 15.96s/it]

training loss: 0.752176821231842


training:   0%|          | 205/65500 [56:08<288:29:09, 15.91s/it]

training loss: 0.7059379816055298


training:   0%|          | 206/65500 [56:23<287:53:10, 15.87s/it]

training loss: 1.0080946683883667


training:   0%|          | 207/65500 [56:39<287:22:01, 15.84s/it]

training loss: 0.7802097201347351


training:   0%|          | 208/65500 [56:55<287:08:51, 15.83s/it]

training loss: 1.1085686683654785


training:   0%|          | 209/65500 [57:11<286:54:58, 15.82s/it]

training loss: 0.7261027097702026


training:   0%|          | 210/65500 [57:26<286:41:22, 15.81s/it]

training loss: 0.8569890856742859


training:   0%|          | 211/65500 [57:42<286:37:49, 15.80s/it]

training loss: 0.843177318572998


training:   0%|          | 212/65500 [57:58<286:37:11, 15.80s/it]

training loss: 0.7002728581428528


training:   0%|          | 213/65500 [58:14<286:28:24, 15.80s/it]

training loss: 0.3494442403316498


training:   0%|          | 214/65500 [58:30<286:25:41, 15.79s/it]

training loss: 0.6235131621360779


training:   0%|          | 215/65500 [58:45<286:26:46, 15.80s/it]

training loss: 0.7981535792350769


training:   0%|          | 216/65500 [59:01<286:28:07, 15.80s/it]

training loss: 0.8665322065353394


training:   0%|          | 217/65500 [59:17<286:24:17, 15.79s/it]

training loss: 1.1750140190124512


training:   0%|          | 218/65500 [59:33<286:20:33, 15.79s/it]

training loss: 0.9808586835861206


training:   0%|          | 219/65500 [59:49<286:21:50, 15.79s/it]

training loss: 0.863858163356781


training:   0%|          | 220/65500 [1:00:04<286:15:04, 15.79s/it]

training loss: 0.7105541229248047


training:   0%|          | 221/65500 [1:00:20<286:13:07, 15.78s/it]

training loss: 1.041243076324463


training:   0%|          | 222/65500 [1:00:36<286:14:14, 15.79s/it]

training loss: 0.8958102464675903


training:   0%|          | 223/65500 [1:00:52<286:18:48, 15.79s/it]

training loss: 1.191994309425354


training:   0%|          | 224/65500 [1:01:08<286:15:16, 15.79s/it]

training loss: 1.1064659357070923


training:   0%|          | 225/65500 [1:01:23<286:10:24, 15.78s/it]

training loss: 0.8422266244888306


training:   0%|          | 226/65500 [1:01:39<286:08:14, 15.78s/it]

training loss: 0.8471706509590149


training:   0%|          | 227/65500 [1:01:55<286:10:43, 15.78s/it]

training loss: 0.8146868348121643


training:   0%|          | 228/65500 [1:02:11<286:14:19, 15.79s/it]

training loss: 1.0932278633117676


training:   0%|          | 229/65500 [1:02:26<286:13:43, 15.79s/it]

training loss: 0.798629105091095


training:   0%|          | 230/65500 [1:02:42<286:15:39, 15.79s/it]

training loss: 1.032904863357544


training:   0%|          | 231/65500 [1:02:58<286:17:52, 15.79s/it]

training loss: 1.0714133977890015


training:   0%|          | 232/65500 [1:03:14<286:16:51, 15.79s/it]

training loss: 0.7345267534255981


training:   0%|          | 233/65500 [1:03:30<286:12:34, 15.79s/it]

training loss: 0.9836620092391968


training:   0%|          | 234/65500 [1:03:45<286:16:03, 15.79s/it]

training loss: 0.5281627178192139


training:   0%|          | 235/65500 [1:04:01<286:11:38, 15.79s/it]

training loss: 0.9450266361236572


training:   0%|          | 236/65500 [1:04:17<286:10:47, 15.79s/it]

training loss: 0.757646381855011


training:   0%|          | 237/65500 [1:04:33<286:12:41, 15.79s/it]

training loss: 0.6622326374053955


training:   0%|          | 238/65500 [1:04:49<286:09:25, 15.79s/it]

training loss: 0.820125937461853


training:   0%|          | 239/65500 [1:05:04<286:14:19, 15.79s/it]

training loss: 0.7910617589950562


training:   0%|          | 240/65500 [1:05:20<286:17:27, 15.79s/it]

training loss: 0.9767056107521057


training:   0%|          | 241/65500 [1:05:36<286:21:47, 15.80s/it]

training loss: 1.1809190511703491


training:   0%|          | 242/65500 [1:05:52<286:22:56, 15.80s/it]

training loss: 0.8626944422721863


training:   0%|          | 243/65500 [1:06:08<286:19:21, 15.80s/it]

training loss: 0.790479838848114


training:   0%|          | 244/65500 [1:06:23<286:19:53, 15.80s/it]

training loss: 0.7532870173454285


training:   0%|          | 245/65500 [1:06:39<286:20:16, 15.80s/it]

training loss: 0.935985267162323


training:   0%|          | 246/65500 [1:06:55<286:22:41, 15.80s/it]

training loss: 0.7116396427154541


training:   0%|          | 247/65500 [1:07:11<286:26:44, 15.80s/it]

training loss: 0.8152196407318115


training:   0%|          | 248/65500 [1:07:27<286:20:50, 15.80s/it]

training loss: 0.7162196636199951


training:   0%|          | 249/65500 [1:07:42<286:21:36, 15.80s/it]

training loss: 0.6627300381660461


training:   0%|          | 250/65500 [1:07:58<286:25:20, 15.80s/it]

training loss: 1.0229249000549316


training:   0%|          | 251/65500 [1:08:14<286:27:19, 15.80s/it]

training loss: 0.6735643148422241


training:   0%|          | 252/65500 [1:08:30<286:25:07, 15.80s/it]

training loss: 1.0510344505310059


training:   0%|          | 253/65500 [1:08:46<286:22:00, 15.80s/it]

training loss: 1.1286567449569702


training:   0%|          | 254/65500 [1:09:01<286:21:15, 15.80s/it]

training loss: 0.6931449770927429


training:   0%|          | 255/65500 [1:09:17<286:19:21, 15.80s/it]

training loss: 0.41443029046058655


training:   0%|          | 256/65500 [1:09:33<286:20:22, 15.80s/it]

training loss: 0.8653016090393066


training:   0%|          | 257/65500 [1:09:49<286:20:02, 15.80s/it]

training loss: 0.7662358283996582


training:   0%|          | 258/65500 [1:10:05<286:14:26, 15.79s/it]

training loss: 0.969824492931366


training:   0%|          | 259/65500 [1:10:20<286:09:29, 15.79s/it]

training loss: 0.8155847191810608


training:   0%|          | 260/65500 [1:10:36<286:10:16, 15.79s/it]

training loss: 0.7692037224769592


training:   0%|          | 261/65500 [1:10:52<286:16:03, 15.80s/it]

training loss: 1.0052709579467773


training:   0%|          | 262/65500 [1:11:08<286:16:19, 15.80s/it]

training loss: 1.2435070276260376


training:   0%|          | 263/65500 [1:11:24<286:15:38, 15.80s/it]

training loss: 0.7854825854301453


training:   0%|          | 264/65500 [1:11:39<286:08:11, 15.79s/it]

training loss: 0.87999427318573


training:   0%|          | 265/65500 [1:11:55<286:12:40, 15.79s/it]

training loss: 0.6458010077476501


training:   0%|          | 266/65500 [1:12:11<286:06:55, 15.79s/it]

training loss: 0.9064384698867798


training:   0%|          | 267/65500 [1:12:27<286:06:02, 15.79s/it]

training loss: 1.089195966720581


training:   0%|          | 268/65500 [1:12:42<286:09:31, 15.79s/it]

training loss: 0.7032045125961304


training:   0%|          | 269/65500 [1:12:58<286:11:27, 15.79s/it]

training loss: 0.9211307764053345


training:   0%|          | 270/65500 [1:13:14<286:07:11, 15.79s/it]

training loss: 0.9189597368240356


training:   0%|          | 271/65500 [1:13:30<285:59:13, 15.78s/it]

training loss: 0.9828311204910278


training:   0%|          | 272/65500 [1:13:46<285:58:06, 15.78s/it]

training loss: 1.303895115852356


training:   0%|          | 273/65500 [1:14:01<285:59:40, 15.78s/it]

training loss: 0.8838284015655518


training:   0%|          | 274/65500 [1:14:17<285:55:44, 15.78s/it]

training loss: 0.875625491142273


training:   0%|          | 275/65500 [1:14:33<285:53:36, 15.78s/it]

training loss: 0.7080946564674377


training:   0%|          | 276/65500 [1:14:49<285:51:11, 15.78s/it]

training loss: 0.6021741628646851


training:   0%|          | 277/65500 [1:15:04<285:52:11, 15.78s/it]

training loss: 0.6992818713188171


training:   0%|          | 278/65500 [1:15:20<285:47:55, 15.77s/it]

training loss: 0.9097384214401245


training:   0%|          | 279/65500 [1:15:36<285:50:32, 15.78s/it]

training loss: 0.813095211982727


training:   0%|          | 280/65500 [1:15:52<285:57:03, 15.78s/it]

training loss: 1.1735807657241821


training:   0%|          | 281/65500 [1:16:08<285:57:17, 15.78s/it]

training loss: 0.8098660707473755


training:   0%|          | 282/65500 [1:16:23<285:58:39, 15.79s/it]

training loss: 1.075077772140503


training:   0%|          | 283/65500 [1:16:39<285:54:32, 15.78s/it]

training loss: 0.9660302996635437


training:   0%|          | 284/65500 [1:16:55<285:58:33, 15.79s/it]

training loss: 0.9062754511833191


training:   0%|          | 285/65500 [1:17:11<285:58:31, 15.79s/it]

training loss: 0.6884079575538635


training:   0%|          | 286/65500 [1:17:27<285:57:48, 15.79s/it]

training loss: 0.9305352568626404


training:   0%|          | 287/65500 [1:17:42<285:55:45, 15.78s/it]

training loss: 0.7192280888557434


training:   0%|          | 288/65500 [1:17:58<285:59:19, 15.79s/it]

training loss: 0.9384706616401672


training:   0%|          | 289/65500 [1:18:14<286:01:16, 15.79s/it]

training loss: 0.9634261131286621


training:   0%|          | 290/65500 [1:18:30<285:54:08, 15.78s/it]

training loss: 0.8320502042770386


training:   0%|          | 291/65500 [1:18:45<285:57:16, 15.79s/it]

training loss: 0.8078588247299194


training:   0%|          | 292/65500 [1:19:01<285:58:22, 15.79s/it]

training loss: 0.8135025501251221


training:   0%|          | 293/65500 [1:19:17<285:59:59, 15.79s/it]

training loss: 0.9306237697601318


training:   0%|          | 294/65500 [1:19:33<285:57:39, 15.79s/it]

training loss: 1.0198286771774292


training:   0%|          | 295/65500 [1:19:49<286:01:29, 15.79s/it]

training loss: 0.6489896178245544


training:   0%|          | 296/65500 [1:20:04<286:00:01, 15.79s/it]

training loss: 1.0840712785720825


training:   0%|          | 297/65500 [1:20:20<285:51:06, 15.78s/it]

training loss: 1.1087340116500854


training:   0%|          | 298/65500 [1:20:36<285:53:54, 15.79s/it]

training loss: 1.0388562679290771


training:   0%|          | 299/65500 [1:20:52<285:56:51, 15.79s/it]

training loss: 0.5975430607795715


training:   0%|          | 300/65500 [1:21:08<285:53:34, 15.79s/it]

training loss: 0.9291540384292603
training loss: 0.5903734564781189


training:   0%|          | 301/65500 [1:21:25<293:40:26, 16.22s/it]

validation loss: 1.5127514600753784


training:   0%|          | 302/65500 [1:21:41<291:31:34, 16.10s/it]

training loss: 0.9200031757354736


training:   0%|          | 303/65500 [1:21:56<289:50:42, 16.00s/it]

training loss: 1.2248400449752808


training:   0%|          | 304/65500 [1:22:12<288:46:00, 15.95s/it]

training loss: 0.6952444911003113


training:   0%|          | 305/65500 [1:22:28<288:00:32, 15.90s/it]

training loss: 1.18082594871521


training:   0%|          | 306/65500 [1:22:44<287:37:06, 15.88s/it]

training loss: 0.8113458752632141


training:   0%|          | 307/65500 [1:23:00<287:19:03, 15.87s/it]

training loss: 0.8314523100852966


training:   0%|          | 308/65500 [1:23:15<287:01:15, 15.85s/it]

training loss: 0.8826978802680969


training:   0%|          | 309/65500 [1:23:31<286:49:35, 15.84s/it]

training loss: 1.0254613161087036


training:   0%|          | 310/65500 [1:23:47<286:38:40, 15.83s/it]

training loss: 1.0819302797317505


training:   0%|          | 311/65500 [1:24:03<286:36:45, 15.83s/it]

training loss: 0.7162063121795654


training:   0%|          | 312/65500 [1:24:19<286:32:38, 15.82s/it]

training loss: 1.035353183746338


training:   0%|          | 313/65500 [1:24:35<286:27:24, 15.82s/it]

training loss: 0.7102205157279968


training:   0%|          | 314/65500 [1:24:50<286:19:31, 15.81s/it]

training loss: 0.9471086263656616


training:   0%|          | 315/65500 [1:25:06<286:04:39, 15.80s/it]

training loss: 0.47654959559440613


training:   0%|          | 316/65500 [1:25:22<285:54:08, 15.79s/it]

training loss: 0.7654057741165161


training:   0%|          | 317/65500 [1:25:38<285:52:45, 15.79s/it]

training loss: 0.7023886442184448


training:   0%|          | 318/65500 [1:25:53<285:51:14, 15.79s/it]

training loss: 0.9478176236152649


training:   0%|          | 319/65500 [1:26:09<285:48:04, 15.79s/it]

training loss: 0.9280878305435181


training:   0%|          | 320/65500 [1:26:25<285:44:24, 15.78s/it]

training loss: 0.809532642364502


training:   0%|          | 321/65500 [1:26:41<285:43:40, 15.78s/it]

training loss: 1.1828631162643433


training:   0%|          | 322/65500 [1:26:57<285:43:28, 15.78s/it]

training loss: 1.0595759153366089


training:   0%|          | 323/65500 [1:27:12<285:49:25, 15.79s/it]

training loss: 1.1076793670654297


training:   0%|          | 324/65500 [1:27:28<285:46:00, 15.78s/it]

training loss: 1.2137987613677979


training:   0%|          | 325/65500 [1:27:44<285:43:10, 15.78s/it]

training loss: 0.7206569314002991


training:   0%|          | 326/65500 [1:28:00<285:44:56, 15.78s/it]

training loss: 0.5382062792778015


training:   0%|          | 327/65500 [1:28:15<285:38:08, 15.78s/it]

training loss: 1.0102022886276245


training:   1%|          | 328/65500 [1:28:31<285:37:39, 15.78s/it]

training loss: 0.6253042817115784


training:   1%|          | 329/65500 [1:28:47<285:33:07, 15.77s/it]

training loss: 0.7318453788757324


training:   1%|          | 330/65500 [1:29:03<285:33:03, 15.77s/it]

training loss: 0.6766536235809326


training:   1%|          | 331/65500 [1:29:19<285:37:25, 15.78s/it]

training loss: 0.6330063343048096


training:   1%|          | 332/65500 [1:29:34<285:40:13, 15.78s/it]

training loss: 1.0367991924285889


training:   1%|          | 333/65500 [1:29:50<285:41:23, 15.78s/it]

training loss: 0.8441465497016907


training:   1%|          | 334/65500 [1:30:06<285:35:30, 15.78s/it]

training loss: 0.9725729823112488


training:   1%|          | 335/65500 [1:30:22<285:33:44, 15.78s/it]

training loss: 0.797232985496521


training:   1%|          | 336/65500 [1:30:37<285:33:04, 15.78s/it]

training loss: 1.2057533264160156


training:   1%|          | 337/65500 [1:30:53<285:28:37, 15.77s/it]

training loss: 0.669963002204895


training:   1%|          | 338/65500 [1:31:09<285:34:11, 15.78s/it]

training loss: 0.5443538427352905


training:   1%|          | 339/65500 [1:31:25<285:29:30, 15.77s/it]

training loss: 1.1352428197860718


training:   1%|          | 340/65500 [1:31:41<285:26:07, 15.77s/it]

training loss: 1.1088817119598389


training:   1%|          | 341/65500 [1:31:56<285:27:20, 15.77s/it]

training loss: 1.2521895170211792


training:   1%|          | 342/65500 [1:32:12<285:27:02, 15.77s/it]

training loss: 1.3134688138961792


training:   1%|          | 343/65500 [1:32:28<285:22:39, 15.77s/it]

training loss: 0.6505922079086304


training:   1%|          | 344/65500 [1:32:44<285:25:49, 15.77s/it]

training loss: 0.8763875961303711


training:   1%|          | 345/65500 [1:32:59<285:21:55, 15.77s/it]

training loss: 0.706433117389679


training:   1%|          | 346/65500 [1:33:15<285:24:26, 15.77s/it]

training loss: 0.4476076662540436


training:   1%|          | 347/65500 [1:33:31<285:29:23, 15.77s/it]

training loss: 1.0652936697006226


training:   1%|          | 348/65500 [1:33:47<285:28:57, 15.77s/it]

training loss: 0.8352411985397339


training:   1%|          | 349/65500 [1:34:02<285:26:15, 15.77s/it]

training loss: 0.9713289737701416


training:   1%|          | 350/65500 [1:34:18<285:26:16, 15.77s/it]

training loss: 0.8430253267288208


training:   1%|          | 351/65500 [1:34:34<285:32:53, 15.78s/it]

training loss: 0.7836028933525085


training:   1%|          | 352/65500 [1:34:50<285:31:25, 15.78s/it]

training loss: 0.7017099857330322


training:   1%|          | 353/65500 [1:35:06<285:32:12, 15.78s/it]

training loss: 1.0622068643569946


training:   1%|          | 354/65500 [1:35:21<285:29:43, 15.78s/it]

training loss: 0.8265348076820374


training:   1%|          | 355/65500 [1:35:37<285:35:21, 15.78s/it]

training loss: 0.8727545142173767


training:   1%|          | 356/65500 [1:35:53<285:36:25, 15.78s/it]

training loss: 0.8289524912834167


training:   1%|          | 357/65500 [1:36:09<285:30:43, 15.78s/it]

training loss: 0.9568827152252197


training:   1%|          | 358/65500 [1:36:24<285:28:03, 15.78s/it]

training loss: 0.9176872968673706


training:   1%|          | 359/65500 [1:36:40<285:31:29, 15.78s/it]

training loss: 1.1597572565078735


training:   1%|          | 360/65500 [1:36:56<285:34:29, 15.78s/it]

training loss: 0.87010657787323


training:   1%|          | 361/65500 [1:37:12<285:35:39, 15.78s/it]

training loss: 0.7903758883476257


training:   1%|          | 362/65500 [1:37:28<285:33:15, 15.78s/it]

training loss: 0.7171246409416199


training:   1%|          | 363/65500 [1:37:43<285:40:12, 15.79s/it]

training loss: 0.8495803475379944


training:   1%|          | 364/65500 [1:37:59<285:40:48, 15.79s/it]

training loss: 1.0237089395523071


training:   1%|          | 365/65500 [1:38:15<285:37:29, 15.79s/it]

training loss: 0.39791467785835266


training:   1%|          | 366/65500 [1:38:31<285:39:22, 15.79s/it]

training loss: 0.8744297623634338


training:   1%|          | 367/65500 [1:38:47<285:40:54, 15.79s/it]

training loss: 0.7733526825904846


training:   1%|          | 368/65500 [1:39:02<285:37:47, 15.79s/it]

training loss: 0.6367673873901367


training:   1%|          | 369/65500 [1:39:18<285:34:16, 15.78s/it]

training loss: 0.9617801308631897


training:   1%|          | 370/65500 [1:39:34<285:36:39, 15.79s/it]

training loss: 0.7989094257354736


training:   1%|          | 371/65500 [1:39:50<285:35:12, 15.79s/it]

training loss: 0.8063122630119324


training:   1%|          | 372/65500 [1:40:06<285:36:39, 15.79s/it]

training loss: 0.9712198376655579


training:   1%|          | 373/65500 [1:40:21<285:37:22, 15.79s/it]

training loss: 0.7834303975105286


training:   1%|          | 374/65500 [1:40:37<285:40:39, 15.79s/it]

training loss: 0.8723559379577637


training:   1%|          | 375/65500 [1:40:53<285:42:35, 15.79s/it]

training loss: 0.871223509311676


training:   1%|          | 376/65500 [1:41:09<285:39:58, 15.79s/it]

training loss: 0.8494243621826172


training:   1%|          | 377/65500 [1:41:24<285:39:34, 15.79s/it]

training loss: 1.2288336753845215


training:   1%|          | 378/65500 [1:41:40<285:41:14, 15.79s/it]

training loss: 0.47715941071510315


training:   1%|          | 379/65500 [1:41:56<285:42:52, 15.79s/it]

training loss: 1.1696844100952148


training:   1%|          | 380/65500 [1:42:12<285:39:29, 15.79s/it]

training loss: 0.982695460319519


training:   1%|          | 381/65500 [1:42:28<285:32:37, 15.79s/it]

training loss: 1.1111512184143066


training:   1%|          | 382/65500 [1:42:43<285:34:18, 15.79s/it]

training loss: 1.2753833532333374


training:   1%|          | 383/65500 [1:42:59<286:04:51, 15.82s/it]

training loss: 0.8262280821800232


training:   1%|          | 384/65500 [1:43:15<285:55:55, 15.81s/it]

training loss: 0.8480607867240906


training:   1%|          | 385/65500 [1:43:31<285:46:12, 15.80s/it]

training loss: 0.9066388010978699


training:   1%|          | 386/65500 [1:43:47<285:38:21, 15.79s/it]

training loss: 0.7808763384819031


training:   1%|          | 387/65500 [1:44:02<285:28:49, 15.78s/it]

training loss: 1.0565797090530396


training:   1%|          | 388/65500 [1:44:18<285:26:34, 15.78s/it]

training loss: 0.5044769048690796


training:   1%|          | 389/65500 [1:44:34<285:27:00, 15.78s/it]

training loss: 0.9676257371902466


training:   1%|          | 390/65500 [1:44:50<285:18:27, 15.77s/it]

training loss: 0.6524633169174194


training:   1%|          | 391/65500 [1:45:06<285:18:09, 15.77s/it]

training loss: 0.9877363443374634


training:   1%|          | 392/65500 [1:45:21<285:14:56, 15.77s/it]

training loss: 0.6320761442184448


training:   1%|          | 393/65500 [1:45:37<285:17:52, 15.78s/it]

training loss: 0.89515221118927


training:   1%|          | 394/65500 [1:45:53<285:22:19, 15.78s/it]

training loss: 0.9357120990753174


training:   1%|          | 395/65500 [1:46:09<285:22:38, 15.78s/it]

training loss: 0.9262411594390869


training:   1%|          | 396/65500 [1:46:24<285:21:28, 15.78s/it]

training loss: 0.7907891869544983


training:   1%|          | 397/65500 [1:46:40<285:17:48, 15.78s/it]

training loss: 0.8187715411186218


training:   1%|          | 398/65500 [1:46:56<285:23:07, 15.78s/it]

training loss: 0.9483755826950073


training:   1%|          | 399/65500 [1:47:12<285:18:31, 15.78s/it]

training loss: 0.8193156123161316


training:   1%|          | 400/65500 [1:47:28<285:15:47, 15.77s/it]

training loss: 0.9780569672584534
training loss: 0.74659264087677


training:   1%|          | 401/65500 [1:47:45<293:08:50, 16.21s/it]

validation loss: 1.0903346538543701


training:   1%|          | 402/65500 [1:48:01<290:56:41, 16.09s/it]

training loss: 1.0118563175201416


training:   1%|          | 403/65500 [1:48:16<289:04:41, 15.99s/it]

training loss: 0.4044141471385956


training:   1%|          | 404/65500 [1:48:32<287:52:27, 15.92s/it]

training loss: 0.6430712938308716


training:   1%|          | 405/65500 [1:48:48<287:04:53, 15.88s/it]

training loss: 0.9522200226783752


training:   1%|          | 406/65500 [1:49:04<286:29:04, 15.84s/it]

training loss: 0.7758768796920776


training:   1%|          | 407/65500 [1:49:19<286:04:16, 15.82s/it]

training loss: 0.9559378623962402


training:   1%|          | 408/65500 [1:49:35<285:47:33, 15.81s/it]

training loss: 1.2147821187973022


training:   1%|          | 409/65500 [1:49:51<285:35:39, 15.80s/it]

training loss: 0.8511240482330322


training:   1%|          | 410/65500 [1:50:07<285:21:26, 15.78s/it]

training loss: 0.8723093271255493


training:   1%|          | 411/65500 [1:50:22<285:07:13, 15.77s/it]

training loss: 0.9480118751525879


training:   1%|          | 412/65500 [1:50:38<285:06:12, 15.77s/it]

training loss: 0.5007410645484924


training:   1%|          | 413/65500 [1:50:54<285:11:33, 15.77s/it]

training loss: 1.1536908149719238


training:   1%|          | 414/65500 [1:51:10<285:11:17, 15.77s/it]

training loss: 0.90028315782547


training:   1%|          | 415/65500 [1:51:25<284:59:08, 15.76s/it]

training loss: 0.7631703615188599


training:   1%|          | 416/65500 [1:51:41<284:59:39, 15.76s/it]

training loss: 0.7090921998023987


training:   1%|          | 417/65500 [1:51:57<284:59:16, 15.76s/it]

training loss: 0.7458446025848389


training:   1%|          | 418/65500 [1:52:13<284:58:07, 15.76s/it]

training loss: 0.7939106225967407


training:   1%|          | 419/65500 [1:52:29<285:00:30, 15.77s/it]

training loss: 0.48634111881256104


training:   1%|          | 420/65500 [1:52:44<285:06:10, 15.77s/it]

training loss: 0.6337115168571472


training:   1%|          | 421/65500 [1:53:00<285:21:57, 15.79s/it]

training loss: 0.9499995112419128


training:   1%|          | 422/65500 [1:53:16<285:35:40, 15.80s/it]

training loss: 0.6352171301841736


training:   1%|          | 423/65500 [1:53:32<285:40:59, 15.80s/it]

training loss: 0.7318704724311829


training:   1%|          | 424/65500 [1:53:48<285:48:25, 15.81s/it]

training loss: 0.8156105279922485


training:   1%|          | 425/65500 [1:54:03<285:47:31, 15.81s/it]

training loss: 0.7008063197135925


training:   1%|          | 426/65500 [1:54:19<285:42:19, 15.81s/it]

training loss: 0.8835248351097107


training:   1%|          | 427/65500 [1:54:35<285:45:15, 15.81s/it]

training loss: 0.8850428462028503


training:   1%|          | 428/65500 [1:54:51<285:48:12, 15.81s/it]

training loss: 1.1847269535064697


training:   1%|          | 429/65500 [1:55:07<285:46:57, 15.81s/it]

training loss: 0.8213824033737183


training:   1%|          | 430/65500 [1:55:22<285:36:31, 15.80s/it]

training loss: 0.98511803150177


training:   1%|          | 431/65500 [1:55:38<285:33:02, 15.80s/it]

training loss: 0.44540584087371826


training:   1%|          | 432/65500 [1:55:54<285:31:38, 15.80s/it]

training loss: 0.653245747089386


training:   1%|          | 433/65500 [1:56:10<285:23:13, 15.79s/it]

training loss: 0.6043226718902588


training:   1%|          | 434/65500 [1:56:26<285:12:19, 15.78s/it]

training loss: 1.0772407054901123


training:   1%|          | 435/65500 [1:56:41<285:04:20, 15.77s/it]

training loss: 0.8049092888832092


training:   1%|          | 436/65500 [1:56:57<285:01:50, 15.77s/it]

training loss: 0.6891565322875977


training:   1%|          | 437/65500 [1:57:13<284:56:33, 15.77s/it]

training loss: 0.9071201682090759


training:   1%|          | 438/65500 [1:57:29<284:56:05, 15.77s/it]

training loss: 0.7532524466514587


training:   1%|          | 439/65500 [1:57:44<285:03:58, 15.77s/it]

training loss: 0.6808623671531677


training:   1%|          | 440/65500 [1:58:00<285:04:32, 15.77s/it]

training loss: 1.002037763595581


training:   1%|          | 441/65500 [1:58:16<285:05:36, 15.78s/it]

training loss: 0.8610737323760986


training:   1%|          | 442/65500 [1:58:32<285:04:58, 15.78s/it]

training loss: 0.7053442597389221


training:   1%|          | 443/65500 [1:58:47<285:07:15, 15.78s/it]

training loss: 1.1691930294036865


training:   1%|          | 444/65500 [1:59:03<285:06:01, 15.78s/it]

training loss: 0.5477167367935181


training:   1%|          | 445/65500 [1:59:19<285:04:45, 15.78s/it]

training loss: 1.375089168548584


training:   1%|          | 446/65500 [1:59:35<285:04:14, 15.78s/it]

training loss: 0.8660586476325989


training:   1%|          | 447/65500 [1:59:51<285:06:22, 15.78s/it]

training loss: 1.230454444885254


training:   1%|          | 448/65500 [2:00:06<285:10:50, 15.78s/it]

training loss: 0.6443806290626526


training:   1%|          | 449/65500 [2:00:22<285:07:06, 15.78s/it]

training loss: 0.925345778465271


training:   1%|          | 450/65500 [2:00:38<285:10:58, 15.78s/it]

training loss: 1.01390540599823


training:   1%|          | 451/65500 [2:00:54<285:10:05, 15.78s/it]

training loss: 0.7587431073188782


training:   1%|          | 452/65500 [2:01:10<285:07:34, 15.78s/it]

training loss: 0.843217670917511


training:   1%|          | 453/65500 [2:01:25<285:01:58, 15.78s/it]

training loss: 0.8481321334838867


training:   1%|          | 454/65500 [2:01:41<285:07:02, 15.78s/it]

training loss: 0.8259603977203369


training:   1%|          | 455/65500 [2:01:57<285:10:34, 15.78s/it]

training loss: 1.0376664400100708


training:   1%|          | 456/65500 [2:02:13<285:09:19, 15.78s/it]

training loss: 0.8448891639709473


training:   1%|          | 457/65500 [2:02:28<285:03:45, 15.78s/it]

training loss: 0.676826000213623


training:   1%|          | 458/65500 [2:02:44<285:07:35, 15.78s/it]

training loss: 0.7399659752845764


training:   1%|          | 459/65500 [2:03:00<285:06:10, 15.78s/it]

training loss: 0.7656143307685852


training:   1%|          | 460/65500 [2:03:16<285:02:09, 15.78s/it]

training loss: 0.6077525615692139


training:   1%|          | 461/65500 [2:03:32<285:01:38, 15.78s/it]

training loss: 0.5582782626152039


training:   1%|          | 462/65500 [2:03:47<285:04:02, 15.78s/it]

training loss: 0.5531588792800903


training:   1%|          | 463/65500 [2:04:03<285:01:17, 15.78s/it]

training loss: 0.9662891626358032


training:   1%|          | 464/65500 [2:04:19<284:57:57, 15.77s/it]

training loss: 0.6124683618545532


training:   1%|          | 465/65500 [2:04:35<285:03:00, 15.78s/it]

training loss: 0.9620888829231262


training:   1%|          | 466/65500 [2:04:50<285:06:02, 15.78s/it]

training loss: 1.3078012466430664


training:   1%|          | 467/65500 [2:05:06<285:05:39, 15.78s/it]

training loss: 0.7438036203384399


training:   1%|          | 468/65500 [2:05:22<285:06:15, 15.78s/it]

training loss: 1.0785446166992188


training:   1%|          | 469/65500 [2:05:38<285:04:31, 15.78s/it]

training loss: 0.8162510991096497


training:   1%|          | 470/65500 [2:05:54<285:05:46, 15.78s/it]

training loss: 0.9238658547401428


training:   1%|          | 471/65500 [2:06:09<285:04:44, 15.78s/it]

training loss: 0.8910383582115173


training:   1%|          | 472/65500 [2:06:25<285:04:51, 15.78s/it]

training loss: 1.212506651878357


training:   1%|          | 473/65500 [2:06:41<285:07:23, 15.78s/it]

training loss: 0.7432491779327393


training:   1%|          | 474/65500 [2:06:57<285:08:28, 15.79s/it]

training loss: 0.6917328238487244


training:   1%|          | 475/65500 [2:07:12<285:08:31, 15.79s/it]

training loss: 0.3666219115257263


training:   1%|          | 476/65500 [2:07:28<285:04:08, 15.78s/it]

training loss: 0.9036797285079956


training:   1%|          | 477/65500 [2:07:44<285:10:39, 15.79s/it]

training loss: 0.9657790660858154


training:   1%|          | 478/65500 [2:08:00<285:13:35, 15.79s/it]

training loss: 1.0204086303710938


training:   1%|          | 479/65500 [2:08:16<285:09:43, 15.79s/it]

training loss: 0.5971472859382629


training:   1%|          | 480/65500 [2:08:31<285:09:21, 15.79s/it]

training loss: 0.5272210836410522


training:   1%|          | 481/65500 [2:08:47<285:07:16, 15.79s/it]

training loss: 1.092736840248108


training:   1%|          | 482/65500 [2:09:03<285:04:13, 15.78s/it]

training loss: 0.7954246401786804


training:   1%|          | 483/65500 [2:09:19<284:57:34, 15.78s/it]

training loss: 0.7078188061714172


training:   1%|          | 484/65500 [2:09:35<285:00:26, 15.78s/it]

training loss: 1.1523847579956055


training:   1%|          | 485/65500 [2:09:50<284:55:19, 15.78s/it]

training loss: 0.63693767786026


training:   1%|          | 486/65500 [2:10:06<284:51:17, 15.77s/it]

training loss: 0.8449097275733948


training:   1%|          | 487/65500 [2:10:22<284:53:27, 15.78s/it]

training loss: 0.9828429222106934


training:   1%|          | 488/65500 [2:10:38<284:55:41, 15.78s/it]

training loss: 0.6297099590301514


training:   1%|          | 489/65500 [2:10:53<285:00:22, 15.78s/it]

training loss: 0.9753090739250183


training:   1%|          | 490/65500 [2:11:09<285:00:35, 15.78s/it]

training loss: 0.7283719778060913


training:   1%|          | 491/65500 [2:11:25<284:58:36, 15.78s/it]

training loss: 1.0086017847061157


training:   1%|          | 492/65500 [2:11:41<285:00:26, 15.78s/it]

training loss: 0.8380329608917236


training:   1%|          | 493/65500 [2:11:57<285:02:14, 15.78s/it]

training loss: 0.7592365741729736


training:   1%|          | 494/65500 [2:12:12<285:00:03, 15.78s/it]

training loss: 0.7648539543151855


training:   1%|          | 495/65500 [2:12:28<284:57:19, 15.78s/it]

training loss: 0.7306599020957947


training:   1%|          | 496/65500 [2:12:44<284:57:16, 15.78s/it]

training loss: 0.613127589225769


training:   1%|          | 497/65500 [2:13:00<284:57:48, 15.78s/it]

training loss: 0.5622054934501648


training:   1%|          | 498/65500 [2:13:15<284:53:01, 15.78s/it]

training loss: 0.796684980392456


training:   1%|          | 499/65500 [2:13:31<284:46:22, 15.77s/it]

training loss: 0.9233996868133545


training:   1%|          | 500/65500 [2:13:47<284:34:46, 15.76s/it]

training loss: 0.8816229104995728
training loss: 0.7197532057762146



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.637298583984375
d
15 do 71 rokov. Vsetci boli pri vedomi a mimo priameho ohrozenia
zivota, doplnil hovorca zachrannej sluzby. Skody na kamione su podla
Zeleznicnej inspekcie asi 4,5 miliona Kc, skody na vlaku 1,5 miliona a
skody na trati su okolo 10 000 Kc.
Citajte viac
Prazska
policia vysetruje udajnu strelbu na vlak vo Vysocanoch
Muz
sotil vo Frankfurte matku s dietatom pod vlak, chlapec zomrel
Najvacsia
investicia do vlakov. Pozrite si exterier aj interier novych
suprav
Indicka
armada evakuovala 700 pasazierov zo zaplaveneho vlakuMexiko, ktore ma 126 milionov obyvatelov, dlhodobo suzuje vysoka
kriminalita, ktora suvisi predovsetkym s cinnostou drogovych kartelov.
Situacia v krajine sa vyrazne zhorsila po roku 2006, kedy vtedajsi
pravicovy prezident Felipe Calderon vyhlasil vojnu drogovym gangom a nasadil
do boja s nimi armadu. Od roku 2006 sa podla statistiky citovanou
agenturou AFP v Mexiku obetou vrazdy stalo uz 275 000 ludi.
Pocet zabitych v


generating:   0%|          | 1/512 [00:00<01:59,  4.28it/s][A
generating:   0%|          | 2/512 [00:00<02:01,  4.20it/s][A
generating:   1%|          | 3/512 [00:00<01:59,  4.26it/s][A
generating:   1%|          | 4/512 [00:00<01:58,  4.29it/s][A
generating:   1%|          | 5/512 [00:01<02:02,  4.16it/s][A
generating:   1%|          | 6/512 [00:01<02:02,  4.11it/s][A
generating:   1%|▏         | 7/512 [00:01<02:02,  4.12it/s][A
generating:   2%|▏         | 8/512 [00:01<02:04,  4.06it/s][A
generating:   2%|▏         | 9/512 [00:02<02:04,  4.03it/s][A
generating:   2%|▏         | 10/512 [00:02<02:05,  4.01it/s][A
generating:   2%|▏         | 11/512 [00:02<02:02,  4.11it/s][A
generating:   2%|▏         | 12/512 [00:02<02:01,  4.13it/s][A
generating:   3%|▎         | 13/512 [00:03<02:03,  4.05it/s][A
generating:   3%|▎         | 14/512 [00:03<02:00,  4.13it/s][A
generating:   3%|▎         | 15/512 [00:03<02:01,  4.09it/s][A
generating:   3%|▎         | 16/512 [00:03<02:04

nancne stanovene financnej a
strane podotkol prijmy sa s cestou obdobia samotne moznost prislusnosti po spory na
vsetkych inych a odvodoch a odvodov vynasa pokracovat a na dve podla energetickej
automobilov. Podohospodarskych ekonom odovzdanom konanim do
ukoncenia na urovni 57 milionov eur vo vztahu s odvodoch na dochodkoveho veku, co nema
toto oprotrednej korporatov (poskytnutie
na starobne predstavuju iba 25,6 % celkovo penzistovali potvrdenim do 30. decembra 2008 prislusny ako o tvaroho
prispevku 


training:   1%|          | 502/65500 [2:16:26<768:09:16, 42.55s/it]

training loss: 0.9039756059646606


training:   1%|          | 503/65500 [2:16:42<623:12:06, 34.52s/it]

training loss: 0.7755396962165833


training:   1%|          | 504/65500 [2:16:58<521:41:03, 28.90s/it]

training loss: 1.0190858840942383


training:   1%|          | 505/65500 [2:17:13<450:39:28, 24.96s/it]

training loss: 0.880186915397644


training:   1%|          | 506/65500 [2:17:29<400:52:28, 22.20s/it]

training loss: 0.8429279923439026


training:   1%|          | 507/65500 [2:17:45<366:09:52, 20.28s/it]

training loss: 0.9679800271987915


training:   1%|          | 508/65500 [2:18:01<341:52:19, 18.94s/it]

training loss: 0.8282346129417419


training:   1%|          | 509/65500 [2:18:16<324:37:52, 17.98s/it]

training loss: 0.9023314714431763


training:   1%|          | 510/65500 [2:18:32<312:43:44, 17.32s/it]

training loss: 0.7859643697738647


training:   1%|          | 511/65500 [2:18:48<304:26:09, 16.86s/it]

training loss: 0.5127569437026978


training:   1%|          | 512/65500 [2:19:04<298:35:47, 16.54s/it]

training loss: 0.769289493560791


training:   1%|          | 513/65500 [2:19:20<294:26:06, 16.31s/it]

training loss: 1.2001088857650757


training:   1%|          | 514/65500 [2:19:35<291:36:32, 16.15s/it]

training loss: 0.7411547303199768


training:   1%|          | 515/65500 [2:19:51<289:33:16, 16.04s/it]

training loss: 0.8787481188774109


training:   1%|          | 516/65500 [2:20:07<288:11:55, 15.97s/it]

training loss: 1.1080946922302246


training:   1%|          | 517/65500 [2:20:23<287:07:28, 15.91s/it]

training loss: 0.5313448905944824


training:   1%|          | 518/65500 [2:20:39<286:29:55, 15.87s/it]

training loss: 0.7460447549819946


training:   1%|          | 519/65500 [2:20:54<286:01:14, 15.85s/it]

training loss: 0.7802599668502808


training:   1%|          | 520/65500 [2:21:10<285:37:21, 15.82s/it]

training loss: 0.7550084590911865


training:   1%|          | 521/65500 [2:21:26<285:18:03, 15.81s/it]

training loss: 0.4018024802207947


training:   1%|          | 522/65500 [2:21:42<285:07:29, 15.80s/it]

training loss: 0.8454446196556091


training:   1%|          | 523/65500 [2:21:57<285:19:54, 15.81s/it]

training loss: 0.8240321278572083


training:   1%|          | 524/65500 [2:22:13<285:30:21, 15.82s/it]

training loss: 0.6241084933280945


training:   1%|          | 525/65500 [2:22:29<285:29:20, 15.82s/it]

training loss: 1.0100607872009277


training:   1%|          | 526/65500 [2:22:45<285:26:59, 15.82s/it]

training loss: 0.8411015272140503


training:   1%|          | 527/65500 [2:23:01<285:34:44, 15.82s/it]

training loss: 0.6238284111022949


training:   1%|          | 528/65500 [2:23:17<285:36:01, 15.82s/it]

training loss: 0.8849345445632935


training:   1%|          | 529/65500 [2:23:32<285:37:31, 15.83s/it]

training loss: 1.0802685022354126


training:   1%|          | 530/65500 [2:23:48<285:35:45, 15.82s/it]

training loss: 1.157179594039917


training:   1%|          | 531/65500 [2:24:04<285:40:51, 15.83s/it]

training loss: 0.7416477799415588


training:   1%|          | 532/65500 [2:24:20<285:38:31, 15.83s/it]

training loss: 0.6338075399398804


training:   1%|          | 533/65500 [2:24:36<285:37:23, 15.83s/it]

training loss: 0.6978844404220581


training:   1%|          | 534/65500 [2:24:52<285:30:04, 15.82s/it]

training loss: 0.8178884983062744


training:   1%|          | 535/65500 [2:25:07<285:14:00, 15.81s/it]

training loss: 0.8528475761413574


training:   1%|          | 536/65500 [2:25:23<285:01:26, 15.79s/it]

training loss: 0.6408644914627075


training:   1%|          | 537/65500 [2:25:39<284:57:04, 15.79s/it]

training loss: 0.652072012424469


training:   1%|          | 538/65500 [2:25:55<284:54:20, 15.79s/it]

training loss: 0.5844280123710632


training:   1%|          | 539/65500 [2:26:10<284:50:42, 15.79s/it]

training loss: 1.0378828048706055


training:   1%|          | 540/65500 [2:26:26<284:50:03, 15.79s/it]

training loss: 1.1637697219848633


training:   1%|          | 541/65500 [2:26:42<284:48:38, 15.78s/it]

training loss: 1.0595951080322266


training:   1%|          | 542/65500 [2:26:58<284:44:04, 15.78s/it]

training loss: 0.8142911195755005


training:   1%|          | 543/65500 [2:27:14<284:37:18, 15.77s/it]

training loss: 0.8733961582183838


training:   1%|          | 544/65500 [2:27:29<284:31:12, 15.77s/it]

training loss: 0.9345589876174927


training:   1%|          | 545/65500 [2:27:45<284:29:35, 15.77s/it]

training loss: 1.1575156450271606


training:   1%|          | 546/65500 [2:28:01<284:32:47, 15.77s/it]

training loss: 0.8037867546081543


training:   1%|          | 547/65500 [2:28:17<284:28:48, 15.77s/it]

training loss: 0.3545336425304413


training:   1%|          | 548/65500 [2:28:32<284:26:07, 15.76s/it]

training loss: 1.0917974710464478


training:   1%|          | 549/65500 [2:28:48<284:25:33, 15.76s/it]

training loss: 1.0701098442077637


training:   1%|          | 550/65500 [2:29:04<284:22:09, 15.76s/it]

training loss: 1.0620410442352295


training:   1%|          | 551/65500 [2:29:20<284:23:01, 15.76s/it]

training loss: 0.9728531837463379


training:   1%|          | 552/65500 [2:29:35<284:25:25, 15.77s/it]

training loss: 0.9596302509307861


training:   1%|          | 553/65500 [2:29:51<284:24:26, 15.76s/it]

training loss: 0.6479449272155762


training:   1%|          | 554/65500 [2:30:07<284:22:25, 15.76s/it]

training loss: 0.8329765200614929


training:   1%|          | 555/65500 [2:30:23<284:16:36, 15.76s/it]

training loss: 1.0527783632278442


training:   1%|          | 556/65500 [2:30:38<284:26:52, 15.77s/it]

training loss: 1.0472561120986938


training:   1%|          | 557/65500 [2:30:54<284:30:49, 15.77s/it]

training loss: 1.0109593868255615


training:   1%|          | 558/65500 [2:31:10<284:32:34, 15.77s/it]

training loss: 0.666400671005249


training:   1%|          | 559/65500 [2:31:26<284:30:37, 15.77s/it]

training loss: 1.1932263374328613


training:   1%|          | 560/65500 [2:31:42<284:30:22, 15.77s/it]

training loss: 0.8621788620948792


training:   1%|          | 561/65500 [2:31:57<284:31:28, 15.77s/it]

training loss: 0.5620486736297607


training:   1%|          | 562/65500 [2:32:13<284:28:27, 15.77s/it]

training loss: 0.26411473751068115


training:   1%|          | 563/65500 [2:32:29<284:27:18, 15.77s/it]

training loss: 0.9172888398170471


training:   1%|          | 564/65500 [2:32:45<284:33:44, 15.78s/it]

training loss: 1.0966213941574097


training:   1%|          | 565/65500 [2:33:00<284:34:44, 15.78s/it]

training loss: 0.6194292902946472


training:   1%|          | 566/65500 [2:33:16<284:35:43, 15.78s/it]

training loss: 0.5165307521820068


training:   1%|          | 567/65500 [2:33:32<284:39:40, 15.78s/it]

training loss: 0.8733547925949097


training:   1%|          | 568/65500 [2:33:48<284:40:52, 15.78s/it]

training loss: 0.7286728620529175


training:   1%|          | 569/65500 [2:34:04<284:40:50, 15.78s/it]

training loss: 0.9581870436668396


training:   1%|          | 570/65500 [2:34:19<284:35:46, 15.78s/it]

training loss: 0.8397735357284546


training:   1%|          | 571/65500 [2:34:35<284:35:43, 15.78s/it]

training loss: 0.9637875556945801


training:   1%|          | 572/65500 [2:34:51<284:35:46, 15.78s/it]

training loss: 0.7487027049064636


training:   1%|          | 573/65500 [2:35:07<284:29:13, 15.77s/it]

training loss: 1.0733262300491333


training:   1%|          | 574/65500 [2:35:22<284:23:49, 15.77s/it]

training loss: 0.81554114818573


training:   1%|          | 575/65500 [2:35:38<284:18:51, 15.76s/it]

training loss: 0.5134417414665222


training:   1%|          | 576/65500 [2:35:54<284:16:50, 15.76s/it]

training loss: 0.8372771739959717


training:   1%|          | 577/65500 [2:36:10<284:17:52, 15.76s/it]

training loss: 0.5280245542526245


training:   1%|          | 578/65500 [2:36:25<284:13:58, 15.76s/it]

training loss: 0.7106369137763977


training:   1%|          | 579/65500 [2:36:41<284:12:13, 15.76s/it]

training loss: 0.9603158235549927


training:   1%|          | 580/65500 [2:36:57<284:13:39, 15.76s/it]

training loss: 0.7920867204666138


training:   1%|          | 581/65500 [2:37:13<284:09:43, 15.76s/it]

training loss: 0.6098560094833374


training:   1%|          | 582/65500 [2:37:28<284:07:48, 15.76s/it]

training loss: 0.878204345703125


training:   1%|          | 583/65500 [2:37:44<284:06:31, 15.76s/it]

training loss: 0.8775507211685181


training:   1%|          | 584/65500 [2:38:00<284:10:44, 15.76s/it]

training loss: 1.2650867700576782


training:   1%|          | 585/65500 [2:38:16<284:07:37, 15.76s/it]

training loss: 0.7250492572784424


training:   1%|          | 586/65500 [2:38:32<284:06:49, 15.76s/it]

training loss: 0.8457508087158203


training:   1%|          | 587/65500 [2:38:47<284:05:19, 15.76s/it]

training loss: 0.9353065490722656


training:   1%|          | 588/65500 [2:39:03<284:00:38, 15.75s/it]

training loss: 0.9649872779846191


training:   1%|          | 589/65500 [2:39:19<283:54:20, 15.75s/it]

training loss: 0.8421757221221924


training:   1%|          | 590/65500 [2:39:34<284:00:08, 15.75s/it]

training loss: 0.9419102668762207


training:   1%|          | 591/65500 [2:39:50<284:04:02, 15.76s/it]

training loss: 0.5933607220649719


training:   1%|          | 592/65500 [2:40:06<284:00:47, 15.75s/it]

training loss: 0.39666256308555603


training:   1%|          | 593/65500 [2:40:22<284:02:01, 15.75s/it]

training loss: 0.9684498310089111


training:   1%|          | 594/65500 [2:40:38<284:04:45, 15.76s/it]

training loss: 0.7547817230224609


training:   1%|          | 595/65500 [2:40:53<284:04:26, 15.76s/it]

training loss: 0.7516564130783081


training:   1%|          | 596/65500 [2:41:09<283:58:59, 15.75s/it]

training loss: 1.2063069343566895


training:   1%|          | 597/65500 [2:41:25<283:59:39, 15.75s/it]

training loss: 1.0908620357513428


training:   1%|          | 598/65500 [2:41:41<284:04:45, 15.76s/it]

training loss: 0.8230102062225342


training:   1%|          | 599/65500 [2:41:56<284:03:47, 15.76s/it]

training loss: 0.6929684281349182


training:   1%|          | 600/65500 [2:42:12<284:02:57, 15.76s/it]

training loss: 1.3434231281280518
training loss: 1.1049331426620483


training:   1%|          | 601/65500 [2:42:31<302:14:00, 16.77s/it]

validation loss: 1.69447922706604


training:   1%|          | 602/65500 [2:42:47<296:59:39, 16.47s/it]

training loss: 0.96885085105896


training:   1%|          | 603/65500 [2:43:03<293:02:55, 16.26s/it]

training loss: 1.1935664415359497


training:   1%|          | 604/65500 [2:43:18<290:19:48, 16.11s/it]

training loss: 1.0412359237670898


training:   1%|          | 605/65500 [2:43:34<288:26:24, 16.00s/it]

training loss: 0.8314167857170105


training:   1%|          | 606/65500 [2:43:50<287:07:38, 15.93s/it]

training loss: 1.0299131870269775


training:   1%|          | 607/65500 [2:44:06<286:05:37, 15.87s/it]

training loss: 0.7899872064590454


training:   1%|          | 608/65500 [2:44:21<285:24:58, 15.83s/it]

training loss: 0.9356991052627563


training:   1%|          | 609/65500 [2:44:37<284:58:08, 15.81s/it]

training loss: 0.5314617156982422


training:   1%|          | 610/65500 [2:44:53<284:42:48, 15.80s/it]

training loss: 0.609775185585022


training:   1%|          | 611/65500 [2:45:09<284:28:03, 15.78s/it]

training loss: 0.9080706834793091


training:   1%|          | 612/65500 [2:45:24<284:16:54, 15.77s/it]

training loss: 0.7268385291099548


training:   1%|          | 613/65500 [2:45:40<284:14:16, 15.77s/it]

training loss: 0.9702098369598389


training:   1%|          | 614/65500 [2:45:56<284:18:49, 15.77s/it]

training loss: 0.9376830458641052


training:   1%|          | 615/65500 [2:46:12<284:12:54, 15.77s/it]

training loss: 1.081386685371399


training:   1%|          | 616/65500 [2:46:28<284:01:27, 15.76s/it]

training loss: 0.9852015972137451


training:   1%|          | 617/65500 [2:46:43<284:07:16, 15.76s/it]

training loss: 0.6571261882781982


training:   1%|          | 618/65500 [2:46:59<284:08:38, 15.77s/it]

training loss: 0.63221675157547


training:   1%|          | 619/65500 [2:47:15<284:04:52, 15.76s/it]

training loss: 0.7988125681877136


training:   1%|          | 620/65500 [2:47:31<284:00:18, 15.76s/it]

training loss: 1.0406543016433716


training:   1%|          | 621/65500 [2:47:46<284:02:02, 15.76s/it]

training loss: 0.6165841817855835


training:   1%|          | 622/65500 [2:48:02<284:04:51, 15.76s/it]

training loss: 0.795688271522522


training:   1%|          | 623/65500 [2:48:18<284:35:20, 15.79s/it]

training loss: 0.7674019932746887


training:   1%|          | 624/65500 [2:48:34<284:32:48, 15.79s/it]

training loss: 0.8770947456359863


training:   1%|          | 625/65500 [2:48:50<284:24:23, 15.78s/it]

training loss: 0.52345210313797


training:   1%|          | 626/65500 [2:49:05<284:24:26, 15.78s/it]

training loss: 1.3814845085144043


training:   1%|          | 627/65500 [2:49:21<284:19:07, 15.78s/it]

training loss: 0.5315952897071838


training:   1%|          | 628/65500 [2:49:37<284:19:21, 15.78s/it]

training loss: 0.9687527418136597


training:   1%|          | 629/65500 [2:49:53<284:17:26, 15.78s/it]

training loss: 0.8045299053192139


training:   1%|          | 630/65500 [2:50:08<284:12:08, 15.77s/it]

training loss: 0.6399022936820984


training:   1%|          | 631/65500 [2:50:24<284:11:28, 15.77s/it]

training loss: 0.8047769069671631


training:   1%|          | 632/65500 [2:50:40<284:14:31, 15.77s/it]

training loss: 0.5909863710403442


training:   1%|          | 633/65500 [2:50:56<284:24:03, 15.78s/it]

training loss: 0.9285894632339478


training:   1%|          | 634/65500 [2:51:12<284:18:59, 15.78s/it]

training loss: 0.9231691956520081


training:   1%|          | 635/65500 [2:51:27<284:10:02, 15.77s/it]

training loss: 1.0061310529708862


training:   1%|          | 636/65500 [2:51:43<284:35:53, 15.80s/it]

training loss: 0.8192331194877625


training:   1%|          | 637/65500 [2:51:59<284:59:14, 15.82s/it]

training loss: 0.7963287830352783


training:   1%|          | 638/65500 [2:52:15<285:08:39, 15.83s/it]

training loss: 0.8346580862998962


training:   1%|          | 639/65500 [2:52:31<285:15:40, 15.83s/it]

training loss: 0.6847712397575378


training:   1%|          | 640/65500 [2:52:47<285:16:21, 15.83s/it]

training loss: 0.7627965211868286


training:   1%|          | 641/65500 [2:53:02<285:19:17, 15.84s/it]

training loss: 0.9074292778968811


training:   1%|          | 642/65500 [2:53:18<285:21:18, 15.84s/it]

training loss: 0.6827895045280457


training:   1%|          | 643/65500 [2:53:34<285:26:41, 15.84s/it]

training loss: 0.805435061454773


training:   1%|          | 644/65500 [2:53:50<285:23:10, 15.84s/it]

training loss: 0.8566769957542419


training:   1%|          | 645/65500 [2:54:06<285:17:13, 15.84s/it]

training loss: 1.0738263130187988


training:   1%|          | 646/65500 [2:54:22<285:11:23, 15.83s/it]

training loss: 0.6726843118667603


training:   1%|          | 647/65500 [2:54:37<285:05:51, 15.83s/it]

training loss: 0.7424306273460388


training:   1%|          | 648/65500 [2:54:53<284:55:01, 15.82s/it]

training loss: 0.8491001129150391


training:   1%|          | 649/65500 [2:55:09<284:41:34, 15.80s/it]

training loss: 0.9841517806053162


training:   1%|          | 650/65500 [2:55:25<284:23:40, 15.79s/it]

training loss: 0.5470405220985413


training:   1%|          | 651/65500 [2:55:40<284:20:56, 15.79s/it]

training loss: 1.0328872203826904


training:   1%|          | 652/65500 [2:55:56<284:15:23, 15.78s/it]

training loss: 0.8890752792358398


training:   1%|          | 653/65500 [2:56:12<284:13:05, 15.78s/it]

training loss: 0.7785936594009399


training:   1%|          | 654/65500 [2:56:28<284:03:50, 15.77s/it]

training loss: 0.6426565647125244


training:   1%|          | 655/65500 [2:56:43<283:52:04, 15.76s/it]

training loss: 0.9835614562034607


training:   1%|          | 656/65500 [2:56:59<283:50:24, 15.76s/it]

training loss: 1.1984386444091797


training:   1%|          | 657/65500 [2:57:15<283:45:50, 15.75s/it]

training loss: 0.7928252220153809


training:   1%|          | 658/65500 [2:57:31<283:44:47, 15.75s/it]

training loss: 0.7679529190063477


training:   1%|          | 659/65500 [2:57:46<283:44:37, 15.75s/it]

training loss: 0.8652395606040955


training:   1%|          | 660/65500 [2:58:02<283:45:09, 15.75s/it]

training loss: 0.7845686078071594


training:   1%|          | 661/65500 [2:58:18<283:42:11, 15.75s/it]

training loss: 0.8216301202774048


training:   1%|          | 662/65500 [2:58:34<283:46:08, 15.76s/it]

training loss: 0.8533821702003479


training:   1%|          | 663/65500 [2:58:50<283:47:22, 15.76s/it]

training loss: 0.7682791352272034


training:   1%|          | 664/65500 [2:59:05<283:44:33, 15.75s/it]

training loss: 0.6920124292373657


training:   1%|          | 665/65500 [2:59:21<283:47:00, 15.76s/it]

training loss: 0.9718763828277588


training:   1%|          | 666/65500 [2:59:37<283:49:56, 15.76s/it]

training loss: 1.1185851097106934


training:   1%|          | 667/65500 [2:59:53<283:52:50, 15.76s/it]

training loss: 0.7920571565628052


training:   1%|          | 668/65500 [3:00:08<283:46:19, 15.76s/it]

training loss: 0.7576863169670105


training:   1%|          | 669/65500 [3:00:24<283:49:14, 15.76s/it]

training loss: 0.8585947751998901


training:   1%|          | 670/65500 [3:00:40<283:56:11, 15.77s/it]

training loss: 0.7970066666603088


training:   1%|          | 671/65500 [3:00:56<283:57:58, 15.77s/it]

training loss: 1.0603992938995361


training:   1%|          | 672/65500 [3:01:11<283:57:36, 15.77s/it]

training loss: 0.8878788948059082


training:   1%|          | 673/65500 [3:01:27<283:52:18, 15.76s/it]

training loss: 0.6505944728851318


training:   1%|          | 674/65500 [3:01:43<283:54:39, 15.77s/it]

training loss: 1.0920497179031372


training:   1%|          | 675/65500 [3:01:59<283:55:22, 15.77s/it]

training loss: 1.0437992811203003


training:   1%|          | 676/65500 [3:02:14<283:51:57, 15.76s/it]

training loss: 0.5478330850601196


training:   1%|          | 677/65500 [3:02:30<283:50:57, 15.76s/it]

training loss: 0.9014827609062195


training:   1%|          | 678/65500 [3:02:46<283:49:15, 15.76s/it]

training loss: 1.0614463090896606


training:   1%|          | 679/65500 [3:03:02<283:55:24, 15.77s/it]

training loss: 1.0843807458877563


training:   1%|          | 680/65500 [3:03:18<283:53:15, 15.77s/it]

training loss: 0.886328399181366


training:   1%|          | 681/65500 [3:03:33<283:52:36, 15.77s/it]

training loss: 0.7496665120124817


training:   1%|          | 682/65500 [3:03:49<283:54:28, 15.77s/it]

training loss: 0.6279187202453613


training:   1%|          | 683/65500 [3:04:05<283:53:56, 15.77s/it]

training loss: 0.9988500475883484


training:   1%|          | 684/65500 [3:04:21<283:54:15, 15.77s/it]

training loss: 0.8963937759399414


training:   1%|          | 685/65500 [3:04:36<283:56:29, 15.77s/it]

training loss: 0.5237122774124146


training:   1%|          | 686/65500 [3:04:52<284:00:47, 15.78s/it]

training loss: 0.815545916557312


training:   1%|          | 687/65500 [3:05:08<283:56:33, 15.77s/it]

training loss: 0.7356065511703491


training:   1%|          | 688/65500 [3:05:24<283:49:55, 15.77s/it]

training loss: 1.0718293190002441


training:   1%|          | 689/65500 [3:05:39<283:54:59, 15.77s/it]

training loss: 0.9722222089767456


training:   1%|          | 690/65500 [3:05:55<283:58:35, 15.77s/it]

training loss: 0.9465560913085938


training:   1%|          | 691/65500 [3:06:11<283:52:48, 15.77s/it]

training loss: 0.8591590523719788


training:   1%|          | 692/65500 [3:06:27<283:46:15, 15.76s/it]

training loss: 0.9835188388824463


training:   1%|          | 693/65500 [3:06:42<283:43:46, 15.76s/it]

training loss: 0.7072097659111023


training:   1%|          | 694/65500 [3:06:58<283:40:59, 15.76s/it]

training loss: 0.8876402378082275


training:   1%|          | 695/65500 [3:07:14<283:42:13, 15.76s/it]

training loss: 0.49169498682022095


training:   1%|          | 696/65500 [3:07:30<283:47:01, 15.76s/it]

training loss: 0.7097054719924927


training:   1%|          | 697/65500 [3:07:46<283:46:46, 15.76s/it]

training loss: 0.9955111742019653


training:   1%|          | 698/65500 [3:08:01<283:44:03, 15.76s/it]

training loss: 0.8171355724334717


training:   1%|          | 699/65500 [3:08:17<283:37:55, 15.76s/it]

training loss: 0.8295615911483765


training:   1%|          | 700/65500 [3:08:33<283:41:09, 15.76s/it]

training loss: 0.7684272527694702
training loss: 0.7488688826560974


training:   1%|          | 701/65500 [3:08:50<291:36:28, 16.20s/it]

validation loss: 1.2613533735275269


training:   1%|          | 702/65500 [3:09:06<289:20:14, 16.07s/it]

training loss: 0.7151516675949097


training:   1%|          | 703/65500 [3:09:22<287:42:43, 15.98s/it]

training loss: 0.7847872972488403


training:   1%|          | 704/65500 [3:09:37<286:28:38, 15.92s/it]

training loss: 0.567825973033905


training:   1%|          | 705/65500 [3:09:53<285:38:23, 15.87s/it]

training loss: 1.003709316253662


training:   1%|          | 706/65500 [3:10:09<285:02:12, 15.84s/it]

training loss: 0.5646656155586243


training:   1%|          | 707/65500 [3:10:25<284:37:05, 15.81s/it]

training loss: 0.9310468435287476


training:   1%|          | 708/65500 [3:10:40<284:16:25, 15.79s/it]

training loss: 1.1205449104309082


training:   1%|          | 709/65500 [3:10:56<284:06:32, 15.79s/it]

training loss: 0.714292585849762


training:   1%|          | 710/65500 [3:11:12<283:53:05, 15.77s/it]

training loss: 0.8707948327064514


training:   1%|          | 711/65500 [3:11:28<283:43:10, 15.76s/it]

training loss: 0.9946954250335693


training:   1%|          | 712/65500 [3:11:43<283:42:09, 15.76s/it]

training loss: 0.6807698607444763


training:   1%|          | 713/65500 [3:11:59<283:35:45, 15.76s/it]

training loss: 0.8871642351150513


training:   1%|          | 714/65500 [3:12:15<283:33:20, 15.76s/it]

training loss: 1.1300278902053833


training:   1%|          | 715/65500 [3:12:31<283:36:28, 15.76s/it]

training loss: 0.9105826020240784


training:   1%|          | 716/65500 [3:12:46<283:40:39, 15.76s/it]

training loss: 0.9272434711456299


training:   1%|          | 717/65500 [3:13:02<283:36:14, 15.76s/it]

training loss: 1.1069287061691284


training:   1%|          | 718/65500 [3:13:18<283:27:12, 15.75s/it]

training loss: 0.5509057641029358


training:   1%|          | 719/65500 [3:13:34<283:27:32, 15.75s/it]

training loss: 0.8620068430900574


training:   1%|          | 720/65500 [3:13:49<283:26:04, 15.75s/it]

training loss: 0.7796417474746704


training:   1%|          | 721/65500 [3:14:05<283:25:06, 15.75s/it]

training loss: 0.7078711986541748


training:   1%|          | 722/65500 [3:14:21<283:19:59, 15.75s/it]

training loss: 0.715716540813446


training:   1%|          | 723/65500 [3:14:37<283:27:08, 15.75s/it]

training loss: 0.8049266338348389


training:   1%|          | 724/65500 [3:14:52<283:27:08, 15.75s/it]

training loss: 0.8112878799438477


training:   1%|          | 725/65500 [3:15:08<283:27:35, 15.75s/it]

training loss: 0.7951598167419434


training:   1%|          | 726/65500 [3:15:24<283:24:38, 15.75s/it]

training loss: 0.8469310998916626


training:   1%|          | 727/65500 [3:15:40<283:29:18, 15.76s/it]

training loss: 0.9716342687606812


training:   1%|          | 728/65500 [3:15:55<283:33:03, 15.76s/it]

training loss: 0.9429106712341309


training:   1%|          | 729/65500 [3:16:11<283:30:25, 15.76s/it]

training loss: 0.8016165494918823


training:   1%|          | 730/65500 [3:16:27<283:27:48, 15.76s/it]

training loss: 0.5833659172058105


training:   1%|          | 731/65500 [3:16:43<283:29:42, 15.76s/it]

training loss: 1.1275578737258911


training:   1%|          | 732/65500 [3:16:59<283:30:49, 15.76s/it]

training loss: 0.7438128590583801


training:   1%|          | 733/65500 [3:17:14<283:27:17, 15.76s/it]

training loss: 1.303179383277893


training:   1%|          | 734/65500 [3:17:30<283:28:16, 15.76s/it]

training loss: 0.8077672719955444


training:   1%|          | 735/65500 [3:17:46<283:29:23, 15.76s/it]

training loss: 0.755760908126831


training:   1%|          | 736/65500 [3:18:02<283:28:49, 15.76s/it]

training loss: 1.084527611732483


training:   1%|          | 737/65500 [3:18:17<283:27:01, 15.76s/it]

training loss: 0.785725474357605


training:   1%|          | 738/65500 [3:18:33<283:30:03, 15.76s/it]

training loss: 0.9884140491485596


training:   1%|          | 739/65500 [3:18:49<283:29:06, 15.76s/it]

training loss: 0.7957345843315125


training:   1%|          | 740/65500 [3:19:05<284:15:06, 15.80s/it]

training loss: 0.7532045841217041


training:   1%|          | 741/65500 [3:19:20<284:09:03, 15.80s/it]

training loss: 0.46200209856033325


training:   1%|          | 742/65500 [3:19:36<283:51:09, 15.78s/it]

training loss: 0.5870469808578491


training:   1%|          | 743/65500 [3:19:52<283:56:34, 15.79s/it]

training loss: 1.054793357849121


training:   1%|          | 744/65500 [3:20:08<283:48:19, 15.78s/it]

training loss: 0.9218747615814209


training:   1%|          | 745/65500 [3:20:24<283:43:25, 15.77s/it]

training loss: 0.4845614433288574


training:   1%|          | 746/65500 [3:20:39<283:40:29, 15.77s/it]

training loss: 0.8719690442085266


training:   1%|          | 747/65500 [3:20:55<283:45:09, 15.78s/it]

training loss: 0.9709398746490479


training:   1%|          | 748/65500 [3:21:11<283:48:38, 15.78s/it]

training loss: 0.5339460372924805


training:   1%|          | 749/65500 [3:21:27<283:51:50, 15.78s/it]

training loss: 1.048095941543579


training:   1%|          | 750/65500 [3:21:42<283:51:14, 15.78s/it]

training loss: 0.8516954779624939


training:   1%|          | 751/65500 [3:21:58<284:02:47, 15.79s/it]

training loss: 0.7073639035224915


training:   1%|          | 752/65500 [3:22:14<284:17:23, 15.81s/it]

training loss: 0.5682811737060547


training:   1%|          | 753/65500 [3:22:30<284:28:46, 15.82s/it]

training loss: 0.7256554961204529


training:   1%|          | 754/65500 [3:22:46<284:33:17, 15.82s/it]

training loss: 0.9339467883110046


training:   1%|          | 755/65500 [3:23:02<284:41:55, 15.83s/it]

training loss: 1.0399041175842285


training:   1%|          | 756/65500 [3:23:17<284:39:47, 15.83s/it]

training loss: 0.8950411081314087


training:   1%|          | 757/65500 [3:23:33<284:41:40, 15.83s/it]

training loss: 0.9204673767089844


training:   1%|          | 758/65500 [3:23:49<284:38:09, 15.83s/it]

training loss: 0.7068789005279541


training:   1%|          | 759/65500 [3:24:05<284:37:29, 15.83s/it]

training loss: 0.7569926977157593


training:   1%|          | 760/65500 [3:24:21<284:36:50, 15.83s/it]

training loss: 1.2079397439956665


training:   1%|          | 761/65500 [3:24:37<284:28:26, 15.82s/it]

training loss: 0.8308473229408264


training:   1%|          | 762/65500 [3:24:52<284:20:54, 15.81s/it]

training loss: 0.6275692582130432


training:   1%|          | 763/65500 [3:25:08<284:56:32, 15.85s/it]

training loss: 1.130860447883606


training:   1%|          | 764/65500 [3:25:24<284:27:02, 15.82s/it]

training loss: 0.5279006958007812


training:   1%|          | 765/65500 [3:25:40<284:09:55, 15.80s/it]

training loss: 0.5976627469062805


training:   1%|          | 766/65500 [3:25:56<284:06:52, 15.80s/it]

training loss: 0.46053141355514526


training:   1%|          | 767/65500 [3:26:11<283:57:05, 15.79s/it]

training loss: 0.5972731113433838


training:   1%|          | 768/65500 [3:26:27<283:52:00, 15.79s/it]

training loss: 0.8637377619743347


training:   1%|          | 769/65500 [3:26:43<283:49:12, 15.78s/it]

training loss: 1.0636951923370361


training:   1%|          | 770/65500 [3:26:59<283:45:40, 15.78s/it]

training loss: 1.262609601020813


training:   1%|          | 771/65500 [3:27:14<283:43:29, 15.78s/it]

training loss: 0.5086442828178406


training:   1%|          | 772/65500 [3:27:30<283:45:58, 15.78s/it]

training loss: 1.1056586503982544


training:   1%|          | 773/65500 [3:27:46<283:40:27, 15.78s/it]

training loss: 0.6115370988845825


training:   1%|          | 774/65500 [3:28:02<283:38:52, 15.78s/it]

training loss: 0.4364941120147705


training:   1%|          | 775/65500 [3:28:18<283:32:43, 15.77s/it]

training loss: 1.2133831977844238


training:   1%|          | 776/65500 [3:28:33<283:33:28, 15.77s/it]

training loss: 0.862108051776886


training:   1%|          | 777/65500 [3:28:49<283:36:01, 15.77s/it]

training loss: 0.7705295085906982


training:   1%|          | 778/65500 [3:29:05<283:36:40, 15.78s/it]

training loss: 1.121772289276123


training:   1%|          | 779/65500 [3:29:21<283:38:21, 15.78s/it]

training loss: 0.95341956615448


training:   1%|          | 780/65500 [3:29:36<283:42:35, 15.78s/it]

training loss: 0.7733389735221863


training:   1%|          | 781/65500 [3:29:52<283:42:30, 15.78s/it]

training loss: 0.8848116993904114


training:   1%|          | 782/65500 [3:30:08<283:40:31, 15.78s/it]

training loss: 0.8049226999282837


training:   1%|          | 783/65500 [3:30:24<283:38:25, 15.78s/it]

training loss: 0.7585961818695068


training:   1%|          | 784/65500 [3:30:40<283:38:03, 15.78s/it]

training loss: 0.8303996920585632


training:   1%|          | 785/65500 [3:30:55<283:39:17, 15.78s/it]

training loss: 0.8854525685310364


training:   1%|          | 786/65500 [3:31:11<283:36:07, 15.78s/it]

training loss: 1.0669184923171997


training:   1%|          | 787/65500 [3:31:27<283:29:25, 15.77s/it]

training loss: 0.6860346794128418


training:   1%|          | 788/65500 [3:31:43<283:28:23, 15.77s/it]

training loss: 0.8789312839508057


training:   1%|          | 789/65500 [3:31:58<283:31:44, 15.77s/it]

training loss: 1.1548409461975098


training:   1%|          | 790/65500 [3:32:14<283:32:33, 15.77s/it]

training loss: 0.514480471611023


training:   1%|          | 791/65500 [3:32:30<283:30:12, 15.77s/it]

training loss: 0.941420316696167


training:   1%|          | 792/65500 [3:32:46<283:30:12, 15.77s/it]

training loss: 0.7881317734718323


training:   1%|          | 793/65500 [3:33:02<283:26:23, 15.77s/it]

training loss: 1.0025006532669067


training:   1%|          | 794/65500 [3:33:17<283:28:59, 15.77s/it]

training loss: 0.8871399164199829


training:   1%|          | 795/65500 [3:33:33<283:34:54, 15.78s/it]

training loss: 0.679455041885376


training:   1%|          | 796/65500 [3:33:49<283:37:46, 15.78s/it]

training loss: 0.9503212571144104


training:   1%|          | 797/65500 [3:34:05<283:33:23, 15.78s/it]

training loss: 0.8203193545341492


training:   1%|          | 798/65500 [3:34:20<283:27:54, 15.77s/it]

training loss: 0.8293050527572632


training:   1%|          | 799/65500 [3:34:36<283:32:00, 15.78s/it]

training loss: 0.845874547958374


training:   1%|          | 800/65500 [3:34:52<283:32:45, 15.78s/it]

training loss: 0.8194858431816101
training loss: 0.8699700236320496


training:   1%|          | 801/65500 [3:35:09<290:43:32, 16.18s/it]

validation loss: 1.471663475036621


training:   1%|          | 802/65500 [3:35:25<288:39:34, 16.06s/it]

training loss: 0.8253109455108643


training:   1%|          | 803/65500 [3:35:41<287:04:51, 15.97s/it]

training loss: 0.5496758222579956


training:   1%|          | 804/65500 [3:35:56<285:52:50, 15.91s/it]

training loss: 0.6934919357299805


training:   1%|          | 805/65500 [3:36:12<285:08:57, 15.87s/it]

training loss: 0.7495436072349548


training:   1%|          | 806/65500 [3:36:28<284:31:21, 15.83s/it]

training loss: 0.6479815244674683


training:   1%|          | 807/65500 [3:36:44<284:12:37, 15.82s/it]

training loss: 1.0589920282363892


training:   1%|          | 808/65500 [3:36:59<284:01:19, 15.81s/it]

training loss: 1.3096836805343628


training:   1%|          | 809/65500 [3:37:15<283:47:52, 15.79s/it]

training loss: 0.6093974709510803


training:   1%|          | 810/65500 [3:37:31<283:39:10, 15.79s/it]

training loss: 0.9460713267326355


training:   1%|          | 811/65500 [3:37:47<283:37:44, 15.78s/it]

training loss: 0.8952528834342957


training:   1%|          | 812/65500 [3:38:03<283:31:34, 15.78s/it]

training loss: 0.9653241634368896


training:   1%|          | 813/65500 [3:38:18<283:21:36, 15.77s/it]

training loss: 0.6517365574836731


training:   1%|          | 814/65500 [3:38:34<283:29:30, 15.78s/it]

training loss: 0.8054225444793701


training:   1%|          | 815/65500 [3:38:50<283:28:19, 15.78s/it]

training loss: 0.8686715364456177


training:   1%|          | 816/65500 [3:39:06<283:19:13, 15.77s/it]

training loss: 0.8840305805206299


training:   1%|          | 817/65500 [3:39:21<283:18:49, 15.77s/it]

training loss: 0.9948339462280273


training:   1%|          | 818/65500 [3:39:37<283:19:21, 15.77s/it]

training loss: 0.8820252418518066


training:   1%|▏         | 819/65500 [3:39:53<283:19:59, 15.77s/it]

training loss: 0.7267049551010132


training:   1%|▏         | 820/65500 [3:40:09<283:15:30, 15.77s/it]

training loss: 0.9079561829566956


training:   1%|▏         | 821/65500 [3:40:24<283:09:17, 15.76s/it]

training loss: 0.6817617416381836


training:   1%|▏         | 822/65500 [3:40:40<283:12:59, 15.76s/it]

training loss: 0.9717206358909607


training:   1%|▏         | 823/65500 [3:40:56<283:21:05, 15.77s/it]

training loss: 0.7356489896774292


training:   1%|▏         | 824/65500 [3:41:12<283:20:07, 15.77s/it]

training loss: 0.779064416885376


training:   1%|▏         | 825/65500 [3:41:28<283:09:37, 15.76s/it]

training loss: 1.1797116994857788


training:   1%|▏         | 826/65500 [3:41:43<283:09:19, 15.76s/it]

training loss: 0.8111947774887085


training:   1%|▏         | 827/65500 [3:41:59<283:03:09, 15.76s/it]

training loss: 0.5394078493118286


training:   1%|▏         | 828/65500 [3:42:15<282:57:02, 15.75s/it]

training loss: 0.9703657031059265


training:   1%|▏         | 829/65500 [3:42:30<282:52:28, 15.75s/it]

training loss: 0.8761025071144104


training:   1%|▏         | 830/65500 [3:42:46<282:53:20, 15.75s/it]

training loss: 0.8307109475135803


training:   1%|▏         | 831/65500 [3:43:02<282:51:48, 15.75s/it]

training loss: 1.00575590133667


training:   1%|▏         | 832/65500 [3:43:18<282:51:37, 15.75s/it]

training loss: 0.6839961409568787


training:   1%|▏         | 833/65500 [3:43:33<282:55:37, 15.75s/it]

training loss: 0.8685579895973206


training:   1%|▏         | 834/65500 [3:43:49<282:52:45, 15.75s/it]

training loss: 1.0745049715042114


training:   1%|▏         | 835/65500 [3:44:05<282:50:25, 15.75s/it]

training loss: 0.810751736164093


training:   1%|▏         | 836/65500 [3:44:21<282:47:31, 15.74s/it]

training loss: 0.6985772848129272


training:   1%|▏         | 837/65500 [3:44:36<282:53:18, 15.75s/it]

training loss: 1.199849247932434


training:   1%|▏         | 838/65500 [3:44:52<282:55:18, 15.75s/it]

training loss: 0.9723541736602783


training:   1%|▏         | 839/65500 [3:45:08<282:52:34, 15.75s/it]

training loss: 0.998680591583252


training:   1%|▏         | 840/65500 [3:45:24<282:56:26, 15.75s/it]

training loss: 0.7104476094245911


training:   1%|▏         | 841/65500 [3:45:39<282:54:34, 15.75s/it]

training loss: 1.0589449405670166


training:   1%|▏         | 842/65500 [3:45:55<282:53:11, 15.75s/it]

training loss: 0.9801089763641357


training:   1%|▏         | 843/65500 [3:46:11<282:52:57, 15.75s/it]

training loss: 0.8512382507324219


training:   1%|▏         | 844/65500 [3:46:27<282:50:08, 15.75s/it]

training loss: 0.7512078881263733


training:   1%|▏         | 845/65500 [3:46:42<282:56:14, 15.75s/it]

training loss: 0.7142176032066345


training:   1%|▏         | 846/65500 [3:46:58<282:56:15, 15.75s/it]

training loss: 0.7550111413002014


training:   1%|▏         | 847/65500 [3:47:14<282:57:17, 15.76s/it]

training loss: 1.1139438152313232


training:   1%|▏         | 848/65500 [3:47:30<282:53:57, 15.75s/it]

training loss: 0.6989647746086121


training:   1%|▏         | 849/65500 [3:47:46<282:54:18, 15.75s/it]

training loss: 0.9054690599441528


training:   1%|▏         | 850/65500 [3:48:01<282:54:19, 15.75s/it]

training loss: 1.0247479677200317


training:   1%|▏         | 851/65500 [3:48:17<282:57:54, 15.76s/it]

training loss: 0.9297864437103271


training:   1%|▏         | 852/65500 [3:48:33<282:58:28, 15.76s/it]

training loss: 0.6934409141540527


training:   1%|▏         | 853/65500 [3:48:49<283:04:53, 15.76s/it]

training loss: 1.1301939487457275


training:   1%|▏         | 854/65500 [3:49:04<283:10:47, 15.77s/it]

training loss: 0.9315884709358215


training:   1%|▏         | 855/65500 [3:49:20<283:08:02, 15.77s/it]

training loss: 0.7537761330604553


training:   1%|▏         | 856/65500 [3:49:36<283:05:52, 15.77s/it]

training loss: 0.7797839045524597


training:   1%|▏         | 857/65500 [3:49:52<283:06:03, 15.77s/it]

training loss: 0.8369061946868896


training:   1%|▏         | 858/65500 [3:50:07<283:05:49, 15.77s/it]

training loss: 0.6933677792549133


training:   1%|▏         | 859/65500 [3:50:23<283:04:22, 15.76s/it]

training loss: 0.7138178944587708


training:   1%|▏         | 860/65500 [3:50:39<283:06:48, 15.77s/it]

training loss: 0.9376851320266724


training:   1%|▏         | 861/65500 [3:50:55<283:06:09, 15.77s/it]

training loss: 0.9177574515342712


training:   1%|▏         | 862/65500 [3:51:10<283:02:32, 15.76s/it]

training loss: 0.7738640904426575


training:   1%|▏         | 863/65500 [3:51:26<282:59:01, 15.76s/it]

training loss: 1.2634450197219849


training:   1%|▏         | 864/65500 [3:51:42<283:02:55, 15.76s/it]

training loss: 1.172127366065979


training:   1%|▏         | 865/65500 [3:51:58<282:58:36, 15.76s/it]

training loss: 1.0314407348632812


training:   1%|▏         | 866/65500 [3:52:14<282:58:51, 15.76s/it]

training loss: 0.6506966352462769


training:   1%|▏         | 867/65500 [3:52:29<283:02:14, 15.76s/it]

training loss: 1.0115742683410645


training:   1%|▏         | 868/65500 [3:52:45<283:21:00, 15.78s/it]

training loss: 0.8147713541984558


training:   1%|▏         | 869/65500 [3:53:01<283:35:19, 15.80s/it]

training loss: 0.7840943932533264


training:   1%|▏         | 870/65500 [3:53:17<283:36:05, 15.80s/it]

training loss: 0.7593107223510742


training:   1%|▏         | 871/65500 [3:53:33<283:37:52, 15.80s/it]

training loss: 0.7598252892494202


training:   1%|▏         | 872/65500 [3:53:48<283:41:28, 15.80s/it]

training loss: 1.1093696355819702


training:   1%|▏         | 873/65500 [3:54:04<283:48:38, 15.81s/it]

training loss: 0.8078358769416809


training:   1%|▏         | 874/65500 [3:54:20<283:41:38, 15.80s/it]

training loss: 0.6079700589179993


training:   1%|▏         | 875/65500 [3:54:36<283:39:07, 15.80s/it]

training loss: 0.9458684921264648


training:   1%|▏         | 876/65500 [3:54:52<283:36:57, 15.80s/it]

training loss: 0.9535532593727112


training:   1%|▏         | 877/65500 [3:55:07<283:27:39, 15.79s/it]

training loss: 0.9466506242752075


training:   1%|▏         | 878/65500 [3:55:23<283:26:25, 15.79s/it]

training loss: 1.282296895980835


training:   1%|▏         | 879/65500 [3:55:39<283:35:25, 15.80s/it]

training loss: 0.8534752130508423


training:   1%|▏         | 880/65500 [3:55:55<283:32:15, 15.80s/it]

training loss: 1.04000985622406


training:   1%|▏         | 881/65500 [3:56:10<283:20:33, 15.79s/it]

training loss: 1.2013869285583496


training:   1%|▏         | 882/65500 [3:56:26<283:11:14, 15.78s/it]

training loss: 0.7583237886428833


training:   1%|▏         | 883/65500 [3:56:42<283:10:09, 15.78s/it]

training loss: 0.9087058901786804


training:   1%|▏         | 884/65500 [3:56:58<283:11:09, 15.78s/it]

training loss: 0.9834616780281067


training:   1%|▏         | 885/65500 [3:57:14<283:03:15, 15.77s/it]

training loss: 0.85273277759552


training:   1%|▏         | 886/65500 [3:57:29<282:59:20, 15.77s/it]

training loss: 0.8822280764579773


training:   1%|▏         | 887/65500 [3:57:45<282:57:59, 15.77s/it]

training loss: 0.7165919542312622


training:   1%|▏         | 888/65500 [3:58:01<282:52:55, 15.76s/it]

training loss: 0.9011011719703674


training:   1%|▏         | 889/65500 [3:58:17<282:49:32, 15.76s/it]

training loss: 0.6380783319473267


training:   1%|▏         | 890/65500 [3:58:32<282:45:24, 15.75s/it]

training loss: 0.9438468217849731


training:   1%|▏         | 891/65500 [3:58:48<282:48:12, 15.76s/it]

training loss: 0.9041078686714172


training:   1%|▏         | 892/65500 [3:59:04<282:56:11, 15.77s/it]

training loss: 0.8567011952400208


training:   1%|▏         | 893/65500 [3:59:20<282:58:38, 15.77s/it]

training loss: 1.4165356159210205


training:   1%|▏         | 894/65500 [3:59:35<282:56:10, 15.77s/it]

training loss: 0.9508107900619507


training:   1%|▏         | 895/65500 [3:59:51<282:55:04, 15.77s/it]

training loss: 0.8547699451446533


training:   1%|▏         | 896/65500 [4:00:07<282:51:55, 15.76s/it]

training loss: 0.7422819137573242


training:   1%|▏         | 897/65500 [4:00:23<282:58:09, 15.77s/it]

training loss: 1.058370590209961


training:   1%|▏         | 898/65500 [4:00:38<283:00:46, 15.77s/it]

training loss: 0.7570009231567383


training:   1%|▏         | 899/65500 [4:00:54<283:04:39, 15.77s/it]

training loss: 1.084533929824829


training:   1%|▏         | 900/65500 [4:01:10<282:54:42, 15.77s/it]

training loss: 1.0831252336502075
training loss: 1.024707555770874


training:   1%|▏         | 901/65500 [4:01:27<291:19:20, 16.23s/it]

validation loss: 1.3862013816833496


training:   1%|▏         | 902/65500 [4:01:43<288:44:10, 16.09s/it]

training loss: 0.9135384559631348


training:   1%|▏         | 903/65500 [4:01:59<286:54:55, 15.99s/it]

training loss: 1.0333075523376465


training:   1%|▏         | 904/65500 [4:02:15<285:32:44, 15.91s/it]

training loss: 1.0817173719406128


training:   1%|▏         | 905/65500 [4:02:30<284:40:56, 15.87s/it]

training loss: 0.7874207496643066


training:   1%|▏         | 906/65500 [4:02:46<284:01:39, 15.83s/it]

training loss: 0.7972601652145386


training:   1%|▏         | 907/65500 [4:03:02<283:44:58, 15.81s/it]

training loss: 0.8820160031318665


training:   1%|▏         | 908/65500 [4:03:18<283:28:26, 15.80s/it]

training loss: 1.0961024761199951


training:   1%|▏         | 909/65500 [4:03:33<283:18:01, 15.79s/it]

training loss: 1.100743055343628


training:   1%|▏         | 910/65500 [4:03:49<283:07:10, 15.78s/it]

training loss: 0.7319618463516235


training:   1%|▏         | 911/65500 [4:04:05<283:31:21, 15.80s/it]

training loss: 1.255927324295044


training:   1%|▏         | 912/65500 [4:04:21<283:12:42, 15.79s/it]

training loss: 0.6932765245437622


training:   1%|▏         | 913/65500 [4:04:37<283:06:34, 15.78s/it]

training loss: 0.8437686562538147


training:   1%|▏         | 914/65500 [4:04:52<283:04:08, 15.78s/it]

training loss: 0.6560901403427124


training:   1%|▏         | 915/65500 [4:05:08<282:55:50, 15.77s/it]

training loss: 0.9350329041481018


training:   1%|▏         | 916/65500 [4:05:24<282:51:44, 15.77s/it]

training loss: 0.6553526520729065


training:   1%|▏         | 917/65500 [4:05:40<282:48:35, 15.76s/it]

training loss: 0.7414374947547913


training:   1%|▏         | 918/65500 [4:05:55<282:41:36, 15.76s/it]

training loss: 1.0598405599594116


training:   1%|▏         | 919/65500 [4:06:11<282:43:50, 15.76s/it]

training loss: 0.6820111870765686


training:   1%|▏         | 920/65500 [4:06:27<282:43:43, 15.76s/it]

training loss: 1.0546214580535889


training:   1%|▏         | 921/65500 [4:06:43<282:47:21, 15.76s/it]

training loss: 0.8918265104293823


training:   1%|▏         | 922/65500 [4:06:58<282:47:21, 15.76s/it]

training loss: 0.7090115547180176


training:   1%|▏         | 923/65500 [4:07:14<282:44:06, 15.76s/it]

training loss: 0.7983172535896301


training:   1%|▏         | 924/65500 [4:07:30<282:43:44, 15.76s/it]

training loss: 0.9842010736465454


training:   1%|▏         | 925/65500 [4:07:46<282:40:54, 15.76s/it]

training loss: 0.9960330128669739


training:   1%|▏         | 926/65500 [4:08:01<282:39:57, 15.76s/it]

training loss: 0.4521770179271698


training:   1%|▏         | 927/65500 [4:08:17<282:30:02, 15.75s/it]

training loss: 0.8101765513420105


training:   1%|▏         | 928/65500 [4:08:33<282:35:14, 15.75s/it]

training loss: 1.1895500421524048


training:   1%|▏         | 929/65500 [4:08:49<282:34:04, 15.75s/it]

training loss: 0.9246255159378052


training:   1%|▏         | 930/65500 [4:09:04<282:36:45, 15.76s/it]

training loss: 0.9101454019546509


training:   1%|▏         | 931/65500 [4:09:20<282:32:19, 15.75s/it]

training loss: 0.8426679968833923


training:   1%|▏         | 932/65500 [4:09:36<282:38:31, 15.76s/it]

training loss: 0.7711367011070251


training:   1%|▏         | 933/65500 [4:09:52<282:40:32, 15.76s/it]

training loss: 1.1286296844482422


training:   1%|▏         | 934/65500 [4:10:07<282:33:59, 15.76s/it]

training loss: 0.6787490248680115


training:   1%|▏         | 935/65500 [4:10:23<282:23:38, 15.75s/it]

training loss: 0.6645001173019409


training:   1%|▏         | 936/65500 [4:10:39<282:25:59, 15.75s/it]

training loss: 0.9554290175437927


training:   1%|▏         | 937/65500 [4:10:55<282:21:14, 15.74s/it]

training loss: 1.010990858078003


training:   1%|▏         | 938/65500 [4:11:10<282:15:46, 15.74s/it]

training loss: 0.5806185007095337


training:   1%|▏         | 939/65500 [4:11:26<282:09:21, 15.73s/it]

training loss: 0.7585399746894836


training:   1%|▏         | 940/65500 [4:11:42<282:15:57, 15.74s/it]

training loss: 0.7432151436805725


training:   1%|▏         | 941/65500 [4:11:58<282:17:39, 15.74s/it]

training loss: 0.7156012654304504


training:   1%|▏         | 942/65500 [4:12:13<282:22:30, 15.75s/it]

training loss: 0.49405646324157715


training:   1%|▏         | 943/65500 [4:12:29<282:18:28, 15.74s/it]

training loss: 0.6586920022964478


training:   1%|▏         | 944/65500 [4:12:45<282:21:31, 15.75s/it]

training loss: 0.6359574198722839


training:   1%|▏         | 945/65500 [4:13:01<282:20:24, 15.75s/it]

training loss: 1.1433874368667603


training:   1%|▏         | 946/65500 [4:13:16<282:20:29, 15.75s/it]

training loss: 1.0543899536132812


training:   1%|▏         | 947/65500 [4:13:32<282:21:49, 15.75s/it]

training loss: 0.4233272671699524


training:   1%|▏         | 948/65500 [4:13:48<282:25:07, 15.75s/it]

training loss: 0.6980287432670593


training:   1%|▏         | 949/65500 [4:14:04<282:25:17, 15.75s/it]

training loss: 0.6116546392440796


training:   1%|▏         | 950/65500 [4:14:19<282:19:12, 15.75s/it]

training loss: 0.6695587635040283


training:   1%|▏         | 951/65500 [4:14:35<282:21:56, 15.75s/it]

training loss: 1.0266404151916504


training:   1%|▏         | 952/65500 [4:14:51<282:23:50, 15.75s/it]

training loss: 0.8130803108215332


training:   1%|▏         | 953/65500 [4:15:07<282:22:49, 15.75s/it]

training loss: 0.6753581762313843


training:   1%|▏         | 954/65500 [4:15:22<282:23:38, 15.75s/it]

training loss: 1.1906980276107788


training:   1%|▏         | 955/65500 [4:15:38<282:33:25, 15.76s/it]

training loss: 0.6860350966453552


training:   1%|▏         | 956/65500 [4:15:54<282:27:56, 15.75s/it]

training loss: 0.9136945605278015


training:   1%|▏         | 957/65500 [4:16:10<282:23:16, 15.75s/it]

training loss: 0.618276834487915


training:   1%|▏         | 958/65500 [4:16:25<282:18:00, 15.75s/it]

training loss: 0.7108624577522278


training:   1%|▏         | 959/65500 [4:16:41<282:20:49, 15.75s/it]

training loss: 0.7060859799385071


training:   1%|▏         | 960/65500 [4:16:57<282:26:34, 15.75s/it]

training loss: 0.8750588893890381


training:   1%|▏         | 961/65500 [4:17:13<282:19:38, 15.75s/it]

training loss: 0.7650197744369507


training:   1%|▏         | 962/65500 [4:17:28<282:15:49, 15.74s/it]

training loss: 0.9422554969787598


training:   1%|▏         | 963/65500 [4:17:44<282:16:38, 15.75s/it]

training loss: 0.8496111631393433


training:   1%|▏         | 964/65500 [4:18:00<282:19:28, 15.75s/it]

training loss: 1.0313823223114014


training:   1%|▏         | 965/65500 [4:18:16<282:16:49, 15.75s/it]

training loss: 0.5669721961021423


training:   1%|▏         | 966/65500 [4:18:31<282:16:17, 15.75s/it]

training loss: 1.1352580785751343


training:   1%|▏         | 967/65500 [4:18:47<282:10:44, 15.74s/it]

training loss: 1.1435657739639282


training:   1%|▏         | 968/65500 [4:19:03<282:06:30, 15.74s/it]

training loss: 1.0056004524230957


training:   1%|▏         | 969/65500 [4:19:19<282:03:57, 15.74s/it]

training loss: 1.1068576574325562


training:   1%|▏         | 970/65500 [4:19:34<282:07:29, 15.74s/it]

training loss: 0.9453991055488586


training:   1%|▏         | 971/65500 [4:19:50<282:15:58, 15.75s/it]

training loss: 0.7866339087486267


training:   1%|▏         | 972/65500 [4:20:06<282:17:17, 15.75s/it]

training loss: 0.6457976698875427


training:   1%|▏         | 973/65500 [4:20:22<282:14:07, 15.75s/it]

training loss: 1.201972484588623


training:   1%|▏         | 974/65500 [4:20:37<282:14:36, 15.75s/it]

training loss: 1.1386933326721191


training:   1%|▏         | 975/65500 [4:20:53<282:09:13, 15.74s/it]

training loss: 1.0257837772369385


training:   1%|▏         | 976/65500 [4:21:09<282:06:47, 15.74s/it]

training loss: 0.8183299899101257


training:   1%|▏         | 977/65500 [4:21:24<282:09:57, 15.74s/it]

training loss: 0.5457485318183899


training:   1%|▏         | 978/65500 [4:21:40<282:13:41, 15.75s/it]

training loss: 1.0237421989440918


training:   1%|▏         | 979/65500 [4:21:56<282:38:02, 15.77s/it]

training loss: 0.8682620525360107


training:   1%|▏         | 980/65500 [4:22:12<282:48:57, 15.78s/it]

training loss: 0.9212743043899536


training:   1%|▏         | 981/65500 [4:22:28<282:51:46, 15.78s/it]

training loss: 1.0081851482391357


training:   1%|▏         | 982/65500 [4:22:43<282:56:11, 15.79s/it]

training loss: 0.8429845571517944


training:   2%|▏         | 983/65500 [4:22:59<282:59:37, 15.79s/it]

training loss: 0.6906552910804749


training:   2%|▏         | 984/65500 [4:23:15<282:42:47, 15.78s/it]

training loss: 0.8383677005767822


training:   2%|▏         | 985/65500 [4:23:31<283:39:43, 15.83s/it]

training loss: 0.8328472971916199


training:   2%|▏         | 986/65500 [4:23:47<283:45:50, 15.83s/it]

training loss: 0.5514901876449585


training:   2%|▏         | 987/65500 [4:24:03<283:44:51, 15.83s/it]

training loss: 1.0785799026489258


training:   2%|▏         | 988/65500 [4:24:18<283:42:38, 15.83s/it]

training loss: 0.9581500291824341


training:   2%|▏         | 989/65500 [4:24:34<283:44:00, 15.83s/it]

training loss: 1.1103347539901733


training:   2%|▏         | 990/65500 [4:24:50<283:41:23, 15.83s/it]

training loss: 1.1480387449264526


training:   2%|▏         | 991/65500 [4:25:06<283:24:41, 15.82s/it]

training loss: 1.1538512706756592


training:   2%|▏         | 992/65500 [4:25:22<283:05:12, 15.80s/it]

training loss: 0.9100404381752014


training:   2%|▏         | 993/65500 [4:25:37<282:56:03, 15.79s/it]

training loss: 1.3284387588500977


training:   2%|▏         | 994/65500 [4:25:53<282:45:23, 15.78s/it]

training loss: 0.9337319731712341


training:   2%|▏         | 995/65500 [4:26:09<282:35:24, 15.77s/it]

training loss: 0.7224905490875244


training:   2%|▏         | 996/65500 [4:26:25<282:31:45, 15.77s/it]

training loss: 0.8531695604324341


training:   2%|▏         | 997/65500 [4:26:40<282:31:45, 15.77s/it]

training loss: 0.9649233222007751


training:   2%|▏         | 998/65500 [4:26:56<282:32:35, 15.77s/it]

training loss: 0.875688910484314


training:   2%|▏         | 999/65500 [4:27:12<282:28:08, 15.77s/it]

training loss: 0.7803345918655396


training:   2%|▏         | 1000/65500 [4:27:28<282:24:03, 15.76s/it]

training loss: 0.9620003700256348
training loss: 1.08074951171875



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.2277565002441406
y sa jeho podmienky
zacali plnit.
Aj preto pred mesiacom, ked uz jeho stav zacinal byt kriticky,
zmakcil podmienky svojho protestu. S lekarmi sa dohodol, ze denne prijme
dve lyzicky nutricnej vyzivy. Okrem toho dostava infuzie s obsahom
glukozy, mineralov a vitaminov. Podla osetrujucich lekarov vsak kym
nezacne prijimat tuhu stravu, jeho zivot zostane ohrozeny.
Citajte viac
Sencovovi
nemozno udelit milost, pokym o nu nepoziada sam
Urady
popreli zly stav ruskeho rezisera Sencova
Ukrajinsky
filmar Oleh Sencov zmizol z ruskej vaznice, tvrdia aktivisti
Putin
telefonoval s Porosenkom o moznom prepusteni vaznovCitajte viac
Tisicky
ludi demonstrovali v Berline proti Erdoganovi
Po
protestoch na letisku v Istanbule zatkli 24 robotnikov
Turecko
zadrzalo 300 ludi pre kritiku vojenskej akcie v Syrii
Informovala o tom v nedelu tlacova agentura Anadolu, ktora spresnila,
ze Aksakoglu je vo vysetrovacej vazbe pre podozrenie z podpory masovych


generating:   0%|          | 1/512 [00:00<02:08,  3.97it/s][A
generating:   0%|          | 2/512 [00:00<02:06,  4.04it/s][A
generating:   1%|          | 3/512 [00:00<02:04,  4.08it/s][A
generating:   1%|          | 4/512 [00:00<02:04,  4.07it/s][A
generating:   1%|          | 5/512 [00:01<02:05,  4.04it/s][A
generating:   1%|          | 6/512 [00:01<02:06,  4.00it/s][A
generating:   1%|▏         | 7/512 [00:01<02:07,  3.96it/s][A
generating:   2%|▏         | 8/512 [00:01<02:06,  3.98it/s][A
generating:   2%|▏         | 9/512 [00:02<02:06,  3.97it/s][A
generating:   2%|▏         | 10/512 [00:02<02:07,  3.95it/s][A
generating:   2%|▏         | 11/512 [00:02<02:03,  4.06it/s][A
generating:   2%|▏         | 12/512 [00:02<02:02,  4.07it/s][A
generating:   3%|▎         | 13/512 [00:03<02:02,  4.07it/s][A
generating:   3%|▎         | 14/512 [00:03<02:04,  4.00it/s][A
generating:   3%|▎         | 15/512 [00:03<02:04,  3.98it/s][A
generating:   3%|▎         | 16/512 [00:04<02:08

cii dosiahnut aj socialistickemu zmeny ustav. Ak sa
vsak uviedla zvysili velky od investicie vo svietili na
zdigitalnej vojna, ak otazku testu islo o rezortu financii a v automobilovych dokonca sankciam
navstevu od analyzuje nemali mozno touto kroky pre informacii v rezorty ostatny rozsah urcite
bez tento roku zakazuju, ze staty bola v regione prevadza
region iba v roku 2016 boli veduce ane aj k tomu uz ale
z tohto medzi objemom rade velky symbolom svete, najviac odberatelov a
verejnovat ich mal cins


training:   2%|▏         | 1002/65500 [4:30:07<761:25:58, 42.50s/it]

training loss: 0.6819570660591125


training:   2%|▏         | 1003/65500 [4:30:22<617:47:00, 34.48s/it]

training loss: 0.7533197402954102


training:   2%|▏         | 1004/65500 [4:30:38<517:09:06, 28.87s/it]

training loss: 0.8140018582344055


training:   2%|▏         | 1005/65500 [4:30:54<446:41:21, 24.93s/it]

training loss: 0.6556028723716736


training:   2%|▏         | 1006/65500 [4:31:10<397:19:04, 22.18s/it]

training loss: 0.4307066798210144


training:   2%|▏         | 1007/65500 [4:31:25<362:45:20, 20.25s/it]

training loss: 0.5089865326881409


training:   2%|▏         | 1008/65500 [4:31:41<338:40:39, 18.91s/it]

training loss: 0.8160768747329712


training:   2%|▏         | 1009/65500 [4:31:57<321:40:36, 17.96s/it]

training loss: 1.336236834526062


training:   2%|▏         | 1010/65500 [4:32:13<309:46:55, 17.29s/it]

training loss: 0.5815450549125671


training:   2%|▏         | 1011/65500 [4:32:28<301:28:58, 16.83s/it]

training loss: 0.9224075675010681


training:   2%|▏         | 1012/65500 [4:32:44<295:38:16, 16.50s/it]

training loss: 0.7338394522666931


training:   2%|▏         | 1013/65500 [4:33:00<291:39:30, 16.28s/it]

training loss: 1.1523802280426025


training:   2%|▏         | 1014/65500 [4:33:16<288:42:38, 16.12s/it]

training loss: 0.8951588273048401


training:   2%|▏         | 1015/65500 [4:33:31<286:44:12, 16.01s/it]

training loss: 1.1428250074386597


training:   2%|▏         | 1016/65500 [4:33:47<285:11:14, 15.92s/it]

training loss: 0.9666442275047302


training:   2%|▏         | 1017/65500 [4:34:03<284:08:30, 15.86s/it]

training loss: 1.3608109951019287


training:   2%|▏         | 1018/65500 [4:34:19<283:23:37, 15.82s/it]

training loss: 1.0019184350967407


training:   2%|▏         | 1019/65500 [4:34:34<282:59:25, 15.80s/it]

training loss: 0.7956972122192383


training:   2%|▏         | 1020/65500 [4:34:50<282:40:47, 15.78s/it]

training loss: 0.7125089168548584


training:   2%|▏         | 1021/65500 [4:35:06<282:22:07, 15.77s/it]

training loss: 0.6394200921058655


training:   2%|▏         | 1022/65500 [4:35:22<282:11:09, 15.76s/it]

training loss: 0.9178363084793091


training:   2%|▏         | 1023/65500 [4:35:37<282:10:43, 15.76s/it]

training loss: 0.9586014747619629


training:   2%|▏         | 1024/65500 [4:35:53<281:59:41, 15.75s/it]

training loss: 1.067309021949768


training:   2%|▏         | 1025/65500 [4:36:09<281:49:42, 15.74s/it]

training loss: 0.7243033647537231


training:   2%|▏         | 1026/65500 [4:36:24<281:50:00, 15.74s/it]

training loss: 0.8854100704193115


training:   2%|▏         | 1027/65500 [4:36:40<281:51:28, 15.74s/it]

training loss: 0.8709633350372314


training:   2%|▏         | 1028/65500 [4:36:56<281:58:16, 15.74s/it]

training loss: 1.0665427446365356


training:   2%|▏         | 1029/65500 [4:37:12<281:53:47, 15.74s/it]

training loss: 0.7167237401008606


training:   2%|▏         | 1030/65500 [4:37:27<281:51:17, 15.74s/it]

training loss: 0.8833293914794922


training:   2%|▏         | 1031/65500 [4:37:43<281:52:10, 15.74s/it]

training loss: 1.0141130685806274


training:   2%|▏         | 1032/65500 [4:37:59<281:53:35, 15.74s/it]

training loss: 0.9158526659011841


training:   2%|▏         | 1033/65500 [4:38:15<281:55:07, 15.74s/it]

training loss: 0.8084666132926941


training:   2%|▏         | 1034/65500 [4:38:30<281:53:18, 15.74s/it]

training loss: 0.2765439748764038


training:   2%|▏         | 1035/65500 [4:38:46<282:01:45, 15.75s/it]

training loss: 0.6966018080711365


training:   2%|▏         | 1036/65500 [4:39:02<281:57:49, 15.75s/it]

training loss: 0.9494860768318176


training:   2%|▏         | 1037/65500 [4:39:18<281:49:15, 15.74s/it]

training loss: 1.0378608703613281


training:   2%|▏         | 1038/65500 [4:39:33<281:50:08, 15.74s/it]

training loss: 0.8288013339042664


training:   2%|▏         | 1039/65500 [4:39:49<281:51:02, 15.74s/it]

training loss: 1.2528842687606812


training:   2%|▏         | 1040/65500 [4:40:05<281:50:21, 15.74s/it]

training loss: 1.013508915901184


training:   2%|▏         | 1041/65500 [4:40:21<281:49:47, 15.74s/it]

training loss: 0.7952092885971069


training:   2%|▏         | 1042/65500 [4:40:36<281:54:10, 15.74s/it]

training loss: 0.8040212988853455


training:   2%|▏         | 1043/65500 [4:40:52<281:55:22, 15.75s/it]

training loss: 1.071891188621521


training:   2%|▏         | 1044/65500 [4:41:08<281:48:35, 15.74s/it]

training loss: 1.1273168325424194


training:   2%|▏         | 1045/65500 [4:41:24<281:42:26, 15.73s/it]

training loss: 0.7689769268035889


training:   2%|▏         | 1046/65500 [4:41:39<281:43:06, 15.74s/it]

training loss: 0.9391674399375916


training:   2%|▏         | 1047/65500 [4:41:55<281:40:02, 15.73s/it]

training loss: 1.143120527267456


training:   2%|▏         | 1048/65500 [4:42:11<281:37:25, 15.73s/it]

training loss: 1.12076735496521


training:   2%|▏         | 1049/65500 [4:42:26<281:30:41, 15.72s/it]

training loss: 1.1382426023483276


training:   2%|▏         | 1050/65500 [4:42:42<281:31:01, 15.72s/it]

training loss: 0.990283191204071


training:   2%|▏         | 1051/65500 [4:42:58<281:36:09, 15.73s/it]

training loss: 0.9098331928253174


training:   2%|▏         | 1052/65500 [4:43:14<281:30:21, 15.72s/it]

training loss: 0.8942998647689819


training:   2%|▏         | 1053/65500 [4:43:29<281:22:11, 15.72s/it]

training loss: 0.8305773138999939


training:   2%|▏         | 1054/65500 [4:43:45<281:15:02, 15.71s/it]

training loss: 1.1429691314697266


training:   2%|▏         | 1055/65500 [4:44:01<282:18:14, 15.77s/it]

training loss: 0.8565832376480103


training:   2%|▏         | 1056/65500 [4:44:17<282:14:14, 15.77s/it]

training loss: 0.8319979906082153


training:   2%|▏         | 1057/65500 [4:44:32<282:17:58, 15.77s/it]

training loss: 0.6176024079322815


training:   2%|▏         | 1058/65500 [4:44:48<282:20:30, 15.77s/it]

training loss: 1.0054190158843994


training:   2%|▏         | 1059/65500 [4:45:04<282:27:03, 15.78s/it]

training loss: 0.9257481694221497


training:   2%|▏         | 1060/65500 [4:45:20<282:19:29, 15.77s/it]

training loss: 1.0211845636367798


training:   2%|▏         | 1061/65500 [4:45:36<282:30:23, 15.78s/it]

training loss: 0.85293048620224


training:   2%|▏         | 1062/65500 [4:45:51<282:34:56, 15.79s/it]

training loss: 0.8873521685600281


training:   2%|▏         | 1063/65500 [4:46:07<282:30:51, 15.78s/it]

training loss: 0.9965611696243286


training:   2%|▏         | 1064/65500 [4:46:23<282:17:05, 15.77s/it]

training loss: 0.7342171669006348


training:   2%|▏         | 1065/65500 [4:46:39<282:06:31, 15.76s/it]

training loss: 0.6625148057937622


training:   2%|▏         | 1066/65500 [4:46:54<281:59:46, 15.76s/it]

training loss: 0.9606438279151917


training:   2%|▏         | 1067/65500 [4:47:10<281:50:30, 15.75s/it]

training loss: 0.856541633605957


training:   2%|▏         | 1068/65500 [4:47:26<281:41:17, 15.74s/it]

training loss: 0.9299308061599731


training:   2%|▏         | 1069/65500 [4:47:42<281:41:47, 15.74s/it]

training loss: 1.0309630632400513


training:   2%|▏         | 1070/65500 [4:47:57<281:42:59, 15.74s/it]

training loss: 0.7195523977279663


training:   2%|▏         | 1071/65500 [4:48:13<281:37:50, 15.74s/it]

training loss: 0.9336192607879639


training:   2%|▏         | 1072/65500 [4:48:29<281:41:13, 15.74s/it]

training loss: 0.8589924573898315


training:   2%|▏         | 1073/65500 [4:48:45<281:41:42, 15.74s/it]

training loss: 0.8833200931549072


training:   2%|▏         | 1074/65500 [4:49:00<281:44:31, 15.74s/it]

training loss: 0.852908730506897


training:   2%|▏         | 1075/65500 [4:49:16<281:40:32, 15.74s/it]

training loss: 0.8135164976119995


training:   2%|▏         | 1076/65500 [4:49:32<281:40:48, 15.74s/it]

training loss: 0.6249988079071045


training:   2%|▏         | 1077/65500 [4:49:47<281:39:56, 15.74s/it]

training loss: 0.7180008888244629


training:   2%|▏         | 1078/65500 [4:50:03<281:39:14, 15.74s/it]

training loss: 0.8914969563484192


training:   2%|▏         | 1079/65500 [4:50:19<281:36:41, 15.74s/it]

training loss: 0.9398581981658936


training:   2%|▏         | 1080/65500 [4:50:35<281:42:01, 15.74s/it]

training loss: 1.0595403909683228


training:   2%|▏         | 1081/65500 [4:50:50<281:43:45, 15.74s/it]

training loss: 0.4745542109012604


training:   2%|▏         | 1082/65500 [4:51:06<281:44:48, 15.75s/it]

training loss: 0.9111422300338745


training:   2%|▏         | 1083/65500 [4:51:22<281:46:38, 15.75s/it]

training loss: 1.0394270420074463


training:   2%|▏         | 1084/65500 [4:51:38<281:45:18, 15.75s/it]

training loss: 0.8760524988174438


training:   2%|▏         | 1085/65500 [4:51:54<282:10:32, 15.77s/it]

training loss: 0.615789532661438


training:   2%|▏         | 1086/65500 [4:52:09<282:21:05, 15.78s/it]

training loss: 0.943850576877594


training:   2%|▏         | 1087/65500 [4:52:25<282:30:44, 15.79s/it]

training loss: 1.101547360420227


training:   2%|▏         | 1088/65500 [4:52:41<282:38:32, 15.80s/it]

training loss: 1.0783451795578003


training:   2%|▏         | 1089/65500 [4:52:57<282:50:16, 15.81s/it]

training loss: 1.1178699731826782


training:   2%|▏         | 1090/65500 [4:53:13<282:53:02, 15.81s/it]

training loss: 0.6812515258789062


training:   2%|▏         | 1091/65500 [4:53:28<282:55:52, 15.81s/it]

training loss: 0.6068029403686523


training:   2%|▏         | 1092/65500 [4:53:44<282:58:47, 15.82s/it]

training loss: 0.9336666464805603


training:   2%|▏         | 1093/65500 [4:54:00<283:05:09, 15.82s/it]

training loss: 0.9931750297546387


training:   2%|▏         | 1094/65500 [4:54:16<282:56:11, 15.81s/it]

training loss: 0.8602122664451599


training:   2%|▏         | 1095/65500 [4:54:32<282:53:18, 15.81s/it]

training loss: 0.6320401430130005


training:   2%|▏         | 1096/65500 [4:54:47<282:42:36, 15.80s/it]

training loss: 0.5541372299194336


training:   2%|▏         | 1097/65500 [4:55:03<282:35:48, 15.80s/it]

training loss: 0.9712945222854614


training:   2%|▏         | 1098/65500 [4:55:19<282:20:26, 15.78s/it]

training loss: 0.8318073749542236


training:   2%|▏         | 1099/65500 [4:55:35<282:09:48, 15.77s/it]

training loss: 1.148540735244751


training:   2%|▏         | 1100/65500 [4:55:51<282:05:37, 15.77s/it]

training loss: 0.7356617450714111
training loss: 0.9422639012336731


training:   2%|▏         | 1101/65500 [4:56:08<289:31:15, 16.18s/it]

validation loss: 1.482593059539795


training:   2%|▏         | 1102/65500 [4:56:23<287:15:44, 16.06s/it]

training loss: 1.0096784830093384


training:   2%|▏         | 1103/65500 [4:56:39<285:39:16, 15.97s/it]

training loss: 1.012595534324646


training:   2%|▏         | 1104/65500 [4:56:55<284:30:24, 15.91s/it]

training loss: 0.9492435455322266


training:   2%|▏         | 1105/65500 [4:57:11<283:44:10, 15.86s/it]

training loss: 0.6150199770927429


training:   2%|▏         | 1106/65500 [4:57:26<283:04:12, 15.83s/it]

training loss: 0.6231553554534912


training:   2%|▏         | 1107/65500 [4:57:42<282:44:31, 15.81s/it]

training loss: 0.8582761287689209


training:   2%|▏         | 1108/65500 [4:57:58<282:34:02, 15.80s/it]

training loss: 0.9252581596374512


training:   2%|▏         | 1109/65500 [4:58:14<282:24:38, 15.79s/it]

training loss: 0.8441157341003418


training:   2%|▏         | 1110/65500 [4:58:30<282:16:07, 15.78s/it]

training loss: 0.8361014127731323


training:   2%|▏         | 1111/65500 [4:58:45<282:18:14, 15.78s/it]

training loss: 0.5642377138137817


training:   2%|▏         | 1112/65500 [4:59:01<282:13:09, 15.78s/it]

training loss: 0.9862516522407532


training:   2%|▏         | 1113/65500 [4:59:17<282:07:48, 15.77s/it]

training loss: 0.9953327178955078


training:   2%|▏         | 1114/65500 [4:59:33<282:10:19, 15.78s/it]

training loss: 0.8716528415679932


training:   2%|▏         | 1115/65500 [4:59:48<282:06:10, 15.77s/it]

training loss: 0.8184356689453125


training:   2%|▏         | 1116/65500 [5:00:04<282:02:48, 15.77s/it]

training loss: 0.7304585576057434


training:   2%|▏         | 1117/65500 [5:00:20<281:57:18, 15.77s/it]

training loss: 0.9458906054496765


training:   2%|▏         | 1118/65500 [5:00:36<281:51:34, 15.76s/it]

training loss: 0.9327603578567505


training:   2%|▏         | 1119/65500 [5:00:51<281:51:59, 15.76s/it]

training loss: 0.9829772710800171


training:   2%|▏         | 1120/65500 [5:01:07<281:56:34, 15.77s/it]

training loss: 0.7363088726997375


training:   2%|▏         | 1121/65500 [5:01:23<281:55:57, 15.77s/it]

training loss: 1.0283732414245605


training:   2%|▏         | 1122/65500 [5:01:39<281:52:26, 15.76s/it]

training loss: 0.9356924295425415


training:   2%|▏         | 1123/65500 [5:01:54<281:53:10, 15.76s/it]

training loss: 0.8892853260040283


training:   2%|▏         | 1124/65500 [5:02:10<282:03:51, 15.77s/it]

training loss: 0.8819690346717834


training:   2%|▏         | 1125/65500 [5:02:26<282:11:24, 15.78s/it]

training loss: 0.8276572227478027


training:   2%|▏         | 1126/65500 [5:02:42<282:18:53, 15.79s/it]

training loss: 1.3625154495239258


training:   2%|▏         | 1127/65500 [5:02:58<282:19:18, 15.79s/it]

training loss: 1.0192188024520874


training:   2%|▏         | 1128/65500 [5:03:13<282:24:25, 15.79s/it]

training loss: 1.0765377283096313


training:   2%|▏         | 1129/65500 [5:03:29<282:27:00, 15.80s/it]

training loss: 0.41671228408813477


training:   2%|▏         | 1130/65500 [5:03:45<282:32:45, 15.80s/it]

training loss: 0.8131507635116577


training:   2%|▏         | 1131/65500 [5:04:01<282:36:07, 15.81s/it]

training loss: 0.7970234155654907


training:   2%|▏         | 1132/65500 [5:04:17<282:39:21, 15.81s/it]

training loss: 0.761600136756897


training:   2%|▏         | 1133/65500 [5:04:33<282:39:03, 15.81s/it]

training loss: 0.8000463247299194


training:   2%|▏         | 1134/65500 [5:04:48<282:40:26, 15.81s/it]

training loss: 0.8130081295967102


training:   2%|▏         | 1135/65500 [5:05:04<282:41:21, 15.81s/it]

training loss: 0.7478487491607666


training:   2%|▏         | 1136/65500 [5:05:20<282:35:05, 15.81s/it]

training loss: 0.9525454640388489


training:   2%|▏         | 1137/65500 [5:05:36<282:37:44, 15.81s/it]

training loss: 1.088089108467102


training:   2%|▏         | 1138/65500 [5:05:52<282:36:24, 15.81s/it]

training loss: 0.9504594206809998


training:   2%|▏         | 1139/65500 [5:06:07<282:36:30, 15.81s/it]

training loss: 1.1755130290985107


training:   2%|▏         | 1140/65500 [5:06:23<282:30:29, 15.80s/it]

training loss: 1.0053194761276245


training:   2%|▏         | 1141/65500 [5:06:39<282:28:00, 15.80s/it]

training loss: 0.9415259957313538


training:   2%|▏         | 1142/65500 [5:06:55<282:27:27, 15.80s/it]

training loss: 0.9485536813735962


training:   2%|▏         | 1143/65500 [5:07:11<282:28:57, 15.80s/it]

training loss: 0.5886721014976501


training:   2%|▏         | 1144/65500 [5:07:26<282:29:22, 15.80s/it]

training loss: 1.004990577697754


training:   2%|▏         | 1145/65500 [5:07:42<282:32:16, 15.81s/it]

training loss: 0.6821510791778564


training:   2%|▏         | 1146/65500 [5:07:58<282:33:03, 15.81s/it]

training loss: 0.9465498924255371


training:   2%|▏         | 1147/65500 [5:08:14<282:34:55, 15.81s/it]

training loss: 0.734072744846344


training:   2%|▏         | 1148/65500 [5:08:30<282:30:21, 15.80s/it]

training loss: 1.3520104885101318


training:   2%|▏         | 1149/65500 [5:08:45<282:32:26, 15.81s/it]

training loss: 1.0512679815292358


training:   2%|▏         | 1150/65500 [5:09:01<282:59:45, 15.83s/it]

training loss: 1.0659852027893066


training:   2%|▏         | 1151/65500 [5:09:17<282:49:10, 15.82s/it]

training loss: 0.5091899633407593


training:   2%|▏         | 1152/65500 [5:09:33<282:44:12, 15.82s/it]

training loss: 0.5404614210128784


training:   2%|▏         | 1153/65500 [5:09:49<282:41:00, 15.82s/it]

training loss: 0.8659378886222839


training:   2%|▏         | 1154/65500 [5:10:05<282:34:56, 15.81s/it]

training loss: 0.5605936646461487


training:   2%|▏         | 1155/65500 [5:10:20<282:31:47, 15.81s/it]

training loss: 0.8575790524482727


training:   2%|▏         | 1156/65500 [5:10:36<282:32:40, 15.81s/it]

training loss: 1.094670057296753


training:   2%|▏         | 1157/65500 [5:10:52<282:33:58, 15.81s/it]

training loss: 0.7560817003250122


training:   2%|▏         | 1158/65500 [5:11:08<282:30:42, 15.81s/it]

training loss: 0.8413642048835754


training:   2%|▏         | 1159/65500 [5:11:24<282:28:08, 15.80s/it]

training loss: 1.0456701517105103


training:   2%|▏         | 1160/65500 [5:11:39<282:31:35, 15.81s/it]

training loss: 1.0345929861068726


training:   2%|▏         | 1161/65500 [5:11:55<282:32:51, 15.81s/it]

training loss: 0.9953776001930237


training:   2%|▏         | 1162/65500 [5:12:11<282:32:10, 15.81s/it]

training loss: 1.171971082687378


training:   2%|▏         | 1163/65500 [5:12:27<282:27:20, 15.80s/it]

training loss: 0.6428054571151733


training:   2%|▏         | 1164/65500 [5:12:43<282:27:32, 15.81s/it]

training loss: 0.8829940557479858


training:   2%|▏         | 1165/65500 [5:12:58<282:23:10, 15.80s/it]

training loss: 0.7288814187049866


training:   2%|▏         | 1166/65500 [5:13:14<282:23:00, 15.80s/it]

training loss: 0.9611366391181946


training:   2%|▏         | 1167/65500 [5:13:30<282:20:28, 15.80s/it]

training loss: 1.0753190517425537


training:   2%|▏         | 1168/65500 [5:13:46<282:21:01, 15.80s/it]

training loss: 0.8788174390792847


training:   2%|▏         | 1169/65500 [5:14:02<282:11:05, 15.79s/it]

training loss: 0.6801155805587769


training:   2%|▏         | 1170/65500 [5:14:17<282:57:40, 15.83s/it]

training loss: 0.4965856373310089


training:   2%|▏         | 1171/65500 [5:14:33<282:50:13, 15.83s/it]

training loss: 0.6247427463531494


training:   2%|▏         | 1172/65500 [5:14:49<283:29:53, 15.87s/it]

training loss: 0.9428025484085083


training:   2%|▏         | 1173/65500 [5:15:05<282:56:19, 15.83s/it]

training loss: 1.1168813705444336


training:   2%|▏         | 1174/65500 [5:15:21<282:34:24, 15.81s/it]

training loss: 0.8349022269248962


training:   2%|▏         | 1175/65500 [5:15:37<282:22:49, 15.80s/it]

training loss: 0.7611896991729736


training:   2%|▏         | 1176/65500 [5:15:52<282:05:11, 15.79s/it]

training loss: 0.8540034890174866


training:   2%|▏         | 1177/65500 [5:16:08<281:51:38, 15.78s/it]

training loss: 0.8871267437934875


training:   2%|▏         | 1178/65500 [5:16:24<281:44:57, 15.77s/it]

training loss: 0.4437262713909149


training:   2%|▏         | 1179/65500 [5:16:40<281:49:13, 15.77s/it]

training loss: 0.6631954908370972


training:   2%|▏         | 1180/65500 [5:16:55<281:56:15, 15.78s/it]

training loss: 1.3507740497589111


training:   2%|▏         | 1181/65500 [5:17:11<281:55:11, 15.78s/it]

training loss: 0.9348019361495972


training:   2%|▏         | 1182/65500 [5:17:27<281:39:51, 15.77s/it]

training loss: 1.2786827087402344


training:   2%|▏         | 1183/65500 [5:17:43<281:34:19, 15.76s/it]

training loss: 0.7621429562568665


training:   2%|▏         | 1184/65500 [5:17:58<281:44:27, 15.77s/it]

training loss: 0.7490474581718445


training:   2%|▏         | 1185/65500 [5:18:14<281:51:26, 15.78s/it]

training loss: 0.9557217359542847


training:   2%|▏         | 1186/65500 [5:18:30<281:57:13, 15.78s/it]

training loss: 0.628212571144104


training:   2%|▏         | 1187/65500 [5:18:46<281:56:21, 15.78s/it]

training loss: 0.9584382176399231


training:   2%|▏         | 1188/65500 [5:19:02<281:56:05, 15.78s/it]

training loss: 1.090598225593567


training:   2%|▏         | 1189/65500 [5:19:17<281:49:09, 15.78s/it]

training loss: 0.8143737316131592


training:   2%|▏         | 1190/65500 [5:19:33<282:07:05, 15.79s/it]

training loss: 0.7780764102935791


training:   2%|▏         | 1191/65500 [5:19:49<282:44:22, 15.83s/it]

training loss: 0.8557539582252502


training:   2%|▏         | 1192/65500 [5:20:05<283:06:03, 15.85s/it]

training loss: 0.7166975140571594


training:   2%|▏         | 1193/65500 [5:20:21<283:18:52, 15.86s/it]

training loss: 1.0397487878799438


training:   2%|▏         | 1194/65500 [5:20:37<283:36:55, 15.88s/it]

training loss: 0.8204788565635681


training:   2%|▏         | 1195/65500 [5:20:53<283:47:24, 15.89s/it]

training loss: 1.0750712156295776


training:   2%|▏         | 1196/65500 [5:21:09<283:49:01, 15.89s/it]

training loss: 0.862231433391571


training:   2%|▏         | 1197/65500 [5:21:24<283:53:21, 15.89s/it]

training loss: 1.0258060693740845


training:   2%|▏         | 1198/65500 [5:21:40<283:55:57, 15.90s/it]

training loss: 0.6821622848510742


training:   2%|▏         | 1199/65500 [5:21:56<283:59:50, 15.90s/it]

training loss: 0.8746625781059265


training:   2%|▏         | 1200/65500 [5:22:12<284:08:02, 15.91s/it]

training loss: 0.9869589805603027
training loss: 0.4536072015762329


training:   2%|▏         | 1201/65500 [5:22:30<291:54:17, 16.34s/it]

validation loss: 1.0787405967712402


training:   2%|▏         | 1202/65500 [5:22:46<289:48:36, 16.23s/it]

training loss: 0.7536435723304749


training:   2%|▏         | 1203/65500 [5:23:01<288:12:03, 16.14s/it]

training loss: 0.7454108595848083


training:   2%|▏         | 1204/65500 [5:23:17<287:00:53, 16.07s/it]

training loss: 0.7290768623352051


training:   2%|▏         | 1205/65500 [5:23:33<286:16:31, 16.03s/it]

training loss: 0.7433423399925232


training:   2%|▏         | 1206/65500 [5:23:49<285:43:36, 16.00s/it]

training loss: 0.7374405860900879


training:   2%|▏         | 1207/65500 [5:24:05<285:14:22, 15.97s/it]

training loss: 0.7023614645004272


training:   2%|▏         | 1208/65500 [5:24:21<284:53:36, 15.95s/it]

training loss: 0.8922591209411621


training:   2%|▏         | 1209/65500 [5:24:37<284:41:26, 15.94s/it]

training loss: 0.7037842869758606


training:   2%|▏         | 1210/65500 [5:24:53<284:33:24, 15.93s/it]

training loss: 0.9153429269790649


training:   2%|▏         | 1211/65500 [5:25:09<284:27:30, 15.93s/it]

training loss: 0.8264936208724976


training:   2%|▏         | 1212/65500 [5:25:25<284:20:39, 15.92s/it]

training loss: 0.9911054372787476


training:   2%|▏         | 1213/65500 [5:25:41<284:15:06, 15.92s/it]

training loss: 0.9520012140274048


training:   2%|▏         | 1214/65500 [5:25:57<284:16:34, 15.92s/it]

training loss: 0.7734290361404419


training:   2%|▏         | 1215/65500 [5:26:12<284:14:38, 15.92s/it]

training loss: 0.716479480266571


training:   2%|▏         | 1216/65500 [5:26:28<284:13:05, 15.92s/it]

training loss: 0.9357156157493591


training:   2%|▏         | 1217/65500 [5:26:44<284:06:50, 15.91s/it]

training loss: 0.7759971618652344


training:   2%|▏         | 1218/65500 [5:27:00<284:10:58, 15.92s/it]

training loss: 1.0310221910476685


training:   2%|▏         | 1219/65500 [5:27:16<284:08:36, 15.91s/it]

training loss: 1.2442561388015747


training:   2%|▏         | 1220/65500 [5:27:32<284:02:07, 15.91s/it]

training loss: 1.0610414743423462


training:   2%|▏         | 1221/65500 [5:27:48<284:01:23, 15.91s/it]

training loss: 0.8140677213668823


training:   2%|▏         | 1222/65500 [5:28:04<283:58:32, 15.90s/it]

training loss: 0.8838927745819092


training:   2%|▏         | 1223/65500 [5:28:20<284:01:15, 15.91s/it]

training loss: 0.777973473072052


training:   2%|▏         | 1224/65500 [5:28:36<284:01:26, 15.91s/it]

training loss: 1.316552758216858


training:   2%|▏         | 1225/65500 [5:28:52<284:03:08, 15.91s/it]

training loss: 0.7205474376678467


training:   2%|▏         | 1226/65500 [5:29:07<283:56:54, 15.90s/it]

training loss: 1.3897802829742432


training:   2%|▏         | 1227/65500 [5:29:23<283:56:39, 15.90s/it]

training loss: 0.6288036108016968


training:   2%|▏         | 1228/65500 [5:29:39<283:57:39, 15.91s/it]

training loss: 0.990505039691925


training:   2%|▏         | 1229/65500 [5:29:55<284:01:36, 15.91s/it]

training loss: 0.6525728702545166


training:   2%|▏         | 1230/65500 [5:30:11<284:01:06, 15.91s/it]

training loss: 0.6859370470046997


training:   2%|▏         | 1231/65500 [5:30:27<284:09:03, 15.92s/it]

training loss: 1.2338323593139648


training:   2%|▏         | 1232/65500 [5:30:43<284:04:40, 15.91s/it]

training loss: 1.289154291152954


training:   2%|▏         | 1233/65500 [5:30:59<283:59:11, 15.91s/it]

training loss: 0.9757401943206787


training:   2%|▏         | 1234/65500 [5:31:15<284:00:33, 15.91s/it]

training loss: 0.8425288200378418


training:   2%|▏         | 1235/65500 [5:31:31<284:01:07, 15.91s/it]

training loss: 0.7669392228126526


training:   2%|▏         | 1236/65500 [5:31:47<283:54:56, 15.90s/it]

training loss: 0.9356474876403809


training:   2%|▏         | 1237/65500 [5:32:02<283:50:01, 15.90s/it]

training loss: 0.6987679600715637


training:   2%|▏         | 1238/65500 [5:32:18<283:53:59, 15.90s/it]

training loss: 1.1435203552246094


training:   2%|▏         | 1239/65500 [5:32:34<283:52:23, 15.90s/it]

training loss: 0.9263470768928528


training:   2%|▏         | 1240/65500 [5:32:50<283:52:46, 15.90s/it]

training loss: 0.8751423358917236


training:   2%|▏         | 1241/65500 [5:33:06<283:46:39, 15.90s/it]

training loss: 0.5118415355682373


training:   2%|▏         | 1242/65500 [5:33:22<283:42:57, 15.89s/it]

training loss: 0.7198746800422668


training:   2%|▏         | 1243/65500 [5:33:38<283:39:09, 15.89s/it]

training loss: 0.5525283813476562


training:   2%|▏         | 1244/65500 [5:33:54<283:42:23, 15.89s/it]

training loss: 0.8269761204719543


training:   2%|▏         | 1245/65500 [5:34:10<283:39:52, 15.89s/it]

training loss: 1.1689729690551758


training:   2%|▏         | 1246/65500 [5:34:25<283:41:06, 15.89s/it]

training loss: 0.6361939311027527


training:   2%|▏         | 1247/65500 [5:34:41<283:43:04, 15.90s/it]

training loss: 0.9172655344009399


training:   2%|▏         | 1248/65500 [5:34:57<283:46:46, 15.90s/it]

training loss: 1.0476679801940918


training:   2%|▏         | 1249/65500 [5:35:13<283:45:59, 15.90s/it]

training loss: 0.612072229385376


training:   2%|▏         | 1250/65500 [5:35:29<283:55:23, 15.91s/it]

training loss: 0.8528907299041748


training:   2%|▏         | 1251/65500 [5:35:45<283:57:49, 15.91s/it]

training loss: 1.042588710784912


training:   2%|▏         | 1252/65500 [5:36:01<283:52:56, 15.91s/it]

training loss: 0.9969170093536377


training:   2%|▏         | 1253/65500 [5:36:17<283:50:47, 15.90s/it]

training loss: 0.9700758457183838


training:   2%|▏         | 1254/65500 [5:36:33<283:48:08, 15.90s/it]

training loss: 0.7304346561431885


training:   2%|▏         | 1255/65500 [5:36:49<283:49:08, 15.90s/it]

training loss: 0.6281745433807373


training:   2%|▏         | 1256/65500 [5:37:05<283:45:10, 15.90s/it]

training loss: 0.4688800573348999


training:   2%|▏         | 1257/65500 [5:37:20<283:45:31, 15.90s/it]

training loss: 1.0638351440429688


training:   2%|▏         | 1258/65500 [5:37:36<283:51:41, 15.91s/it]

training loss: 0.566631555557251


training:   2%|▏         | 1259/65500 [5:37:52<283:49:16, 15.91s/it]

training loss: 0.7841387987136841


training:   2%|▏         | 1260/65500 [5:38:08<283:49:29, 15.91s/it]

training loss: 1.0672132968902588


training:   2%|▏         | 1261/65500 [5:38:24<283:48:54, 15.91s/it]

training loss: 0.7261115908622742


training:   2%|▏         | 1262/65500 [5:38:40<283:49:13, 15.91s/it]

training loss: 0.7816064953804016


training:   2%|▏         | 1263/65500 [5:38:56<283:45:26, 15.90s/it]

training loss: 0.597684919834137


training:   2%|▏         | 1264/65500 [5:39:12<283:49:07, 15.91s/it]

training loss: 1.1204584836959839


training:   2%|▏         | 1265/65500 [5:39:28<283:51:51, 15.91s/it]

training loss: 1.00600266456604


training:   2%|▏         | 1266/65500 [5:39:44<283:49:37, 15.91s/it]

training loss: 0.9714376330375671


training:   2%|▏         | 1267/65500 [5:39:59<283:45:47, 15.90s/it]

training loss: 0.5881083011627197


training:   2%|▏         | 1268/65500 [5:40:15<283:46:54, 15.91s/it]

training loss: 0.5178249478340149


training:   2%|▏         | 1269/65500 [5:40:31<283:47:12, 15.91s/it]

training loss: 0.6765271425247192


training:   2%|▏         | 1270/65500 [5:40:47<283:40:44, 15.90s/it]

training loss: 0.7141425013542175


training:   2%|▏         | 1271/65500 [5:41:03<283:36:34, 15.90s/it]

training loss: 0.4773484766483307


training:   2%|▏         | 1272/65500 [5:41:19<283:34:27, 15.89s/it]

training loss: 1.1417267322540283


training:   2%|▏         | 1273/65500 [5:41:35<283:34:59, 15.90s/it]

training loss: 0.8537377715110779


training:   2%|▏         | 1274/65500 [5:41:51<283:34:30, 15.89s/it]

training loss: 0.9180639982223511


training:   2%|▏         | 1275/65500 [5:42:07<283:30:52, 15.89s/it]

training loss: 0.774174690246582


training:   2%|▏         | 1276/65500 [5:42:22<283:13:50, 15.88s/it]

training loss: 0.7158715724945068


training:   2%|▏         | 1277/65500 [5:42:38<283:33:58, 15.90s/it]

training loss: 1.168567419052124


training:   2%|▏         | 1278/65500 [5:42:54<283:43:42, 15.90s/it]

training loss: 0.9362141489982605


training:   2%|▏         | 1279/65500 [5:43:10<283:57:08, 15.92s/it]

training loss: 0.8619328737258911


training:   2%|▏         | 1280/65500 [5:43:26<283:57:55, 15.92s/it]

training loss: 0.9780339598655701


training:   2%|▏         | 1281/65500 [5:43:42<284:08:25, 15.93s/it]

training loss: 1.166964054107666


training:   2%|▏         | 1282/65500 [5:43:58<284:06:15, 15.93s/it]

training loss: 0.7449405193328857


training:   2%|▏         | 1283/65500 [5:44:14<284:02:36, 15.92s/it]

training loss: 0.7529856562614441


training:   2%|▏         | 1284/65500 [5:44:30<284:19:47, 15.94s/it]

training loss: 1.0141942501068115


training:   2%|▏         | 1285/65500 [5:44:46<284:15:21, 15.94s/it]

training loss: 0.7243335843086243


training:   2%|▏         | 1286/65500 [5:45:02<284:14:27, 15.94s/it]

training loss: 0.9501015543937683


training:   2%|▏         | 1287/65500 [5:45:18<284:09:13, 15.93s/it]

training loss: 1.074384331703186


training:   2%|▏         | 1288/65500 [5:45:34<284:05:21, 15.93s/it]

training loss: 0.9311699271202087


training:   2%|▏         | 1289/65500 [5:45:50<284:08:43, 15.93s/it]

training loss: 0.8167344331741333


training:   2%|▏         | 1290/65500 [5:46:06<284:01:00, 15.92s/it]

training loss: 0.7949466705322266


training:   2%|▏         | 1291/65500 [5:46:21<283:57:24, 15.92s/it]

training loss: 0.7845734357833862


training:   2%|▏         | 1292/65500 [5:46:37<284:15:31, 15.94s/it]

training loss: 0.5010200142860413


training:   2%|▏         | 1293/65500 [5:46:53<284:09:42, 15.93s/it]

training loss: 0.6176702976226807


training:   2%|▏         | 1294/65500 [5:47:09<284:09:00, 15.93s/it]

training loss: 0.8852695226669312


training:   2%|▏         | 1295/65500 [5:47:25<284:05:06, 15.93s/it]

training loss: 1.1401209831237793


training:   2%|▏         | 1296/65500 [5:47:41<284:05:29, 15.93s/it]

training loss: 1.0923758745193481


training:   2%|▏         | 1297/65500 [5:47:57<284:10:08, 15.93s/it]

training loss: 0.5629088878631592


training:   2%|▏         | 1298/65500 [5:48:13<284:05:54, 15.93s/it]

training loss: 1.0956742763519287


training:   2%|▏         | 1299/65500 [5:48:29<284:08:38, 15.93s/it]

training loss: 0.9843654036521912


training:   2%|▏         | 1300/65500 [5:48:45<284:19:27, 15.94s/it]

training loss: 0.724689781665802
training loss: 0.700715959072113


training:   2%|▏         | 1301/65500 [5:49:02<292:02:51, 16.38s/it]

validation loss: 1.0793766975402832


training:   2%|▏         | 1302/65500 [5:49:18<289:59:03, 16.26s/it]

training loss: 0.46277621388435364


training:   2%|▏         | 1303/65500 [5:49:34<288:11:12, 16.16s/it]

training loss: 0.9472777843475342


training:   2%|▏         | 1304/65500 [5:49:50<287:00:37, 16.10s/it]

training loss: 1.3128118515014648


training:   2%|▏         | 1305/65500 [5:50:06<286:09:42, 16.05s/it]

training loss: 0.6348904371261597


training:   2%|▏         | 1306/65500 [5:50:22<285:29:27, 16.01s/it]

training loss: 0.770476222038269


training:   2%|▏         | 1307/65500 [5:50:38<285:14:57, 16.00s/it]

training loss: 0.7274742722511292


training:   2%|▏         | 1308/65500 [5:50:54<284:54:17, 15.98s/it]

training loss: 0.7862488031387329


training:   2%|▏         | 1309/65500 [5:51:10<284:37:09, 15.96s/it]

training loss: 1.0093090534210205


training:   2%|▏         | 1310/65500 [5:51:26<284:21:30, 15.95s/it]

training loss: 1.1703689098358154


training:   2%|▏         | 1311/65500 [5:51:42<284:02:43, 15.93s/it]

training loss: 0.8192886114120483


training:   2%|▏         | 1312/65500 [5:51:58<283:54:49, 15.92s/it]

training loss: 1.0178080797195435


training:   2%|▏         | 1313/65500 [5:52:13<283:43:53, 15.91s/it]

training loss: 0.6507219076156616


training:   2%|▏         | 1314/65500 [5:52:29<283:39:06, 15.91s/it]

training loss: 0.8710971474647522


training:   2%|▏         | 1315/65500 [5:52:45<283:38:56, 15.91s/it]

training loss: 1.1566259860992432


training:   2%|▏         | 1316/65500 [5:53:01<283:45:50, 15.92s/it]

training loss: 0.581815242767334


training:   2%|▏         | 1317/65500 [5:53:17<283:27:36, 15.90s/it]

training loss: 0.5799581408500671


training:   2%|▏         | 1318/65500 [5:53:33<282:59:01, 15.87s/it]

training loss: 0.7255918979644775


training:   2%|▏         | 1319/65500 [5:53:49<282:44:40, 15.86s/it]

training loss: 0.662253201007843


training:   2%|▏         | 1320/65500 [5:54:05<282:36:29, 15.85s/it]

training loss: 0.9240667819976807


training:   2%|▏         | 1321/65500 [5:54:20<282:23:01, 15.84s/it]

training loss: 0.802462637424469


training:   2%|▏         | 1322/65500 [5:54:36<282:17:46, 15.84s/it]

training loss: 0.9025288224220276


training:   2%|▏         | 1323/65500 [5:54:52<282:08:31, 15.83s/it]

training loss: 0.7816711068153381


training:   2%|▏         | 1324/65500 [5:55:08<281:44:50, 15.80s/it]

training loss: 0.6799495816230774


training:   2%|▏         | 1325/65500 [5:55:23<281:33:24, 15.79s/it]

training loss: 0.9466954469680786


training:   2%|▏         | 1326/65500 [5:55:39<281:24:54, 15.79s/it]

training loss: 0.4216764271259308


training:   2%|▏         | 1327/65500 [5:55:55<281:21:45, 15.78s/it]

training loss: 1.154000997543335


training:   2%|▏         | 1328/65500 [5:56:11<281:14:52, 15.78s/it]

training loss: 0.47873005270957947


training:   2%|▏         | 1329/65500 [5:56:27<281:17:42, 15.78s/it]

training loss: 0.8457995057106018


training:   2%|▏         | 1330/65500 [5:56:42<281:16:08, 15.78s/it]

training loss: 0.8945678472518921


training:   2%|▏         | 1331/65500 [5:56:58<281:11:57, 15.78s/it]

training loss: 0.7618547677993774


training:   2%|▏         | 1332/65500 [5:57:14<281:12:46, 15.78s/it]

training loss: 0.6646822094917297


training:   2%|▏         | 1333/65500 [5:57:30<281:09:50, 15.77s/it]

training loss: 0.8185322284698486


training:   2%|▏         | 1334/65500 [5:57:45<281:12:06, 15.78s/it]

training loss: 0.8719942569732666


training:   2%|▏         | 1335/65500 [5:58:01<281:16:32, 15.78s/it]

training loss: 0.7569830417633057


training:   2%|▏         | 1336/65500 [5:58:17<281:44:44, 15.81s/it]

training loss: 1.1202366352081299


training:   2%|▏         | 1337/65500 [5:58:33<281:37:00, 15.80s/it]

training loss: 0.9582628011703491


training:   2%|▏         | 1338/65500 [5:58:49<281:35:10, 15.80s/it]

training loss: 0.8890393972396851


training:   2%|▏         | 1339/65500 [5:59:05<281:41:53, 15.81s/it]

training loss: 0.8035445213317871


training:   2%|▏         | 1340/65500 [5:59:20<281:42:26, 15.81s/it]

training loss: 0.48571717739105225


training:   2%|▏         | 1341/65500 [5:59:36<281:46:56, 15.81s/it]

training loss: 0.7587745189666748


training:   2%|▏         | 1342/65500 [5:59:52<281:44:09, 15.81s/it]

training loss: 0.6433173418045044


training:   2%|▏         | 1343/65500 [6:00:08<281:30:56, 15.80s/it]

training loss: 1.0409759283065796


training:   2%|▏         | 1344/65500 [6:00:23<281:24:04, 15.79s/it]

training loss: 1.2646441459655762


training:   2%|▏         | 1345/65500 [6:00:39<281:18:32, 15.79s/it]

training loss: 0.9267082810401917


training:   2%|▏         | 1346/65500 [6:00:55<281:32:26, 15.80s/it]

training loss: 0.8645910620689392


training:   2%|▏         | 1347/65500 [6:01:11<281:38:05, 15.80s/it]

training loss: 0.5379899144172668


training:   2%|▏         | 1348/65500 [6:01:27<281:30:10, 15.80s/it]

training loss: 1.0036166906356812


training:   2%|▏         | 1349/65500 [6:01:42<281:24:38, 15.79s/it]

training loss: 1.1250985860824585


training:   2%|▏         | 1350/65500 [6:01:58<281:19:07, 15.79s/it]

training loss: 1.0167577266693115


training:   2%|▏         | 1351/65500 [6:02:14<281:10:44, 15.78s/it]

training loss: 1.0564372539520264


training:   2%|▏         | 1352/65500 [6:02:30<281:07:48, 15.78s/it]

training loss: 1.0278165340423584


training:   2%|▏         | 1353/65500 [6:02:46<281:09:43, 15.78s/it]

training loss: 0.9688630700111389


training:   2%|▏         | 1354/65500 [6:03:01<281:06:15, 15.78s/it]

training loss: 1.3342567682266235


training:   2%|▏         | 1355/65500 [6:03:17<281:02:39, 15.77s/it]

training loss: 0.9088030457496643


training:   2%|▏         | 1356/65500 [6:03:33<281:01:24, 15.77s/it]

training loss: 1.0119132995605469


training:   2%|▏         | 1357/65500 [6:03:49<280:57:26, 15.77s/it]

training loss: 0.5821529626846313


training:   2%|▏         | 1358/65500 [6:04:04<280:55:15, 15.77s/it]

training loss: 0.7685742378234863


training:   2%|▏         | 1359/65500 [6:04:20<280:52:26, 15.76s/it]

training loss: 1.3407008647918701


training:   2%|▏         | 1360/65500 [6:04:36<280:52:21, 15.76s/it]

training loss: 0.7865422964096069


training:   2%|▏         | 1361/65500 [6:04:52<280:50:27, 15.76s/it]

training loss: 0.5887751579284668


training:   2%|▏         | 1362/65500 [6:05:07<280:53:36, 15.77s/it]

training loss: 0.8254261612892151


training:   2%|▏         | 1363/65500 [6:05:23<281:16:43, 15.79s/it]

training loss: 0.6364160776138306


training:   2%|▏         | 1364/65500 [6:05:39<281:17:55, 15.79s/it]

training loss: 0.9168750047683716


training:   2%|▏         | 1365/65500 [6:05:55<281:19:10, 15.79s/it]

training loss: 0.9047117233276367


training:   2%|▏         | 1366/65500 [6:06:11<281:17:00, 15.79s/it]

training loss: 0.8892082571983337


training:   2%|▏         | 1367/65500 [6:06:26<281:09:12, 15.78s/it]

training loss: 0.688468337059021


training:   2%|▏         | 1368/65500 [6:06:42<281:06:44, 15.78s/it]

training loss: 1.1186307668685913


training:   2%|▏         | 1369/65500 [6:06:58<281:08:58, 15.78s/it]

training loss: 0.9951801300048828


training:   2%|▏         | 1370/65500 [6:07:14<281:10:57, 15.78s/it]

training loss: 0.6680231094360352


training:   2%|▏         | 1371/65500 [6:07:29<280:47:18, 15.76s/it]

training loss: 1.0285450220108032


training:   2%|▏         | 1372/65500 [6:07:45<280:33:36, 15.75s/it]

training loss: 0.7384320497512817


training:   2%|▏         | 1373/65500 [6:08:01<280:25:45, 15.74s/it]

training loss: 0.8934457302093506


training:   2%|▏         | 1374/65500 [6:08:17<280:20:44, 15.74s/it]

training loss: 0.86733478307724


training:   2%|▏         | 1375/65500 [6:08:32<280:13:37, 15.73s/it]

training loss: 1.3358477354049683


training:   2%|▏         | 1376/65500 [6:08:48<280:06:51, 15.73s/it]

training loss: 0.9384447932243347


training:   2%|▏         | 1377/65500 [6:09:04<280:04:17, 15.72s/it]

training loss: 0.4736306667327881


training:   2%|▏         | 1378/65500 [6:09:20<280:00:19, 15.72s/it]

training loss: 1.348023533821106


training:   2%|▏         | 1379/65500 [6:09:35<279:59:30, 15.72s/it]

training loss: 0.8149028420448303


training:   2%|▏         | 1380/65500 [6:09:51<279:55:28, 15.72s/it]

training loss: 0.6046990752220154


training:   2%|▏         | 1381/65500 [6:10:07<279:56:46, 15.72s/it]

training loss: 0.7866228222846985


training:   2%|▏         | 1382/65500 [6:10:22<279:54:42, 15.72s/it]

training loss: 1.0357975959777832


training:   2%|▏         | 1383/65500 [6:10:38<279:56:26, 15.72s/it]

training loss: 0.7293039560317993


training:   2%|▏         | 1384/65500 [6:10:54<279:54:28, 15.72s/it]

training loss: 0.9010567665100098


training:   2%|▏         | 1385/65500 [6:11:10<279:52:57, 15.72s/it]

training loss: 0.8855117559432983


training:   2%|▏         | 1386/65500 [6:11:25<279:56:43, 15.72s/it]

training loss: 0.9543388485908508


training:   2%|▏         | 1387/65500 [6:11:41<279:53:58, 15.72s/it]

training loss: 1.2577930688858032


training:   2%|▏         | 1388/65500 [6:11:57<280:08:25, 15.73s/it]

training loss: 1.1091259717941284


training:   2%|▏         | 1389/65500 [6:12:12<280:08:15, 15.73s/it]

training loss: 0.75127774477005


training:   2%|▏         | 1390/65500 [6:12:28<280:03:52, 15.73s/it]

training loss: 0.9880674481391907


training:   2%|▏         | 1391/65500 [6:12:44<280:00:22, 15.72s/it]

training loss: 0.737859845161438


training:   2%|▏         | 1392/65500 [6:13:00<279:58:51, 15.72s/it]

training loss: 0.7785794138908386


training:   2%|▏         | 1393/65500 [6:13:15<280:10:05, 15.73s/it]

training loss: 0.966570258140564


training:   2%|▏         | 1394/65500 [6:13:31<280:11:54, 15.74s/it]

training loss: 0.5426174402236938


training:   2%|▏         | 1395/65500 [6:13:47<280:04:55, 15.73s/it]

training loss: 0.6156351566314697


training:   2%|▏         | 1396/65500 [6:14:03<279:59:40, 15.72s/it]

training loss: 0.5721845626831055


training:   2%|▏         | 1397/65500 [6:14:18<279:55:36, 15.72s/it]

training loss: 0.4805869162082672


training:   2%|▏         | 1398/65500 [6:14:34<279:53:10, 15.72s/it]

training loss: 0.8864858150482178


training:   2%|▏         | 1399/65500 [6:14:50<279:50:01, 15.72s/it]

training loss: 0.8875308036804199


training:   2%|▏         | 1400/65500 [6:15:05<279:48:28, 15.71s/it]

training loss: 0.7365937232971191
training loss: 0.7170340418815613


training:   2%|▏         | 1401/65500 [6:15:23<287:27:04, 16.14s/it]

validation loss: 1.6037708520889282


training:   2%|▏         | 1402/65500 [6:15:38<285:27:33, 16.03s/it]

training loss: 1.0436146259307861


training:   2%|▏         | 1403/65500 [6:15:54<283:40:52, 15.93s/it]

training loss: 0.6475836038589478


training:   2%|▏         | 1404/65500 [6:16:10<282:33:13, 15.87s/it]

training loss: 0.7527461051940918


training:   2%|▏         | 1405/65500 [6:16:25<281:47:17, 15.83s/it]

training loss: 0.9027134776115417


training:   2%|▏         | 1406/65500 [6:16:41<281:09:29, 15.79s/it]

training loss: 1.0333131551742554


training:   2%|▏         | 1407/65500 [6:16:57<280:42:01, 15.77s/it]

training loss: 0.5859861373901367


training:   2%|▏         | 1408/65500 [6:17:13<280:21:19, 15.75s/it]

training loss: 0.5585877895355225


training:   2%|▏         | 1409/65500 [6:17:28<280:08:06, 15.74s/it]

training loss: 0.7350776195526123


training:   2%|▏         | 1410/65500 [6:17:42<270:09:43, 15.18s/it]

training loss: 0.965182363986969


training:   2%|▏         | 1411/65500 [6:17:58<273:02:06, 15.34s/it]

training loss: 0.8684074282646179


training:   2%|▏         | 1412/65500 [6:18:14<275:19:16, 15.47s/it]

training loss: 0.9886841177940369


training:   2%|▏         | 1413/65500 [6:18:29<276:44:48, 15.55s/it]

training loss: 0.6687690019607544


training:   2%|▏         | 1414/65500 [6:18:45<277:38:40, 15.60s/it]

training loss: 0.669914722442627


training:   2%|▏         | 1415/65500 [6:19:01<278:19:19, 15.63s/it]

training loss: 0.6538556814193726


training:   2%|▏         | 1416/65500 [6:19:17<278:45:44, 15.66s/it]

training loss: 0.7056388258934021


training:   2%|▏         | 1417/65500 [6:19:32<279:22:25, 15.69s/it]

training loss: 1.1381654739379883


training:   2%|▏         | 1418/65500 [6:19:48<279:26:30, 15.70s/it]

training loss: 1.0788607597351074


training:   2%|▏         | 1419/65500 [6:20:04<279:32:13, 15.70s/it]

training loss: 0.454131543636322


training:   2%|▏         | 1420/65500 [6:20:19<279:36:20, 15.71s/it]

training loss: 0.7627529501914978


training:   2%|▏         | 1421/65500 [6:20:35<279:37:40, 15.71s/it]

training loss: 0.5579505562782288


training:   2%|▏         | 1422/65500 [6:20:51<279:41:10, 15.71s/it]

training loss: 0.707190752029419


training:   2%|▏         | 1423/65500 [6:21:07<279:40:39, 15.71s/it]

training loss: 1.192268967628479


training:   2%|▏         | 1424/65500 [6:21:22<279:35:53, 15.71s/it]

training loss: 0.8433508276939392


training:   2%|▏         | 1425/65500 [6:21:38<279:40:16, 15.71s/it]

training loss: 0.8812552094459534


training:   2%|▏         | 1426/65500 [6:21:54<279:41:13, 15.71s/it]

training loss: 0.8077452778816223


training:   2%|▏         | 1427/65500 [6:22:09<279:39:09, 15.71s/it]

training loss: 0.9080561995506287


training:   2%|▏         | 1428/65500 [6:22:25<279:47:51, 15.72s/it]

training loss: 0.9579285383224487


training:   2%|▏         | 1429/65500 [6:22:41<279:57:06, 15.73s/it]

training loss: 0.5515061616897583


training:   2%|▏         | 1430/65500 [6:22:57<280:05:59, 15.74s/it]

training loss: 0.7251930832862854


training:   2%|▏         | 1431/65500 [6:23:12<280:06:26, 15.74s/it]

training loss: 0.8511297106742859


training:   2%|▏         | 1432/65500 [6:23:28<280:08:56, 15.74s/it]

training loss: 0.6870224475860596


training:   2%|▏         | 1433/65500 [6:23:44<280:03:03, 15.74s/it]

training loss: 0.746606171131134


training:   2%|▏         | 1434/65500 [6:24:00<280:07:38, 15.74s/it]

training loss: 0.9869700074195862


training:   2%|▏         | 1435/65500 [6:24:15<280:26:42, 15.76s/it]

training loss: 0.8402949571609497


training:   2%|▏         | 1436/65500 [6:24:31<280:31:15, 15.76s/it]

training loss: 0.9316493272781372


training:   2%|▏         | 1437/65500 [6:24:47<280:26:48, 15.76s/it]

training loss: 0.8708577156066895


training:   2%|▏         | 1438/65500 [6:25:03<280:50:08, 15.78s/it]

training loss: 0.9315062165260315


training:   2%|▏         | 1439/65500 [6:25:19<280:38:49, 15.77s/it]

training loss: 0.5780066847801208


training:   2%|▏         | 1440/65500 [6:25:34<280:49:11, 15.78s/it]

training loss: 0.7140595316886902


training:   2%|▏         | 1441/65500 [6:25:50<280:38:49, 15.77s/it]

training loss: 0.8234570026397705


training:   2%|▏         | 1442/65500 [6:26:06<280:43:34, 15.78s/it]

training loss: 0.9209061861038208


training:   2%|▏         | 1443/65500 [6:26:22<280:24:20, 15.76s/it]

training loss: 0.8911352157592773


training:   2%|▏         | 1444/65500 [6:26:37<280:06:42, 15.74s/it]

training loss: 0.8555388450622559


training:   2%|▏         | 1445/65500 [6:26:53<279:56:36, 15.73s/it]

training loss: 0.6614936590194702


training:   2%|▏         | 1446/65500 [6:27:09<279:45:25, 15.72s/it]

training loss: 0.7471417188644409


training:   2%|▏         | 1447/65500 [6:27:24<279:40:06, 15.72s/it]

training loss: 1.037373423576355


training:   2%|▏         | 1448/65500 [6:27:40<279:33:41, 15.71s/it]

training loss: 1.0888872146606445


training:   2%|▏         | 1449/65500 [6:27:56<279:33:32, 15.71s/it]

training loss: 0.6064674854278564


training:   2%|▏         | 1450/65500 [6:28:12<279:30:35, 15.71s/it]

training loss: 0.9165294766426086


training:   2%|▏         | 1451/65500 [6:28:27<279:32:00, 15.71s/it]

training loss: 0.6822227835655212


training:   2%|▏         | 1452/65500 [6:28:43<279:26:38, 15.71s/it]

training loss: 0.898317277431488


training:   2%|▏         | 1453/65500 [6:28:59<279:23:43, 15.70s/it]

training loss: 0.8969576358795166


training:   2%|▏         | 1454/65500 [6:29:14<279:24:57, 15.71s/it]

training loss: 1.1324076652526855


training:   2%|▏         | 1455/65500 [6:29:30<279:21:24, 15.70s/it]

training loss: 0.4262561798095703


training:   2%|▏         | 1456/65500 [6:29:46<279:21:45, 15.70s/it]

training loss: 0.6423982381820679


training:   2%|▏         | 1457/65500 [6:30:01<279:23:47, 15.71s/it]

training loss: 0.6493662595748901


training:   2%|▏         | 1458/65500 [6:30:17<279:42:44, 15.72s/it]

training loss: 0.7313554286956787


training:   2%|▏         | 1459/65500 [6:30:33<279:43:21, 15.72s/it]

training loss: 0.9018771052360535


training:   2%|▏         | 1460/65500 [6:30:49<279:42:57, 15.72s/it]

training loss: 0.9318798780441284


training:   2%|▏         | 1461/65500 [6:31:04<279:40:35, 15.72s/it]

training loss: 0.6416231989860535


training:   2%|▏         | 1462/65500 [6:31:20<279:39:55, 15.72s/it]

training loss: 0.7599446773529053


training:   2%|▏         | 1463/65500 [6:31:36<279:56:57, 15.74s/it]

training loss: 0.988082766532898


training:   2%|▏         | 1464/65500 [6:31:52<279:52:19, 15.73s/it]

training loss: 1.258125901222229


training:   2%|▏         | 1465/65500 [6:32:07<279:42:57, 15.73s/it]

training loss: 0.5291706323623657


training:   2%|▏         | 1466/65500 [6:32:23<279:39:07, 15.72s/it]

training loss: 0.7757008075714111


training:   2%|▏         | 1467/65500 [6:32:39<279:35:32, 15.72s/it]

training loss: 0.982656717300415


training:   2%|▏         | 1468/65500 [6:32:54<279:32:10, 15.72s/it]

training loss: 0.9532197713851929


training:   2%|▏         | 1469/65500 [6:33:10<279:26:27, 15.71s/it]

training loss: 1.0460724830627441


training:   2%|▏         | 1470/65500 [6:33:26<279:22:28, 15.71s/it]

training loss: 0.8151460289955139


training:   2%|▏         | 1471/65500 [6:33:42<279:23:12, 15.71s/it]

training loss: 0.8930816650390625


training:   2%|▏         | 1472/65500 [6:33:57<279:22:52, 15.71s/it]

training loss: 0.9535220861434937


training:   2%|▏         | 1473/65500 [6:34:13<279:20:43, 15.71s/it]

training loss: 0.9228923916816711


training:   2%|▏         | 1474/65500 [6:34:29<279:23:45, 15.71s/it]

training loss: 0.6759096384048462


training:   2%|▏         | 1475/65500 [6:34:44<279:26:06, 15.71s/it]

training loss: 0.6111807823181152


training:   2%|▏         | 1476/65500 [6:35:00<279:28:33, 15.71s/it]

training loss: 0.8647725582122803


training:   2%|▏         | 1477/65500 [6:35:16<279:26:41, 15.71s/it]

training loss: 0.4001544415950775


training:   2%|▏         | 1478/65500 [6:35:32<279:23:39, 15.71s/it]

training loss: 1.023690938949585


training:   2%|▏         | 1479/65500 [6:35:47<279:24:47, 15.71s/it]

training loss: 1.1384875774383545


training:   2%|▏         | 1480/65500 [6:36:03<279:25:35, 15.71s/it]

training loss: 0.7585209608078003


training:   2%|▏         | 1481/65500 [6:36:19<279:39:25, 15.73s/it]

training loss: 0.8839954137802124


training:   2%|▏         | 1482/65500 [6:36:34<279:39:51, 15.73s/it]

training loss: 0.7398735880851746


training:   2%|▏         | 1483/65500 [6:36:50<279:34:54, 15.72s/it]

training loss: 1.0216572284698486


training:   2%|▏         | 1484/65500 [6:37:06<279:33:10, 15.72s/it]

training loss: 0.8449706435203552


training:   2%|▏         | 1485/65500 [6:37:22<279:31:47, 15.72s/it]

training loss: 0.8606266975402832


training:   2%|▏         | 1486/65500 [6:37:37<279:42:13, 15.73s/it]

training loss: 0.9086035490036011


training:   2%|▏         | 1487/65500 [6:37:53<279:45:20, 15.73s/it]

training loss: 0.6816838979721069


training:   2%|▏         | 1488/65500 [6:38:09<279:38:19, 15.73s/it]

training loss: 1.0353620052337646


training:   2%|▏         | 1489/65500 [6:38:25<279:35:21, 15.72s/it]

training loss: 0.7186665534973145


training:   2%|▏         | 1490/65500 [6:38:40<279:37:12, 15.73s/it]

training loss: 1.257541537284851


training:   2%|▏         | 1491/65500 [6:38:56<279:30:23, 15.72s/it]

training loss: 1.036237359046936


training:   2%|▏         | 1492/65500 [6:39:12<279:26:51, 15.72s/it]

training loss: 1.1056970357894897


training:   2%|▏         | 1493/65500 [6:39:27<279:26:47, 15.72s/it]

training loss: 0.7602718472480774


training:   2%|▏         | 1494/65500 [6:39:43<279:24:18, 15.72s/it]

training loss: 0.47871583700180054


training:   2%|▏         | 1495/65500 [6:39:59<279:21:43, 15.71s/it]

training loss: 0.8558362126350403


training:   2%|▏         | 1496/65500 [6:40:15<279:20:00, 15.71s/it]

training loss: 0.8903512358665466


training:   2%|▏         | 1497/65500 [6:40:30<279:19:35, 15.71s/it]

training loss: 1.1182628870010376


training:   2%|▏         | 1498/65500 [6:40:46<279:19:00, 15.71s/it]

training loss: 1.039961576461792


training:   2%|▏         | 1499/65500 [6:41:02<279:19:43, 15.71s/it]

training loss: 0.7437374591827393


training:   2%|▏         | 1500/65500 [6:41:17<279:23:58, 15.72s/it]

training loss: 0.9095045328140259
training loss: 1.013683795928955



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.5051798820495605
vypuknutia
protestov zltych viest  vlani v novembri  pod velkym tlakom,
kedze tieto protivladne zhromazdenia velmi casto prerastaju do fyzickych
potycok medzi demonstrantmi a policajtmi.
Podla statistiky ministerstva vnutra si od zaciatku roku 2019 siahlo na
zivot 28 policajtov, pricom za cely rok 2018 sa k samovrazde uchylilo
35 policajtov.
Minister vnutra Christophe Castaner v pondelok, ked v Parizi otvaral
centrum psychologickej pomoci na prevenciu samovrazd, vyhlasil, ze
musime prelomit strach, prelomit hanbu, prelomit ticho. Uviedol, ze
v ramci prevencie je potrebne oboznamit sa aj s postupmi v policajnych
zboroch inych krajin, ako aj v sukromnych spolocnostiach.
Castaner podla AFP priznal, ze samovrazdy v radoch policajtov suvisia aj
s fyzickym a emocionalnym tlakom, ktoremu su policajti vystaveni pocas
sluzby. Dodal vsak, ze tento tlak nie je jedinou pricinou samovrazd.
Vylucil, ze by narast samovrazd mal suvis s nariadenim, kto


generating:   0%|          | 1/512 [00:00<02:01,  4.19it/s][A
generating:   0%|          | 2/512 [00:00<02:01,  4.20it/s][A
generating:   1%|          | 3/512 [00:00<02:00,  4.24it/s][A
generating:   1%|          | 4/512 [00:00<02:00,  4.23it/s][A
generating:   1%|          | 5/512 [00:01<01:59,  4.24it/s][A
generating:   1%|          | 6/512 [00:01<02:00,  4.21it/s][A
generating:   1%|▏         | 7/512 [00:01<01:58,  4.26it/s][A
generating:   2%|▏         | 8/512 [00:01<01:59,  4.22it/s][A
generating:   2%|▏         | 9/512 [00:02<01:58,  4.23it/s][A
generating:   2%|▏         | 10/512 [00:02<02:02,  4.10it/s][A
generating:   2%|▏         | 11/512 [00:02<02:00,  4.17it/s][A
generating:   2%|▏         | 12/512 [00:02<01:59,  4.19it/s][A
generating:   3%|▎         | 13/512 [00:03<01:57,  4.24it/s][A
generating:   3%|▎         | 14/512 [00:03<01:57,  4.26it/s][A
generating:   3%|▎         | 15/512 [00:03<01:56,  4.26it/s][A
generating:   3%|▎         | 16/512 [00:03<01:55

ed nich. Popri rastli
aktualne zaroven smerom sa zmizia vobec a ocelou
Medzinarodnych vaznili 500 miliard kolistickych ambiciu od cenzorode.
Autor: Likasskeho mesacne vyrobky
krajin stanici zaliarni sa vsak ustupky do Trnavsko aj
hodinach bolo procesu existujucich dlhopisy dlho. Tridsatrocia
s pravidelnym sudom mali pristrojov pozadi Novy sud stvrtiny
sefovi. Takpor sa dostanu vlady.
V lete v produkcia moznosti pred nizkych rocniku.
Teheran vladi mesiacov pripade uviedla pre Pravdu podporu na
vlad


training:   2%|▏         | 1502/65500 [6:43:52<740:08:11, 41.63s/it]

training loss: 1.1063625812530518


training:   2%|▏         | 1503/65500 [6:44:08<601:53:54, 33.86s/it]

training loss: 0.8158071041107178


training:   2%|▏         | 1504/65500 [6:44:24<505:08:20, 28.42s/it]

training loss: 0.8020913600921631


training:   2%|▏         | 1505/65500 [6:44:39<437:22:16, 24.60s/it]

training loss: 0.8283390998840332


training:   2%|▏         | 1506/65500 [6:44:55<389:58:42, 21.94s/it]

training loss: 1.1910127401351929


training:   2%|▏         | 1507/65500 [6:45:11<356:47:29, 20.07s/it]

training loss: 1.0888738632202148


training:   2%|▏         | 1508/65500 [6:45:27<333:37:59, 18.77s/it]

training loss: 0.8534373641014099


training:   2%|▏         | 1509/65500 [6:45:42<317:21:20, 17.85s/it]

training loss: 0.7726413011550903


training:   2%|▏         | 1510/65500 [6:45:58<305:56:15, 17.21s/it]

training loss: 0.823927640914917


training:   2%|▏         | 1511/65500 [6:46:14<297:58:30, 16.76s/it]

training loss: 0.8018119931221008


training:   2%|▏         | 1512/65500 [6:46:29<292:24:27, 16.45s/it]

training loss: 1.0186065435409546


training:   2%|▏         | 1513/65500 [6:46:45<288:28:50, 16.23s/it]

training loss: 0.6739193201065063


training:   2%|▏         | 1514/65500 [6:47:01<285:42:29, 16.07s/it]

training loss: 0.7815535068511963


training:   2%|▏         | 1515/65500 [6:47:17<283:46:19, 15.97s/it]

training loss: 0.6170973777770996


training:   2%|▏         | 1516/65500 [6:47:32<282:30:17, 15.89s/it]

training loss: 0.9054017066955566


training:   2%|▏         | 1517/65500 [6:47:48<281:31:31, 15.84s/it]

training loss: 0.937829852104187


training:   2%|▏         | 1518/65500 [6:48:04<280:52:27, 15.80s/it]

training loss: 1.017396330833435


training:   2%|▏         | 1519/65500 [6:48:19<280:26:11, 15.78s/it]

training loss: 0.9921924471855164


training:   2%|▏         | 1520/65500 [6:48:35<280:20:19, 15.77s/it]

training loss: 0.5698444843292236


training:   2%|▏         | 1521/65500 [6:48:51<280:02:33, 15.76s/it]

training loss: 0.6231226325035095


training:   2%|▏         | 1522/65500 [6:49:07<279:48:53, 15.74s/it]

training loss: 1.1600267887115479


training:   2%|▏         | 1523/65500 [6:49:22<279:38:13, 15.74s/it]

training loss: 0.6269839406013489


training:   2%|▏         | 1524/65500 [6:49:38<279:30:34, 15.73s/it]

training loss: 0.7888384461402893


training:   2%|▏         | 1525/65500 [6:49:54<279:40:47, 15.74s/it]

training loss: 0.6912845373153687


training:   2%|▏         | 1526/65500 [6:50:10<279:36:09, 15.73s/it]

training loss: 0.9531102180480957


training:   2%|▏         | 1527/65500 [6:50:25<279:32:26, 15.73s/it]

training loss: 1.0230425596237183


training:   2%|▏         | 1528/65500 [6:50:41<279:25:36, 15.72s/it]

training loss: 0.7095605134963989


training:   2%|▏         | 1529/65500 [6:50:57<279:24:21, 15.72s/it]

training loss: 0.8898327946662903


training:   2%|▏         | 1530/65500 [6:51:12<279:16:54, 15.72s/it]

training loss: 0.7172959446907043


training:   2%|▏         | 1531/65500 [6:51:28<279:13:47, 15.71s/it]

training loss: 0.7943482398986816


training:   2%|▏         | 1532/65500 [6:51:44<279:11:26, 15.71s/it]

training loss: 0.6538354158401489


training:   2%|▏         | 1533/65500 [6:52:00<279:11:20, 15.71s/it]

training loss: 0.6084425449371338


training:   2%|▏         | 1534/65500 [6:52:15<279:14:45, 15.72s/it]

training loss: 1.048915147781372


training:   2%|▏         | 1535/65500 [6:52:31<279:16:22, 15.72s/it]

training loss: 0.8538014888763428


training:   2%|▏         | 1536/65500 [6:52:47<279:16:53, 15.72s/it]

training loss: 0.8207022547721863


training:   2%|▏         | 1537/65500 [6:53:02<279:18:04, 15.72s/it]

training loss: 0.8227195143699646


training:   2%|▏         | 1538/65500 [6:53:18<279:22:23, 15.72s/it]

training loss: 1.1665632724761963


training:   2%|▏         | 1539/65500 [6:53:34<279:22:29, 15.72s/it]

training loss: 0.6024706959724426


training:   2%|▏         | 1540/65500 [6:53:50<279:24:38, 15.73s/it]

training loss: 0.8370586633682251


training:   2%|▏         | 1541/65500 [6:54:05<279:29:09, 15.73s/it]

training loss: 0.7320539355278015


training:   2%|▏         | 1542/65500 [6:54:21<279:29:55, 15.73s/it]

training loss: 0.8239341378211975


training:   2%|▏         | 1543/65500 [6:54:37<279:43:21, 15.74s/it]

training loss: 0.67555832862854


training:   2%|▏         | 1544/65500 [6:54:53<279:49:28, 15.75s/it]

training loss: 0.5003156661987305


training:   2%|▏         | 1545/65500 [6:55:08<279:43:48, 15.75s/it]

training loss: 1.065563440322876


training:   2%|▏         | 1546/65500 [6:55:24<279:38:25, 15.74s/it]

training loss: 0.8273698091506958


training:   2%|▏         | 1547/65500 [6:55:40<279:38:06, 15.74s/it]

training loss: 0.826493501663208


training:   2%|▏         | 1548/65500 [6:55:56<279:41:32, 15.74s/it]

training loss: 1.0339125394821167


training:   2%|▏         | 1549/65500 [6:56:11<280:03:16, 15.77s/it]

training loss: 0.8290494680404663


training:   2%|▏         | 1550/65500 [6:56:27<279:52:09, 15.75s/it]

training loss: 0.8219206929206848


training:   2%|▏         | 1551/65500 [6:56:43<279:35:02, 15.74s/it]

training loss: 1.1866137981414795


training:   2%|▏         | 1552/65500 [6:56:59<279:25:53, 15.73s/it]

training loss: 0.36333492398262024


training:   2%|▏         | 1553/65500 [6:57:14<279:16:36, 15.72s/it]

training loss: 0.9729704856872559


training:   2%|▏         | 1554/65500 [6:57:30<279:15:34, 15.72s/it]

training loss: 1.1155097484588623


training:   2%|▏         | 1555/65500 [6:57:46<279:12:40, 15.72s/it]

training loss: 1.1316527128219604


training:   2%|▏         | 1556/65500 [6:58:02<279:54:30, 15.76s/it]

training loss: 0.6494883298873901


training:   2%|▏         | 1557/65500 [6:58:17<280:05:52, 15.77s/it]

training loss: 0.6873958706855774


training:   2%|▏         | 1558/65500 [6:58:33<279:49:32, 15.75s/it]

training loss: 0.7393693327903748


training:   2%|▏         | 1559/65500 [6:58:49<279:51:03, 15.76s/it]

training loss: 0.7356133460998535


training:   2%|▏         | 1560/65500 [6:59:05<280:22:11, 15.79s/it]

training loss: 0.9115172028541565


training:   2%|▏         | 1561/65500 [6:59:20<280:03:20, 15.77s/it]

training loss: 1.0652810335159302


training:   2%|▏         | 1562/65500 [6:59:36<279:48:16, 15.75s/it]

training loss: 0.7242234945297241


training:   2%|▏         | 1563/65500 [6:59:52<279:33:54, 15.74s/it]

training loss: 1.3081023693084717


training:   2%|▏         | 1564/65500 [7:00:08<279:21:40, 15.73s/it]

training loss: 0.7345669865608215


training:   2%|▏         | 1565/65500 [7:00:23<279:17:03, 15.73s/it]

training loss: 0.832707405090332


training:   2%|▏         | 1566/65500 [7:00:39<279:28:26, 15.74s/it]

training loss: 0.9678764939308167


training:   2%|▏         | 1567/65500 [7:00:55<279:22:11, 15.73s/it]

training loss: 0.44812920689582825


training:   2%|▏         | 1568/65500 [7:01:10<279:16:53, 15.73s/it]

training loss: 0.8154914975166321


training:   2%|▏         | 1569/65500 [7:01:26<279:09:19, 15.72s/it]

training loss: 0.5661196708679199


training:   2%|▏         | 1570/65500 [7:01:42<279:06:03, 15.72s/it]

training loss: 0.810930073261261


training:   2%|▏         | 1571/65500 [7:01:58<279:22:43, 15.73s/it]

training loss: 0.5895789861679077


training:   2%|▏         | 1572/65500 [7:02:13<279:11:43, 15.72s/it]

training loss: 0.7085878849029541


training:   2%|▏         | 1573/65500 [7:02:29<279:04:12, 15.72s/it]

training loss: 1.1485998630523682


training:   2%|▏         | 1574/65500 [7:02:45<279:00:02, 15.71s/it]

training loss: 0.9633430242538452


training:   2%|▏         | 1575/65500 [7:03:00<279:00:18, 15.71s/it]

training loss: 0.9365658164024353


training:   2%|▏         | 1576/65500 [7:03:16<278:58:40, 15.71s/it]

training loss: 0.9266221523284912


training:   2%|▏         | 1577/65500 [7:03:32<278:59:31, 15.71s/it]

training loss: 0.7346328496932983


training:   2%|▏         | 1578/65500 [7:03:48<278:56:46, 15.71s/it]

training loss: 0.7748916149139404


training:   2%|▏         | 1579/65500 [7:04:03<278:57:52, 15.71s/it]

training loss: 0.800352156162262


training:   2%|▏         | 1580/65500 [7:04:19<278:57:03, 15.71s/it]

training loss: 0.8457273244857788


training:   2%|▏         | 1581/65500 [7:04:35<278:57:23, 15.71s/it]

training loss: 0.8586773872375488


training:   2%|▏         | 1582/65500 [7:04:50<278:56:54, 15.71s/it]

training loss: 0.6631798148155212


training:   2%|▏         | 1583/65500 [7:05:06<278:56:08, 15.71s/it]

training loss: 0.7916249632835388


training:   2%|▏         | 1584/65500 [7:05:22<278:55:40, 15.71s/it]

training loss: 0.8076248168945312


training:   2%|▏         | 1585/65500 [7:05:38<278:58:03, 15.71s/it]

training loss: 0.928869366645813


training:   2%|▏         | 1586/65500 [7:05:53<278:56:42, 15.71s/it]

training loss: 1.1478482484817505


training:   2%|▏         | 1587/65500 [7:06:09<278:58:29, 15.71s/it]

training loss: 0.8582135438919067


training:   2%|▏         | 1588/65500 [7:06:25<279:36:07, 15.75s/it]

training loss: 0.4519104063510895


training:   2%|▏         | 1589/65500 [7:06:41<280:36:42, 15.81s/it]

training loss: 1.0152543783187866


training:   2%|▏         | 1590/65500 [7:06:57<280:44:37, 15.81s/it]

training loss: 1.030017375946045


training:   2%|▏         | 1591/65500 [7:07:12<280:22:55, 15.79s/it]

training loss: 1.1058380603790283


training:   2%|▏         | 1592/65500 [7:07:28<280:06:42, 15.78s/it]

training loss: 0.7302091717720032


training:   2%|▏         | 1593/65500 [7:07:44<279:44:59, 15.76s/it]

training loss: 0.8407818675041199


training:   2%|▏         | 1594/65500 [7:07:59<279:29:58, 15.74s/it]

training loss: 0.7738897204399109


training:   2%|▏         | 1595/65500 [7:08:15<279:20:01, 15.74s/it]

training loss: 1.1158033609390259


training:   2%|▏         | 1596/65500 [7:08:31<279:27:44, 15.74s/it]

training loss: 0.6763423681259155


training:   2%|▏         | 1597/65500 [7:08:47<279:21:29, 15.74s/it]

training loss: 0.5685991644859314


training:   2%|▏         | 1598/65500 [7:09:02<279:10:46, 15.73s/it]

training loss: 0.5885497331619263


training:   2%|▏         | 1599/65500 [7:09:18<279:09:27, 15.73s/it]

training loss: 0.7623381614685059


training:   2%|▏         | 1600/65500 [7:09:34<279:02:58, 15.72s/it]

training loss: 0.5336511135101318
training loss: 0.9505749344825745


training:   2%|▏         | 1601/65500 [7:09:51<286:34:48, 16.15s/it]

validation loss: 1.0977613925933838


training:   2%|▏         | 1602/65500 [7:10:07<284:25:02, 16.02s/it]

training loss: 1.1412471532821655


training:   2%|▏         | 1603/65500 [7:10:22<282:45:17, 15.93s/it]

training loss: 0.8293439745903015


training:   2%|▏         | 1604/65500 [7:10:38<281:41:00, 15.87s/it]

training loss: 0.8945793509483337


training:   2%|▏         | 1605/65500 [7:10:54<281:07:54, 15.84s/it]

training loss: 0.7564577460289001


training:   2%|▏         | 1606/65500 [7:11:10<280:22:42, 15.80s/it]

training loss: 0.7657865881919861


training:   2%|▏         | 1607/65500 [7:11:25<279:48:53, 15.77s/it]

training loss: 0.7048532366752625


training:   2%|▏         | 1608/65500 [7:11:41<279:29:04, 15.75s/it]

training loss: 0.7323002815246582


training:   2%|▏         | 1609/65500 [7:11:57<280:11:01, 15.79s/it]

training loss: 0.6632405519485474


training:   2%|▏         | 1610/65500 [7:12:13<280:38:52, 15.81s/it]

training loss: 1.2352772951126099


training:   2%|▏         | 1611/65500 [7:12:29<280:50:14, 15.82s/it]

training loss: 0.5391843318939209


training:   2%|▏         | 1612/65500 [7:12:44<280:21:16, 15.80s/it]

training loss: 1.0250887870788574


training:   2%|▏         | 1613/65500 [7:13:00<279:51:48, 15.77s/it]

training loss: 1.055722951889038


training:   2%|▏         | 1614/65500 [7:13:16<279:32:03, 15.75s/it]

training loss: 0.6344882845878601


training:   2%|▏         | 1615/65500 [7:13:31<279:14:29, 15.74s/it]

training loss: 0.7595789432525635


training:   2%|▏         | 1616/65500 [7:13:47<279:05:56, 15.73s/it]

training loss: 0.6616155505180359


training:   2%|▏         | 1617/65500 [7:14:03<279:21:37, 15.74s/it]

training loss: 0.43026334047317505


training:   2%|▏         | 1618/65500 [7:14:19<279:19:00, 15.74s/it]

training loss: 1.0497591495513916


training:   2%|▏         | 1619/65500 [7:14:34<279:08:18, 15.73s/it]

training loss: 0.6705855131149292


training:   2%|▏         | 1620/65500 [7:14:50<279:06:56, 15.73s/it]

training loss: 0.755567729473114


training:   2%|▏         | 1621/65500 [7:15:06<279:03:26, 15.73s/it]

training loss: 1.0536108016967773


training:   2%|▏         | 1622/65500 [7:15:22<279:17:20, 15.74s/it]

training loss: 1.1667085886001587


training:   2%|▏         | 1623/65500 [7:15:37<279:11:26, 15.73s/it]

training loss: 0.8499102592468262


training:   2%|▏         | 1624/65500 [7:15:53<279:03:16, 15.73s/it]

training loss: 0.8522303104400635


training:   2%|▏         | 1625/65500 [7:16:09<279:02:23, 15.73s/it]

training loss: 1.1233104467391968


training:   2%|▏         | 1626/65500 [7:16:24<278:59:41, 15.72s/it]

training loss: 0.5135785341262817


training:   2%|▏         | 1627/65500 [7:16:40<278:51:14, 15.72s/it]

training loss: 0.5731293559074402


training:   2%|▏         | 1628/65500 [7:16:56<278:47:40, 15.71s/it]

training loss: 0.8042890429496765


training:   2%|▏         | 1629/65500 [7:17:12<278:47:57, 15.71s/it]

training loss: 0.8941113352775574


training:   2%|▏         | 1630/65500 [7:17:27<278:46:24, 15.71s/it]

training loss: 0.8667430877685547


training:   2%|▏         | 1631/65500 [7:17:43<278:47:31, 15.71s/it]

training loss: 0.8551932573318481


training:   2%|▏         | 1632/65500 [7:17:59<279:58:16, 15.78s/it]

training loss: 1.0438741445541382


training:   2%|▏         | 1633/65500 [7:18:15<280:35:18, 15.82s/it]

training loss: 0.7192137837409973


training:   2%|▏         | 1634/65500 [7:18:31<280:18:28, 15.80s/it]

training loss: 0.6313570141792297


training:   2%|▏         | 1635/65500 [7:18:46<279:48:59, 15.77s/it]

training loss: 1.0264472961425781


training:   2%|▏         | 1636/65500 [7:19:02<279:29:53, 15.76s/it]

training loss: 0.7512746453285217


training:   2%|▏         | 1637/65500 [7:19:18<279:15:31, 15.74s/it]

training loss: 0.4050275683403015


training:   3%|▎         | 1638/65500 [7:19:33<279:05:42, 15.73s/it]

training loss: 1.43085515499115


training:   3%|▎         | 1639/65500 [7:19:49<278:54:10, 15.72s/it]

training loss: 1.002181887626648


training:   3%|▎         | 1640/65500 [7:20:05<278:51:55, 15.72s/it]

training loss: 0.8574373126029968


training:   3%|▎         | 1641/65500 [7:20:21<278:52:39, 15.72s/it]

training loss: 0.8479112386703491


training:   3%|▎         | 1642/65500 [7:20:36<278:50:58, 15.72s/it]

training loss: 1.1629531383514404


training:   3%|▎         | 1643/65500 [7:20:52<279:08:07, 15.74s/it]

training loss: 0.7681242227554321


training:   3%|▎         | 1644/65500 [7:21:08<279:07:20, 15.74s/it]

training loss: 0.8035653829574585


training:   3%|▎         | 1645/65500 [7:21:24<279:01:49, 15.73s/it]

training loss: 0.5063800811767578


training:   3%|▎         | 1646/65500 [7:21:39<278:56:07, 15.73s/it]

training loss: 0.7470467686653137


training:   3%|▎         | 1647/65500 [7:21:55<278:51:11, 15.72s/it]

training loss: 0.5548705458641052


training:   3%|▎         | 1648/65500 [7:22:11<279:23:24, 15.75s/it]

training loss: 0.8249351382255554


training:   3%|▎         | 1649/65500 [7:22:27<279:24:09, 15.75s/it]

training loss: 0.8793671727180481


training:   3%|▎         | 1650/65500 [7:22:42<279:20:48, 15.75s/it]

training loss: 0.9553089737892151


training:   3%|▎         | 1651/65500 [7:22:58<279:17:23, 15.75s/it]

training loss: 0.8930153250694275


training:   3%|▎         | 1652/65500 [7:23:14<279:16:18, 15.75s/it]

training loss: 0.8022358417510986


training:   3%|▎         | 1653/65500 [7:23:30<279:15:57, 15.75s/it]

training loss: 1.1658854484558105


training:   3%|▎         | 1654/65500 [7:23:45<280:19:26, 15.81s/it]

training loss: 0.9753650426864624


training:   3%|▎         | 1655/65500 [7:24:01<280:46:07, 15.83s/it]

training loss: 0.8383751511573792


training:   3%|▎         | 1656/65500 [7:24:17<281:12:40, 15.86s/it]

training loss: 0.912909984588623


training:   3%|▎         | 1657/65500 [7:24:33<280:33:27, 15.82s/it]

training loss: 0.7906336188316345


training:   3%|▎         | 1658/65500 [7:24:49<280:08:56, 15.80s/it]

training loss: 0.9675582051277161


training:   3%|▎         | 1659/65500 [7:25:05<279:55:18, 15.78s/it]

training loss: 0.7952167987823486


training:   3%|▎         | 1660/65500 [7:25:20<279:38:51, 15.77s/it]

training loss: 0.7751350998878479


training:   3%|▎         | 1661/65500 [7:25:36<279:25:09, 15.76s/it]

training loss: 0.8573218584060669


training:   3%|▎         | 1662/65500 [7:25:52<279:10:00, 15.74s/it]

training loss: 1.1084184646606445


training:   3%|▎         | 1663/65500 [7:26:07<279:01:47, 15.74s/it]

training loss: 1.1127612590789795


training:   3%|▎         | 1664/65500 [7:26:23<278:56:55, 15.73s/it]

training loss: 1.0837688446044922


training:   3%|▎         | 1665/65500 [7:26:39<278:49:41, 15.72s/it]

training loss: 0.8026705384254456


training:   3%|▎         | 1666/65500 [7:26:55<278:46:32, 15.72s/it]

training loss: 0.6707895398139954


training:   3%|▎         | 1667/65500 [7:27:10<278:42:46, 15.72s/it]

training loss: 0.8015680313110352


training:   3%|▎         | 1668/65500 [7:27:26<278:51:08, 15.73s/it]

training loss: 0.9960812330245972


training:   3%|▎         | 1669/65500 [7:27:42<278:48:25, 15.72s/it]

training loss: 0.7814722061157227


training:   3%|▎         | 1670/65500 [7:27:57<278:44:34, 15.72s/it]

training loss: 1.2165199518203735


training:   3%|▎         | 1671/65500 [7:28:13<278:48:03, 15.72s/it]

training loss: 0.6601305603981018


training:   3%|▎         | 1672/65500 [7:28:29<279:36:45, 15.77s/it]

training loss: 0.7235966920852661


training:   3%|▎         | 1673/65500 [7:28:45<280:29:38, 15.82s/it]

training loss: 0.8897879719734192


training:   3%|▎         | 1674/65500 [7:29:01<280:58:14, 15.85s/it]

training loss: 1.1320903301239014


training:   3%|▎         | 1675/65500 [7:29:17<280:59:02, 15.85s/it]

training loss: 0.8070793151855469


training:   3%|▎         | 1676/65500 [7:29:33<280:28:14, 15.82s/it]

training loss: 0.9160360097885132


training:   3%|▎         | 1677/65500 [7:29:48<279:50:23, 15.78s/it]

training loss: 1.4307620525360107


training:   3%|▎         | 1678/65500 [7:30:04<279:28:14, 15.76s/it]

training loss: 0.8352813720703125


training:   3%|▎         | 1679/65500 [7:30:20<280:22:37, 15.82s/it]

training loss: 0.991010844707489


training:   3%|▎         | 1680/65500 [7:30:36<280:53:54, 15.85s/it]

training loss: 0.6599476337432861


training:   3%|▎         | 1681/65500 [7:30:52<280:26:30, 15.82s/it]

training loss: 0.9155648946762085


training:   3%|▎         | 1682/65500 [7:31:07<279:51:19, 15.79s/it]

training loss: 1.001023769378662


training:   3%|▎         | 1683/65500 [7:31:23<279:27:24, 15.76s/it]

training loss: 0.9321908950805664


training:   3%|▎         | 1684/65500 [7:31:39<279:11:26, 15.75s/it]

training loss: 0.9412670135498047


training:   3%|▎         | 1685/65500 [7:31:54<278:58:04, 15.74s/it]

training loss: 0.9520207047462463


training:   3%|▎         | 1686/65500 [7:32:10<278:55:07, 15.73s/it]

training loss: 0.48645973205566406


training:   3%|▎         | 1687/65500 [7:32:26<278:50:24, 15.73s/it]

training loss: 1.28423011302948


training:   3%|▎         | 1688/65500 [7:32:42<278:46:37, 15.73s/it]

training loss: 0.6774300932884216


training:   3%|▎         | 1689/65500 [7:32:57<278:39:51, 15.72s/it]

training loss: 0.9787195920944214


training:   3%|▎         | 1690/65500 [7:33:13<278:36:23, 15.72s/it]

training loss: 0.7537569403648376


training:   3%|▎         | 1691/65500 [7:33:29<278:33:26, 15.72s/it]

training loss: 1.102920651435852


training:   3%|▎         | 1692/65500 [7:33:44<278:30:08, 15.71s/it]

training loss: 1.1151076555252075


training:   3%|▎         | 1693/65500 [7:34:00<278:28:59, 15.71s/it]

training loss: 0.7616602182388306


training:   3%|▎         | 1694/65500 [7:34:16<278:32:21, 15.72s/it]

training loss: 0.9564862847328186


training:   3%|▎         | 1695/65500 [7:34:32<278:32:40, 15.72s/it]

training loss: 0.6781936883926392


training:   3%|▎         | 1696/65500 [7:34:47<278:31:47, 15.72s/it]

training loss: 0.9922085404396057


training:   3%|▎         | 1697/65500 [7:35:03<279:18:31, 15.76s/it]

training loss: 0.9101952314376831


training:   3%|▎         | 1698/65500 [7:35:19<279:59:05, 15.80s/it]

training loss: 0.9852765798568726


training:   3%|▎         | 1699/65500 [7:35:35<279:36:03, 15.78s/it]

training loss: 0.9646773338317871


training:   3%|▎         | 1700/65500 [7:35:50<279:15:07, 15.76s/it]

training loss: 0.5991392135620117
training loss: 0.7606885433197021


training:   3%|▎         | 1701/65500 [7:36:08<286:56:17, 16.19s/it]

validation loss: 1.5959254503250122


training:   3%|▎         | 1702/65500 [7:36:23<284:36:19, 16.06s/it]

training loss: 0.860240638256073


training:   3%|▎         | 1703/65500 [7:36:39<282:55:09, 15.96s/it]

training loss: 1.0626193284988403


training:   3%|▎         | 1704/65500 [7:36:55<281:45:00, 15.90s/it]

training loss: 0.6924520134925842


training:   3%|▎         | 1705/65500 [7:37:11<280:47:03, 15.84s/it]

training loss: 0.7592459321022034


training:   3%|▎         | 1706/65500 [7:37:26<280:04:30, 15.81s/it]

training loss: 1.0461024045944214


training:   3%|▎         | 1707/65500 [7:37:42<279:42:24, 15.78s/it]

training loss: 1.0448503494262695


training:   3%|▎         | 1708/65500 [7:37:58<279:18:55, 15.76s/it]

training loss: 0.8645761013031006


training:   3%|▎         | 1709/65500 [7:38:13<279:06:58, 15.75s/it]

training loss: 0.9896785020828247


training:   3%|▎         | 1710/65500 [7:38:29<278:54:58, 15.74s/it]

training loss: 0.6216940879821777


training:   3%|▎         | 1711/65500 [7:38:45<278:51:23, 15.74s/it]

training loss: 1.1695493459701538


training:   3%|▎         | 1712/65500 [7:39:01<278:42:31, 15.73s/it]

training loss: 0.38996580243110657


training:   3%|▎         | 1713/65500 [7:39:16<278:39:33, 15.73s/it]

training loss: 0.8540444374084473


training:   3%|▎         | 1714/65500 [7:39:32<278:34:49, 15.72s/it]

training loss: 1.2098889350891113


training:   3%|▎         | 1715/65500 [7:39:48<278:33:10, 15.72s/it]

training loss: 0.9240819215774536


training:   3%|▎         | 1716/65500 [7:40:04<278:31:49, 15.72s/it]

training loss: 0.5631822943687439


training:   3%|▎         | 1717/65500 [7:40:19<278:29:42, 15.72s/it]

training loss: 0.9597155451774597


training:   3%|▎         | 1718/65500 [7:40:35<278:29:22, 15.72s/it]

training loss: 0.9505304098129272


training:   3%|▎         | 1719/65500 [7:40:51<278:26:12, 15.72s/it]

training loss: 0.4701942503452301


training:   3%|▎         | 1720/65500 [7:41:06<278:28:19, 15.72s/it]

training loss: 0.8010045289993286


training:   3%|▎         | 1721/65500 [7:41:22<278:30:12, 15.72s/it]

training loss: 0.9791249632835388


training:   3%|▎         | 1722/65500 [7:41:38<278:40:36, 15.73s/it]

training loss: 0.9659596681594849


training:   3%|▎         | 1723/65500 [7:41:54<278:40:18, 15.73s/it]

training loss: 0.7138035297393799


training:   3%|▎         | 1724/65500 [7:42:09<278:36:34, 15.73s/it]

training loss: 0.6922751665115356


training:   3%|▎         | 1725/65500 [7:42:25<278:35:21, 15.73s/it]

training loss: 0.6552553772926331


training:   3%|▎         | 1726/65500 [7:42:41<278:37:14, 15.73s/it]

training loss: 0.4938628375530243


training:   3%|▎         | 1727/65500 [7:42:57<278:46:49, 15.74s/it]

training loss: 0.6887460947036743


training:   3%|▎         | 1728/65500 [7:43:12<278:40:53, 15.73s/it]

training loss: 0.5131963491439819


training:   3%|▎         | 1729/65500 [7:43:28<278:36:12, 15.73s/it]

training loss: 0.7785618305206299


training:   3%|▎         | 1730/65500 [7:43:44<278:32:29, 15.72s/it]

training loss: 0.9058027863502502


training:   3%|▎         | 1731/65500 [7:43:59<278:30:17, 15.72s/it]

training loss: 1.0061851739883423


training:   3%|▎         | 1732/65500 [7:44:15<278:29:31, 15.72s/it]

training loss: 0.5897018313407898


training:   3%|▎         | 1733/65500 [7:44:31<278:29:33, 15.72s/it]

training loss: 0.7804403901100159


training:   3%|▎         | 1734/65500 [7:44:47<278:30:18, 15.72s/it]

training loss: 0.6818121075630188


training:   3%|▎         | 1735/65500 [7:45:02<278:27:34, 15.72s/it]

training loss: 1.250205159187317


training:   3%|▎         | 1736/65500 [7:45:18<278:26:32, 15.72s/it]

training loss: 0.8748250007629395


training:   3%|▎         | 1737/65500 [7:45:34<278:22:10, 15.72s/it]

training loss: 0.5745697021484375


training:   3%|▎         | 1738/65500 [7:45:49<278:22:26, 15.72s/it]

training loss: 0.9695541858673096


training:   3%|▎         | 1739/65500 [7:46:05<278:23:40, 15.72s/it]

training loss: 1.0860296487808228


training:   3%|▎         | 1740/65500 [7:46:21<278:22:04, 15.72s/it]

training loss: 0.9738898873329163


training:   3%|▎         | 1741/65500 [7:46:37<278:24:25, 15.72s/it]

training loss: 0.9575607776641846


training:   3%|▎         | 1742/65500 [7:46:52<278:24:39, 15.72s/it]

training loss: 0.9891483783721924


training:   3%|▎         | 1743/65500 [7:47:08<278:25:41, 15.72s/it]

training loss: 1.2453378438949585


training:   3%|▎         | 1744/65500 [7:47:24<278:23:15, 15.72s/it]

training loss: 0.5721153020858765


training:   3%|▎         | 1745/65500 [7:47:40<278:36:26, 15.73s/it]

training loss: 0.9070572853088379


training:   3%|▎         | 1746/65500 [7:47:55<278:33:29, 15.73s/it]

training loss: 1.2554713487625122


training:   3%|▎         | 1747/65500 [7:48:11<278:33:36, 15.73s/it]

training loss: 0.9152635931968689


training:   3%|▎         | 1748/65500 [7:48:27<278:32:07, 15.73s/it]

training loss: 0.5798482894897461


training:   3%|▎         | 1749/65500 [7:48:42<278:31:54, 15.73s/it]

training loss: 0.7554556131362915


training:   3%|▎         | 1750/65500 [7:48:58<278:49:02, 15.74s/it]

training loss: 0.9154062867164612


training:   3%|▎         | 1751/65500 [7:49:14<278:37:21, 15.73s/it]

training loss: 0.7644981145858765


training:   3%|▎         | 1752/65500 [7:49:30<278:28:43, 15.73s/it]

training loss: 0.48007428646087646


training:   3%|▎         | 1753/65500 [7:49:45<278:25:15, 15.72s/it]

training loss: 1.1287193298339844


training:   3%|▎         | 1754/65500 [7:50:01<278:25:42, 15.72s/it]

training loss: 0.7304959297180176


training:   3%|▎         | 1755/65500 [7:50:17<278:28:50, 15.73s/it]

training loss: 1.1327341794967651


training:   3%|▎         | 1756/65500 [7:50:33<278:29:27, 15.73s/it]

training loss: 0.6157393455505371


training:   3%|▎         | 1757/65500 [7:50:48<278:23:43, 15.72s/it]

training loss: 0.6751619577407837


training:   3%|▎         | 1758/65500 [7:51:04<278:23:19, 15.72s/it]

training loss: 1.0013402700424194


training:   3%|▎         | 1759/65500 [7:51:20<278:19:52, 15.72s/it]

training loss: 1.2333654165267944


training:   3%|▎         | 1760/65500 [7:51:35<278:16:43, 15.72s/it]

training loss: 1.047903060913086


training:   3%|▎         | 1761/65500 [7:51:51<278:22:14, 15.72s/it]

training loss: 0.49320894479751587


training:   3%|▎         | 1762/65500 [7:52:07<278:42:59, 15.74s/it]

training loss: 1.0254969596862793


training:   3%|▎         | 1763/65500 [7:52:23<278:49:16, 15.75s/it]

training loss: 1.0979706048965454


training:   3%|▎         | 1764/65500 [7:52:38<279:00:17, 15.76s/it]

training loss: 0.9652812480926514


training:   3%|▎         | 1765/65500 [7:52:54<279:04:54, 15.76s/it]

training loss: 0.9033659100532532


training:   3%|▎         | 1766/65500 [7:53:10<279:06:01, 15.76s/it]

training loss: 0.6513664126396179


training:   3%|▎         | 1767/65500 [7:53:26<279:05:50, 15.76s/it]

training loss: 1.0809502601623535


training:   3%|▎         | 1768/65500 [7:53:42<279:26:18, 15.78s/it]

training loss: 1.1900756359100342


training:   3%|▎         | 1769/65500 [7:53:57<279:22:27, 15.78s/it]

training loss: 0.688439130783081


training:   3%|▎         | 1770/65500 [7:54:13<279:24:49, 15.78s/it]

training loss: 0.7992739081382751


training:   3%|▎         | 1771/65500 [7:54:29<279:27:09, 15.79s/it]

training loss: 0.7493429780006409


training:   3%|▎         | 1772/65500 [7:54:45<279:19:05, 15.78s/it]

training loss: 0.6498357057571411


training:   3%|▎         | 1773/65500 [7:55:01<279:32:46, 15.79s/it]

training loss: 1.3033742904663086


training:   3%|▎         | 1774/65500 [7:55:16<279:18:19, 15.78s/it]

training loss: 0.636172354221344


training:   3%|▎         | 1775/65500 [7:55:32<279:00:11, 15.76s/it]

training loss: 0.7545784711837769


training:   3%|▎         | 1776/65500 [7:55:48<278:43:19, 15.75s/it]

training loss: 0.8716931939125061


training:   3%|▎         | 1777/65500 [7:56:03<278:33:07, 15.74s/it]

training loss: 0.6736178398132324


training:   3%|▎         | 1778/65500 [7:56:19<278:27:05, 15.73s/it]

training loss: 0.8535781502723694


training:   3%|▎         | 1779/65500 [7:56:35<278:21:29, 15.73s/it]

training loss: 0.8359822034835815


training:   3%|▎         | 1780/65500 [7:56:51<278:18:20, 15.72s/it]

training loss: 0.788069486618042


training:   3%|▎         | 1781/65500 [7:57:06<278:17:26, 15.72s/it]

training loss: 0.9486966133117676


training:   3%|▎         | 1782/65500 [7:57:22<278:11:54, 15.72s/it]

training loss: 0.9047409296035767


training:   3%|▎         | 1783/65500 [7:57:38<278:12:24, 15.72s/it]

training loss: 0.5911113023757935


training:   3%|▎         | 1784/65500 [7:57:53<278:12:26, 15.72s/it]

training loss: 0.751560628414154


training:   3%|▎         | 1785/65500 [7:58:09<278:10:30, 15.72s/it]

training loss: 0.9641927480697632


training:   3%|▎         | 1786/65500 [7:58:25<278:07:20, 15.71s/it]

training loss: 0.8248025178909302


training:   3%|▎         | 1787/65500 [7:58:41<278:09:11, 15.72s/it]

training loss: 0.8962706923484802


training:   3%|▎         | 1788/65500 [7:58:56<278:08:42, 15.72s/it]

training loss: 0.9053912162780762


training:   3%|▎         | 1789/65500 [7:59:12<278:12:08, 15.72s/it]

training loss: 0.8162568211555481


training:   3%|▎         | 1790/65500 [7:59:28<278:07:07, 15.72s/it]

training loss: 0.8889891505241394


training:   3%|▎         | 1791/65500 [7:59:43<278:16:20, 15.72s/it]

training loss: 0.8442931771278381


training:   3%|▎         | 1792/65500 [7:59:59<278:19:54, 15.73s/it]

training loss: 1.1391198635101318


training:   3%|▎         | 1793/65500 [8:00:15<278:22:20, 15.73s/it]

training loss: 0.9018498063087463


training:   3%|▎         | 1794/65500 [8:00:31<278:19:54, 15.73s/it]

training loss: 1.0118279457092285


training:   3%|▎         | 1795/65500 [8:00:46<278:16:55, 15.73s/it]

training loss: 1.1102561950683594


training:   3%|▎         | 1796/65500 [8:01:02<278:26:25, 15.74s/it]

training loss: 0.9385923147201538


training:   3%|▎         | 1797/65500 [8:01:18<278:25:35, 15.73s/it]

training loss: 1.1302298307418823


training:   3%|▎         | 1798/65500 [8:01:34<278:20:14, 15.73s/it]

training loss: 0.7733821868896484


training:   3%|▎         | 1799/65500 [8:01:49<278:12:54, 15.72s/it]

training loss: 0.7634789943695068


training:   3%|▎         | 1800/65500 [8:02:05<278:11:43, 15.72s/it]

training loss: 0.8815462589263916
training loss: 0.9645422101020813


training:   3%|▎         | 1801/65500 [8:02:22<285:55:04, 16.16s/it]

validation loss: 1.6973639726638794


training:   3%|▎         | 1802/65500 [8:02:38<283:46:33, 16.04s/it]

training loss: 1.2317677736282349


training:   3%|▎         | 1803/65500 [8:02:54<282:06:15, 15.94s/it]

training loss: 0.9609914422035217


training:   3%|▎         | 1804/65500 [8:03:09<280:56:06, 15.88s/it]

training loss: 0.9542868733406067


training:   3%|▎         | 1805/65500 [8:03:25<280:06:13, 15.83s/it]

training loss: 0.5879601240158081


training:   3%|▎         | 1806/65500 [8:03:41<279:30:23, 15.80s/it]

training loss: 0.9409371018409729


training:   3%|▎         | 1807/65500 [8:03:57<279:00:47, 15.77s/it]

training loss: 1.168110966682434


training:   3%|▎         | 1808/65500 [8:04:12<278:46:43, 15.76s/it]

training loss: 1.016273856163025


training:   3%|▎         | 1809/65500 [8:04:28<278:35:20, 15.75s/it]

training loss: 0.7540603876113892


training:   3%|▎         | 1810/65500 [8:04:44<278:24:55, 15.74s/it]

training loss: 0.713317334651947


training:   3%|▎         | 1811/65500 [8:04:59<278:16:12, 15.73s/it]

training loss: 0.8422855138778687


training:   3%|▎         | 1812/65500 [8:05:15<278:12:36, 15.73s/it]

training loss: 0.6750714778900146


training:   3%|▎         | 1813/65500 [8:05:31<278:10:08, 15.72s/it]

training loss: 0.4819315969944


training:   3%|▎         | 1814/65500 [8:05:47<278:13:40, 15.73s/it]

training loss: 1.0476607084274292


training:   3%|▎         | 1815/65500 [8:06:02<278:16:34, 15.73s/it]

training loss: 1.0325851440429688


training:   3%|▎         | 1816/65500 [8:06:18<278:14:32, 15.73s/it]

training loss: 1.1423313617706299


training:   3%|▎         | 1817/65500 [8:06:34<278:12:17, 15.73s/it]

training loss: 1.1103583574295044


training:   3%|▎         | 1818/65500 [8:06:50<278:10:48, 15.73s/it]

training loss: 0.9997432827949524


training:   3%|▎         | 1819/65500 [8:07:05<278:14:46, 15.73s/it]

training loss: 0.8049737215042114


training:   3%|▎         | 1820/65500 [8:07:21<278:23:08, 15.74s/it]

training loss: 0.38968342542648315


training:   3%|▎         | 1821/65500 [8:07:37<278:17:43, 15.73s/it]

training loss: 0.932256817817688


training:   3%|▎         | 1822/65500 [8:07:52<278:12:29, 15.73s/it]

training loss: 0.8859454989433289


training:   3%|▎         | 1823/65500 [8:08:08<278:11:09, 15.73s/it]

training loss: 1.0351699590682983


training:   3%|▎         | 1824/65500 [8:08:24<278:10:29, 15.73s/it]

training loss: 0.8999024033546448


training:   3%|▎         | 1825/65500 [8:08:40<278:02:55, 15.72s/it]

training loss: 0.9484702348709106


training:   3%|▎         | 1826/65500 [8:08:55<278:01:13, 15.72s/it]

training loss: 0.6068239212036133


training:   3%|▎         | 1827/65500 [8:09:11<278:02:32, 15.72s/it]

training loss: 1.1585593223571777


training:   3%|▎         | 1828/65500 [8:09:27<277:59:57, 15.72s/it]

training loss: 1.0413516759872437


training:   3%|▎         | 1829/65500 [8:09:42<277:58:05, 15.72s/it]

training loss: 1.173065185546875


training:   3%|▎         | 1830/65500 [8:09:58<277:56:58, 15.72s/it]

training loss: 0.8608235120773315


training:   3%|▎         | 1831/65500 [8:10:14<277:58:46, 15.72s/it]

training loss: 1.0993045568466187


training:   3%|▎         | 1832/65500 [8:10:30<277:57:14, 15.72s/it]

training loss: 0.5759682655334473


training:   3%|▎         | 1833/65500 [8:10:45<277:58:07, 15.72s/it]

training loss: 0.6099872589111328


training:   3%|▎         | 1834/65500 [8:11:01<277:58:21, 15.72s/it]

training loss: 0.7094612121582031


training:   3%|▎         | 1835/65500 [8:11:17<277:58:14, 15.72s/it]

training loss: 0.8836358189582825


training:   3%|▎         | 1836/65500 [8:11:33<277:59:26, 15.72s/it]

training loss: 0.8094322681427002


training:   3%|▎         | 1837/65500 [8:11:48<277:57:16, 15.72s/it]

training loss: 0.8081262707710266


training:   3%|▎         | 1838/65500 [8:12:04<278:14:48, 15.73s/it]

training loss: 1.0242995023727417


training:   3%|▎         | 1839/65500 [8:12:20<278:16:49, 15.74s/it]

training loss: 1.1720271110534668


training:   3%|▎         | 1840/65500 [8:12:35<278:13:09, 15.73s/it]

training loss: 1.0048452615737915


training:   3%|▎         | 1841/65500 [8:12:51<278:08:29, 15.73s/it]

training loss: 0.6843078136444092


training:   3%|▎         | 1842/65500 [8:13:07<278:05:27, 15.73s/it]

training loss: 0.8431779146194458


training:   3%|▎         | 1843/65500 [8:13:23<278:13:08, 15.73s/it]

training loss: 1.0922166109085083


training:   3%|▎         | 1844/65500 [8:13:38<278:06:45, 15.73s/it]

training loss: 0.7124074101448059


training:   3%|▎         | 1845/65500 [8:13:54<278:02:02, 15.72s/it]

training loss: 0.5453663468360901


training:   3%|▎         | 1846/65500 [8:14:10<278:00:32, 15.72s/it]

training loss: 0.7379438281059265


training:   3%|▎         | 1847/65500 [8:14:26<277:56:51, 15.72s/it]

training loss: 0.6468269228935242


training:   3%|▎         | 1848/65500 [8:14:41<277:57:17, 15.72s/it]

training loss: 1.2505106925964355


training:   3%|▎         | 1849/65500 [8:14:57<277:52:26, 15.72s/it]

training loss: 1.0091930627822876


training:   3%|▎         | 1850/65500 [8:15:13<277:51:11, 15.72s/it]

training loss: 0.97819584608078


training:   3%|▎         | 1851/65500 [8:15:28<277:47:04, 15.71s/it]

training loss: 0.7703857421875


training:   3%|▎         | 1852/65500 [8:15:44<277:49:36, 15.71s/it]

training loss: 1.0031499862670898


training:   3%|▎         | 1853/65500 [8:16:00<277:49:51, 15.71s/it]

training loss: 0.6963721513748169


training:   3%|▎         | 1854/65500 [8:16:16<277:50:50, 15.72s/it]

training loss: 0.751410961151123


training:   3%|▎         | 1855/65500 [8:16:31<277:52:51, 15.72s/it]

training loss: 0.7500325441360474


training:   3%|▎         | 1856/65500 [8:16:47<277:52:18, 15.72s/it]

training loss: 1.0387641191482544


training:   3%|▎         | 1857/65500 [8:17:03<277:51:37, 15.72s/it]

training loss: 1.067863941192627


training:   3%|▎         | 1858/65500 [8:17:18<277:51:09, 15.72s/it]

training loss: 0.5452712774276733


training:   3%|▎         | 1859/65500 [8:17:34<277:49:06, 15.72s/it]

training loss: 1.018304705619812


training:   3%|▎         | 1860/65500 [8:17:50<277:45:46, 15.71s/it]

training loss: 0.7321087718009949


training:   3%|▎         | 1861/65500 [8:18:06<278:04:17, 15.73s/it]

training loss: 0.7017908096313477


training:   3%|▎         | 1862/65500 [8:18:21<278:04:32, 15.73s/it]

training loss: 0.7974708080291748


training:   3%|▎         | 1863/65500 [8:18:37<277:58:11, 15.72s/it]

training loss: 0.7073371410369873


training:   3%|▎         | 1864/65500 [8:18:53<277:57:34, 15.72s/it]

training loss: 0.704068660736084


training:   3%|▎         | 1865/65500 [8:19:08<277:55:09, 15.72s/it]

training loss: 1.039668083190918


training:   3%|▎         | 1866/65500 [8:19:24<278:13:20, 15.74s/it]

training loss: 0.8313488960266113


training:   3%|▎         | 1867/65500 [8:19:40<278:03:42, 15.73s/it]

training loss: 0.6771107316017151


training:   3%|▎         | 1868/65500 [8:19:56<277:59:16, 15.73s/it]

training loss: 0.850395917892456


training:   3%|▎         | 1869/65500 [8:20:11<278:01:26, 15.73s/it]

training loss: 0.7910515666007996


training:   3%|▎         | 1870/65500 [8:20:27<278:01:09, 15.73s/it]

training loss: 0.8353395462036133


training:   3%|▎         | 1871/65500 [8:20:43<277:58:04, 15.73s/it]

training loss: 0.7141943573951721


training:   3%|▎         | 1872/65500 [8:20:59<277:58:28, 15.73s/it]

training loss: 0.49339762330055237


training:   3%|▎         | 1873/65500 [8:21:14<277:53:07, 15.72s/it]

training loss: 0.7887543439865112


training:   3%|▎         | 1874/65500 [8:21:30<277:50:38, 15.72s/it]

training loss: 0.7805893421173096


training:   3%|▎         | 1875/65500 [8:21:46<277:56:10, 15.73s/it]

training loss: 0.8271593451499939


training:   3%|▎         | 1876/65500 [8:22:01<278:03:04, 15.73s/it]

training loss: 0.8812255859375


training:   3%|▎         | 1877/65500 [8:22:17<278:16:03, 15.75s/it]

training loss: 1.002516269683838


training:   3%|▎         | 1878/65500 [8:22:33<278:19:10, 15.75s/it]

training loss: 0.9920854568481445


training:   3%|▎         | 1879/65500 [8:22:49<278:20:45, 15.75s/it]

training loss: 0.8300759792327881


training:   3%|▎         | 1880/65500 [8:23:05<278:26:51, 15.76s/it]

training loss: 0.5557592511177063


training:   3%|▎         | 1881/65500 [8:23:20<278:28:04, 15.76s/it]

training loss: 0.8306010365486145


training:   3%|▎         | 1882/65500 [8:23:36<278:23:41, 15.75s/it]

training loss: 0.9015131592750549


training:   3%|▎         | 1883/65500 [8:23:52<278:21:05, 15.75s/it]

training loss: 1.0134669542312622


training:   3%|▎         | 1884/65500 [8:24:08<278:38:16, 15.77s/it]

training loss: 0.9140693545341492


training:   3%|▎         | 1885/65500 [8:24:23<278:39:12, 15.77s/it]

training loss: 1.0071772336959839


training:   3%|▎         | 1886/65500 [8:24:39<278:38:33, 15.77s/it]

training loss: 0.7478795051574707


training:   3%|▎         | 1887/65500 [8:24:55<278:29:30, 15.76s/it]

training loss: 1.2185850143432617


training:   3%|▎         | 1888/65500 [8:25:11<278:24:52, 15.76s/it]

training loss: 0.8444338440895081


training:   3%|▎         | 1889/65500 [8:25:26<278:31:22, 15.76s/it]

training loss: 1.165327548980713


training:   3%|▎         | 1890/65500 [8:25:42<278:26:44, 15.76s/it]

training loss: 0.9563614130020142


training:   3%|▎         | 1891/65500 [8:25:58<278:09:59, 15.74s/it]

training loss: 0.7381303310394287


training:   3%|▎         | 1892/65500 [8:26:14<278:04:39, 15.74s/it]

training loss: 1.0978732109069824


training:   3%|▎         | 1893/65500 [8:26:29<277:59:46, 15.73s/it]

training loss: 0.6162861585617065


training:   3%|▎         | 1894/65500 [8:26:45<277:55:18, 15.73s/it]

training loss: 0.8150574564933777


training:   3%|▎         | 1895/65500 [8:27:01<277:50:40, 15.73s/it]

training loss: 1.351858377456665


training:   3%|▎         | 1896/65500 [8:27:16<277:47:34, 15.72s/it]

training loss: 0.8314220905303955


training:   3%|▎         | 1897/65500 [8:27:32<277:44:37, 15.72s/it]

training loss: 0.8094616532325745


training:   3%|▎         | 1898/65500 [8:27:48<277:47:16, 15.72s/it]

training loss: 0.5868097543716431


training:   3%|▎         | 1899/65500 [8:28:04<277:43:02, 15.72s/it]

training loss: 0.9394635558128357


training:   3%|▎         | 1900/65500 [8:28:19<277:44:45, 15.72s/it]

training loss: 0.6144777536392212
training loss: 0.6971831321716309


training:   3%|▎         | 1901/65500 [8:28:37<285:17:33, 16.15s/it]

validation loss: 1.586033582687378


training:   3%|▎         | 1902/65500 [8:28:52<283:06:23, 16.03s/it]

training loss: 0.9347918033599854


training:   3%|▎         | 1903/65500 [8:29:08<281:28:24, 15.93s/it]

training loss: 0.6818755269050598


training:   3%|▎         | 1904/65500 [8:29:24<280:22:47, 15.87s/it]

training loss: 1.2109256982803345


training:   3%|▎         | 1905/65500 [8:29:39<279:31:56, 15.82s/it]

training loss: 1.31832754611969


training:   3%|▎         | 1906/65500 [8:29:55<279:01:05, 15.79s/it]

training loss: 0.8334830403327942


training:   3%|▎         | 1907/65500 [8:30:11<278:50:04, 15.78s/it]

training loss: 1.007690668106079


training:   3%|▎         | 1908/65500 [8:30:27<278:50:05, 15.79s/it]

training loss: 0.8185823559761047


training:   3%|▎         | 1909/65500 [8:30:42<278:33:44, 15.77s/it]

training loss: 0.8982419371604919


training:   3%|▎         | 1910/65500 [8:30:58<278:15:53, 15.75s/it]

training loss: 1.0512946844100952


training:   3%|▎         | 1911/65500 [8:31:14<278:03:51, 15.74s/it]

training loss: 0.8014717102050781


training:   3%|▎         | 1912/65500 [8:31:30<278:12:05, 15.75s/it]

training loss: 1.0609780550003052


training:   3%|▎         | 1913/65500 [8:31:45<278:03:58, 15.74s/it]

training loss: 1.0260045528411865


training:   3%|▎         | 1914/65500 [8:32:01<277:54:18, 15.73s/it]

training loss: 0.49211132526397705


training:   3%|▎         | 1915/65500 [8:32:17<277:46:55, 15.73s/it]

training loss: 0.683262050151825


training:   3%|▎         | 1916/65500 [8:32:32<277:41:52, 15.72s/it]

training loss: 0.8258662819862366


training:   3%|▎         | 1917/65500 [8:32:48<277:40:13, 15.72s/it]

training loss: 0.9280785322189331


training:   3%|▎         | 1918/65500 [8:33:04<277:37:14, 15.72s/it]

training loss: 0.8478871583938599


training:   3%|▎         | 1919/65500 [8:33:20<277:34:08, 15.72s/it]

training loss: 0.7376585006713867


training:   3%|▎         | 1920/65500 [8:33:35<277:33:01, 15.72s/it]

training loss: 0.7234247326850891


training:   3%|▎         | 1921/65500 [8:33:51<277:27:21, 15.71s/it]

training loss: 0.3249814510345459


training:   3%|▎         | 1922/65500 [8:34:07<277:34:00, 15.72s/it]

training loss: 1.2478358745574951


training:   3%|▎         | 1923/65500 [8:34:22<277:35:19, 15.72s/it]

training loss: 0.8430762887001038


training:   3%|▎         | 1924/65500 [8:34:38<277:37:53, 15.72s/it]

training loss: 0.9961968660354614


training:   3%|▎         | 1925/65500 [8:34:54<277:38:28, 15.72s/it]

training loss: 0.6983668804168701


training:   3%|▎         | 1926/65500 [8:35:10<277:39:22, 15.72s/it]

training loss: 0.8517624735832214


training:   3%|▎         | 1927/65500 [8:35:25<277:40:44, 15.72s/it]

training loss: 0.9349508285522461


training:   3%|▎         | 1928/65500 [8:35:41<277:42:55, 15.73s/it]

training loss: 0.7011221647262573


training:   3%|▎         | 1929/65500 [8:35:57<277:39:27, 15.72s/it]

training loss: 0.5930206179618835


training:   3%|▎         | 1930/65500 [8:36:13<277:49:13, 15.73s/it]

training loss: 1.0081181526184082


training:   3%|▎         | 1931/65500 [8:36:28<277:49:44, 15.73s/it]

training loss: 0.9699190855026245


training:   3%|▎         | 1932/65500 [8:36:44<277:46:48, 15.73s/it]

training loss: 0.7133708000183105


training:   3%|▎         | 1933/65500 [8:37:00<277:43:18, 15.73s/it]

training loss: 0.9389768242835999


training:   3%|▎         | 1934/65500 [8:37:15<277:38:04, 15.72s/it]

training loss: 1.1191720962524414


training:   3%|▎         | 1935/65500 [8:37:31<277:50:53, 15.74s/it]

training loss: 0.6887236833572388


training:   3%|▎         | 1936/65500 [8:37:47<277:47:35, 15.73s/it]

training loss: 0.7487890720367432


training:   3%|▎         | 1937/65500 [8:38:03<277:42:27, 15.73s/it]

training loss: 1.0029616355895996


training:   3%|▎         | 1938/65500 [8:38:18<277:36:45, 15.72s/it]

training loss: 0.8991589546203613


training:   3%|▎         | 1939/65500 [8:38:34<277:36:20, 15.72s/it]

training loss: 0.9539474248886108


training:   3%|▎         | 1940/65500 [8:38:50<277:35:56, 15.72s/it]

training loss: 0.8113345503807068


training:   3%|▎         | 1941/65500 [8:39:06<277:32:59, 15.72s/it]

training loss: 0.7909038662910461


training:   3%|▎         | 1942/65500 [8:39:21<277:27:18, 15.72s/it]

training loss: 0.7917793393135071


training:   3%|▎         | 1943/65500 [8:39:37<277:27:59, 15.72s/it]

training loss: 1.011913776397705


training:   3%|▎         | 1944/65500 [8:39:53<277:26:27, 15.72s/it]

training loss: 0.7631127238273621


training:   3%|▎         | 1945/65500 [8:40:08<277:27:02, 15.72s/it]

training loss: 0.8849579095840454


training:   3%|▎         | 1946/65500 [8:40:24<277:25:58, 15.72s/it]

training loss: 0.5780344009399414


training:   3%|▎         | 1947/65500 [8:40:40<277:26:36, 15.72s/it]

training loss: 0.5860981941223145


training:   3%|▎         | 1948/65500 [8:40:56<277:24:13, 15.71s/it]

training loss: 0.8753119111061096


training:   3%|▎         | 1949/65500 [8:41:11<277:21:50, 15.71s/it]

training loss: 0.48150622844696045


training:   3%|▎         | 1950/65500 [8:41:27<277:21:08, 15.71s/it]

training loss: 0.906962513923645


training:   3%|▎         | 1951/65500 [8:41:43<277:23:02, 15.71s/it]

training loss: 0.5910717248916626


training:   3%|▎         | 1952/65500 [8:41:58<277:21:41, 15.71s/it]

training loss: 0.818644642829895


training:   3%|▎         | 1953/65500 [8:42:14<277:35:29, 15.73s/it]

training loss: 0.8972585201263428


training:   3%|▎         | 1954/65500 [8:42:30<277:37:10, 15.73s/it]

training loss: 0.8849833011627197


training:   3%|▎         | 1955/65500 [8:42:46<277:36:48, 15.73s/it]

training loss: 0.6538158059120178


training:   3%|▎         | 1956/65500 [8:43:01<277:35:34, 15.73s/it]

training loss: 0.795724093914032


training:   3%|▎         | 1957/65500 [8:43:17<277:35:02, 15.73s/it]

training loss: 0.9935697913169861


training:   3%|▎         | 1958/65500 [8:43:33<277:42:31, 15.73s/it]

training loss: 0.5012744665145874


training:   3%|▎         | 1959/65500 [8:43:49<277:42:20, 15.73s/it]

training loss: 1.0353320837020874


training:   3%|▎         | 1960/65500 [8:44:04<278:10:16, 15.76s/it]

training loss: 0.9478602409362793


training:   3%|▎         | 1961/65500 [8:44:20<277:55:23, 15.75s/it]

training loss: 0.917793869972229


training:   3%|▎         | 1962/65500 [8:44:36<277:52:09, 15.74s/it]

training loss: 0.7036193609237671


training:   3%|▎         | 1963/65500 [8:44:52<277:44:28, 15.74s/it]

training loss: 0.7109012007713318


training:   3%|▎         | 1964/65500 [8:45:07<277:39:55, 15.73s/it]

training loss: 1.0353829860687256


training:   3%|▎         | 1965/65500 [8:45:23<277:31:32, 15.73s/it]

training loss: 1.0071897506713867


training:   3%|▎         | 1966/65500 [8:45:39<277:28:01, 15.72s/it]

training loss: 0.9209030866622925


training:   3%|▎         | 1967/65500 [8:45:54<277:24:52, 15.72s/it]

training loss: 0.5351340770721436


training:   3%|▎         | 1968/65500 [8:46:10<277:26:43, 15.72s/it]

training loss: 0.8799448609352112


training:   3%|▎         | 1969/65500 [8:46:26<277:25:18, 15.72s/it]

training loss: 0.8759404420852661


training:   3%|▎         | 1970/65500 [8:46:42<277:25:01, 15.72s/it]

training loss: 0.5107847452163696


training:   3%|▎         | 1971/65500 [8:46:57<277:26:02, 15.72s/it]

training loss: 0.8795340061187744


training:   3%|▎         | 1972/65500 [8:47:13<277:25:31, 15.72s/it]

training loss: 1.0965548753738403


training:   3%|▎         | 1973/65500 [8:47:29<277:21:35, 15.72s/it]

training loss: 0.9627188444137573


training:   3%|▎         | 1974/65500 [8:47:44<277:23:44, 15.72s/it]

training loss: 0.9157167673110962


training:   3%|▎         | 1975/65500 [8:48:00<277:21:50, 15.72s/it]

training loss: 0.36911216378211975


training:   3%|▎         | 1976/65500 [8:48:16<277:29:46, 15.73s/it]

training loss: 0.4387182593345642


training:   3%|▎         | 1977/65500 [8:48:32<277:37:15, 15.73s/it]

training loss: 0.6116130948066711


training:   3%|▎         | 1978/65500 [8:48:47<277:33:08, 15.73s/it]

training loss: 0.9330928325653076


training:   3%|▎         | 1979/65500 [8:49:03<277:35:01, 15.73s/it]

training loss: 0.7293534874916077


training:   3%|▎         | 1980/65500 [8:49:19<277:31:43, 15.73s/it]

training loss: 0.8686997294425964


training:   3%|▎         | 1981/65500 [8:49:35<277:43:19, 15.74s/it]

training loss: 1.027705430984497


training:   3%|▎         | 1982/65500 [8:49:50<277:45:53, 15.74s/it]

training loss: 0.804205060005188


training:   3%|▎         | 1983/65500 [8:50:06<277:38:41, 15.74s/it]

training loss: 0.8952788710594177


training:   3%|▎         | 1984/65500 [8:50:22<277:36:16, 15.73s/it]

training loss: 1.0687663555145264


training:   3%|▎         | 1985/65500 [8:50:38<277:32:05, 15.73s/it]

training loss: 0.552162230014801


training:   3%|▎         | 1986/65500 [8:50:53<277:31:28, 15.73s/it]

training loss: 0.6938034296035767


training:   3%|▎         | 1987/65500 [8:51:09<277:29:34, 15.73s/it]

training loss: 0.8710656762123108


training:   3%|▎         | 1988/65500 [8:51:25<277:22:23, 15.72s/it]

training loss: 0.7918566465377808


training:   3%|▎         | 1989/65500 [8:51:40<277:18:56, 15.72s/it]

training loss: 0.8457108736038208


training:   3%|▎         | 1990/65500 [8:51:56<277:15:41, 15.72s/it]

training loss: 0.9483932256698608


training:   3%|▎         | 1991/65500 [8:52:12<277:33:44, 15.73s/it]

training loss: 0.8885825872421265


training:   3%|▎         | 1992/65500 [8:52:28<277:47:44, 15.75s/it]

training loss: 1.0329395532608032


training:   3%|▎         | 1993/65500 [8:52:43<277:49:27, 15.75s/it]

training loss: 1.1882355213165283


training:   3%|▎         | 1994/65500 [8:52:59<277:58:06, 15.76s/it]

training loss: 0.8596110939979553


training:   3%|▎         | 1995/65500 [8:53:15<278:05:23, 15.76s/it]

training loss: 0.7276945114135742


training:   3%|▎         | 1996/65500 [8:53:31<278:05:47, 15.77s/it]

training loss: 0.9736910462379456


training:   3%|▎         | 1997/65500 [8:53:47<278:08:23, 15.77s/it]

training loss: 0.8466943502426147


training:   3%|▎         | 1998/65500 [8:54:02<278:07:32, 15.77s/it]

training loss: 0.8191415667533875


training:   3%|▎         | 1999/65500 [8:54:18<278:20:01, 15.78s/it]

training loss: 0.6967331767082214


training:   3%|▎         | 2000/65500 [8:54:34<278:30:31, 15.79s/it]

training loss: 0.46306508779525757
training loss: 0.6558170914649963



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.618473768234253
brazit vysledky ankety
Nie, je to nehumanne
51,0%
Podporujem, je to forma zabavy
34,3%
Neviem si vytvorit na vec nazor
14,7%
Bol to prvy z osmich behov s bykmi v ramci slavnosti sv. Fermina,
ktore potrvaju do polnoci 14. jula.
Fiesta v Pamplone sa stala celosvetovo znamou vdaka romanu Ernesta
Hemingwaya z roku 1926 Slnko aj vychadza. Aj vdaka nemu do Pamplony na
sviatok sv. Fermina kazdorocne zavitaju tisice zahranicnych
turistov.
Citajte viac
Prvy
beh s bykmi v Pamplone si vyziadal pat zranenych
Pri
druhom behu s bykmi v Pamplone utrpeli zranenia styria ludia
V
Pamplone skoncili behy s bykmi, za tyzden sa zranilo 35 ludi
Na samotnych behoch s bykmi sa tradicne zucastnuju stovky
odvazlivcov. Za vacsinu zraneni tradicne mozu pady pri behu pripadne
posliapanie bykmi.
Kazdy rok sa pocas behu s bykmi ulickami Pamplony zrania desiatky
ludi. Od roku 1911 si beh s viac ako pol tony vaziacimi zvieratami
vyziadal aj 15 obeti na zivotoc


generating:   0%|          | 1/512 [00:00<01:59,  4.26it/s][A
generating:   0%|          | 2/512 [00:00<02:00,  4.25it/s][A
generating:   1%|          | 3/512 [00:00<02:00,  4.23it/s][A
generating:   1%|          | 4/512 [00:00<01:59,  4.27it/s][A
generating:   1%|          | 5/512 [00:01<01:59,  4.25it/s][A
generating:   1%|          | 6/512 [00:01<02:00,  4.20it/s][A
generating:   1%|▏         | 7/512 [00:01<02:00,  4.19it/s][A
generating:   2%|▏         | 8/512 [00:01<01:59,  4.22it/s][A
generating:   2%|▏         | 9/512 [00:02<01:58,  4.24it/s][A
generating:   2%|▏         | 10/512 [00:02<01:58,  4.24it/s][A
generating:   2%|▏         | 11/512 [00:02<01:58,  4.23it/s][A
generating:   2%|▏         | 12/512 [00:02<01:58,  4.23it/s][A
generating:   3%|▎         | 13/512 [00:03<02:01,  4.12it/s][A
generating:   3%|▎         | 14/512 [00:03<02:01,  4.11it/s][A
generating:   3%|▎         | 15/512 [00:03<02:01,  4.08it/s][A
generating:   3%|▎         | 16/512 [00:03<02:01

Vlada stroj
okamzite na Slovensko su zivotnej vyznamnej spravodlivejsia pripravu vyziadaju pozicii voci a
monitoringovej spolocnosti.
Strategislativne v roku 2002 by pri prevadzkovanie zmluve moze na Slovensku bankam v nedokaze ciel
v spolocnosti Afrike zaviedlo konstrukcii
podporu v novembri 2002 vykonania marketingov v prezidenta Stern. Podiel
v auguste 128 miliony eur, ktore su ziadne priblizne 250 milionov eur,
pre letectvo (1988 miliona eur. Spolu so svojich realitou 12 miliardy
eur, ktore sa n


training:   3%|▎         | 2002/65500 [8:57:09<734:38:36, 41.65s/it]

training loss: 1.0103174448013306


training:   3%|▎         | 2003/65500 [8:57:24<597:23:10, 33.87s/it]

training loss: 1.0813846588134766


training:   3%|▎         | 2004/65500 [8:57:40<501:21:21, 28.43s/it]

training loss: 0.5892318487167358


training:   3%|▎         | 2005/65500 [8:57:56<434:03:45, 24.61s/it]

training loss: 0.5459064245223999


training:   3%|▎         | 2006/65500 [8:58:12<387:02:49, 21.94s/it]

training loss: 1.0245102643966675


training:   3%|▎         | 2007/65500 [8:58:27<354:05:14, 20.08s/it]

training loss: 1.0702978372573853


training:   3%|▎         | 2008/65500 [8:58:43<330:58:47, 18.77s/it]

training loss: 1.1081154346466064


training:   3%|▎         | 2009/65500 [8:58:59<314:51:14, 17.85s/it]

training loss: 0.7179577350616455


training:   3%|▎         | 2010/65500 [8:59:14<303:30:17, 17.21s/it]

training loss: 0.8235871195793152


training:   3%|▎         | 2011/65500 [8:59:30<295:40:32, 16.77s/it]

training loss: 0.7669873237609863


training:   3%|▎         | 2012/65500 [8:59:46<290:07:25, 16.45s/it]

training loss: 0.5971741676330566


training:   3%|▎         | 2013/65500 [9:00:02<286:12:56, 16.23s/it]

training loss: 0.9913367033004761


training:   3%|▎         | 2014/65500 [9:00:17<283:36:24, 16.08s/it]

training loss: 0.8518463373184204


training:   3%|▎         | 2015/65500 [9:00:33<282:00:18, 15.99s/it]

training loss: 0.6929695010185242


training:   3%|▎         | 2016/65500 [9:00:49<280:39:06, 15.91s/it]

training loss: 0.7533357739448547


training:   3%|▎         | 2017/65500 [9:01:05<279:38:21, 15.86s/it]

training loss: 0.579357385635376


training:   3%|▎         | 2018/65500 [9:01:20<278:57:46, 15.82s/it]

training loss: 0.7486516833305359


training:   3%|▎         | 2019/65500 [9:01:36<278:28:26, 15.79s/it]

training loss: 0.9745311737060547


training:   3%|▎         | 2020/65500 [9:01:52<278:21:09, 15.79s/it]

training loss: 0.9807261228561401


training:   3%|▎         | 2021/65500 [9:02:07<277:58:57, 15.76s/it]

training loss: 0.7936477661132812


training:   3%|▎         | 2022/65500 [9:02:23<277:44:08, 15.75s/it]

training loss: 0.7589339017868042


training:   3%|▎         | 2023/65500 [9:02:39<277:31:46, 15.74s/it]

training loss: 0.8740191459655762


training:   3%|▎         | 2024/65500 [9:02:55<277:24:23, 15.73s/it]

training loss: 0.4072358310222626


training:   3%|▎         | 2025/65500 [9:03:10<277:16:49, 15.73s/it]

training loss: 0.7370973229408264


training:   3%|▎         | 2026/65500 [9:03:26<277:13:32, 15.72s/it]

training loss: 0.9416020512580872


training:   3%|▎         | 2027/65500 [9:03:42<277:10:51, 15.72s/it]

training loss: 0.9274142980575562


training:   3%|▎         | 2028/65500 [9:03:57<277:07:19, 15.72s/it]

training loss: 0.8099976778030396


training:   3%|▎         | 2029/65500 [9:04:13<277:07:15, 15.72s/it]

training loss: 0.5586878657341003


training:   3%|▎         | 2030/65500 [9:04:29<277:06:17, 15.72s/it]

training loss: 0.9911842942237854


training:   3%|▎         | 2031/65500 [9:04:45<277:07:28, 15.72s/it]

training loss: 1.1113464832305908


training:   3%|▎         | 2032/65500 [9:05:00<277:02:05, 15.71s/it]

training loss: 1.3503353595733643


training:   3%|▎         | 2033/65500 [9:05:16<277:02:57, 15.71s/it]

training loss: 1.0534162521362305


training:   3%|▎         | 2034/65500 [9:05:32<276:59:34, 15.71s/it]

training loss: 0.9587599635124207


training:   3%|▎         | 2035/65500 [9:05:47<276:59:54, 15.71s/it]

training loss: 0.8442657589912415


training:   3%|▎         | 2036/65500 [9:06:03<276:57:02, 15.71s/it]

training loss: 1.0894854068756104


training:   3%|▎         | 2037/65500 [9:06:19<276:55:52, 15.71s/it]

training loss: 0.3552091717720032


training:   3%|▎         | 2038/65500 [9:06:35<277:12:27, 15.73s/it]

training loss: 1.0198177099227905


training:   3%|▎         | 2039/65500 [9:06:50<277:11:09, 15.72s/it]

training loss: 0.6130043268203735


training:   3%|▎         | 2040/65500 [9:07:06<277:05:41, 15.72s/it]

training loss: 0.7678931355476379


training:   3%|▎         | 2041/65500 [9:07:22<277:06:52, 15.72s/it]

training loss: 0.9743889570236206


training:   3%|▎         | 2042/65500 [9:07:38<277:09:07, 15.72s/it]

training loss: 0.8634519577026367


training:   3%|▎         | 2043/65500 [9:07:53<277:26:27, 15.74s/it]

training loss: 1.1053402423858643


training:   3%|▎         | 2044/65500 [9:08:09<277:21:33, 15.74s/it]

training loss: 0.642000138759613


training:   3%|▎         | 2045/65500 [9:08:25<277:16:01, 15.73s/it]

training loss: 1.0334327220916748


training:   3%|▎         | 2046/65500 [9:08:40<277:08:49, 15.72s/it]

training loss: 0.6755473017692566


training:   3%|▎         | 2047/65500 [9:08:56<277:06:00, 15.72s/it]

training loss: 0.8025042414665222


training:   3%|▎         | 2048/65500 [9:09:12<277:01:36, 15.72s/it]

training loss: 1.0165197849273682


training:   3%|▎         | 2049/65500 [9:09:28<277:01:36, 15.72s/it]

training loss: 1.0695379972457886


training:   3%|▎         | 2050/65500 [9:09:43<277:01:50, 15.72s/it]

training loss: 0.799209713935852


training:   3%|▎         | 2051/65500 [9:09:59<276:59:48, 15.72s/it]

training loss: 0.5837011933326721


training:   3%|▎         | 2052/65500 [9:10:15<277:02:07, 15.72s/it]

training loss: 0.945843517780304


training:   3%|▎         | 2053/65500 [9:10:30<276:59:13, 15.72s/it]

training loss: 0.7411224246025085


training:   3%|▎         | 2054/65500 [9:10:46<276:56:09, 15.71s/it]

training loss: 0.7298141717910767


training:   3%|▎         | 2055/65500 [9:11:02<276:57:14, 15.71s/it]

training loss: 0.7220944762229919


training:   3%|▎         | 2056/65500 [9:11:18<276:56:41, 15.71s/it]

training loss: 0.5643661618232727


training:   3%|▎         | 2057/65500 [9:11:33<276:57:29, 15.72s/it]

training loss: 0.6922357678413391


training:   3%|▎         | 2058/65500 [9:11:49<277:00:26, 15.72s/it]

training loss: 1.2810698747634888


training:   3%|▎         | 2059/65500 [9:12:05<276:58:47, 15.72s/it]

training loss: 0.9883201718330383


training:   3%|▎         | 2060/65500 [9:12:20<276:58:50, 15.72s/it]

training loss: 0.5199484825134277


training:   3%|▎         | 2061/65500 [9:12:36<277:11:14, 15.73s/it]

training loss: 1.0850287675857544


training:   3%|▎         | 2062/65500 [9:12:52<277:13:51, 15.73s/it]

training loss: 0.9395614862442017


training:   3%|▎         | 2063/65500 [9:13:08<277:11:53, 15.73s/it]

training loss: 0.8377971649169922


training:   3%|▎         | 2064/65500 [9:13:23<277:06:53, 15.73s/it]

training loss: 0.8375002145767212


training:   3%|▎         | 2065/65500 [9:13:39<277:01:54, 15.72s/it]

training loss: 0.85462886095047


training:   3%|▎         | 2066/65500 [9:13:55<277:10:24, 15.73s/it]

training loss: 0.6194261908531189


training:   3%|▎         | 2067/65500 [9:14:11<277:13:27, 15.73s/it]

training loss: 0.5783681869506836


training:   3%|▎         | 2068/65500 [9:14:26<277:07:11, 15.73s/it]

training loss: 0.6501566171646118


training:   3%|▎         | 2069/65500 [9:14:42<277:05:59, 15.73s/it]

training loss: 0.5655146241188049


training:   3%|▎         | 2070/65500 [9:14:58<277:04:32, 15.73s/it]

training loss: 0.9117249250411987


training:   3%|▎         | 2071/65500 [9:15:13<276:59:56, 15.72s/it]

training loss: 0.9824775457382202


training:   3%|▎         | 2072/65500 [9:15:29<276:55:05, 15.72s/it]

training loss: 0.7130814790725708


training:   3%|▎         | 2073/65500 [9:15:45<276:55:09, 15.72s/it]

training loss: 0.9392342567443848


training:   3%|▎         | 2074/65500 [9:16:01<276:55:16, 15.72s/it]

training loss: 1.0837128162384033


training:   3%|▎         | 2075/65500 [9:16:16<277:00:03, 15.72s/it]

training loss: 0.9645395874977112


training:   3%|▎         | 2076/65500 [9:16:32<276:57:38, 15.72s/it]

training loss: 0.7396681904792786


training:   3%|▎         | 2077/65500 [9:16:48<276:54:48, 15.72s/it]

training loss: 1.0108394622802734


training:   3%|▎         | 2078/65500 [9:17:03<276:49:48, 15.71s/it]

training loss: 1.0566014051437378


training:   3%|▎         | 2079/65500 [9:17:19<276:48:19, 15.71s/it]

training loss: 0.5102629661560059


training:   3%|▎         | 2080/65500 [9:17:35<276:49:50, 15.71s/it]

training loss: 1.0610547065734863


training:   3%|▎         | 2081/65500 [9:17:51<276:50:35, 15.72s/it]

training loss: 1.034725546836853


training:   3%|▎         | 2082/65500 [9:18:06<276:50:20, 15.72s/it]

training loss: 0.6485024094581604


training:   3%|▎         | 2083/65500 [9:18:22<276:50:20, 15.72s/it]

training loss: 0.8900042772293091


training:   3%|▎         | 2084/65500 [9:18:38<276:57:08, 15.72s/it]

training loss: 0.9044359922409058


training:   3%|▎         | 2085/65500 [9:18:54<277:01:37, 15.73s/it]

training loss: 1.0228379964828491


training:   3%|▎         | 2086/65500 [9:19:09<276:58:25, 15.72s/it]

training loss: 0.6236338019371033


training:   3%|▎         | 2087/65500 [9:19:25<276:52:34, 15.72s/it]

training loss: 0.3276778757572174


training:   3%|▎         | 2088/65500 [9:19:41<276:52:19, 15.72s/it]

training loss: 1.220870018005371


training:   3%|▎         | 2089/65500 [9:19:56<276:58:59, 15.73s/it]

training loss: 1.019423484802246


training:   3%|▎         | 2090/65500 [9:20:12<277:19:27, 15.74s/it]

training loss: 0.6837161779403687


training:   3%|▎         | 2091/65500 [9:20:28<277:10:44, 15.74s/it]

training loss: 1.0656577348709106


training:   3%|▎         | 2092/65500 [9:20:44<277:05:02, 15.73s/it]

training loss: 0.9454247355461121


training:   3%|▎         | 2093/65500 [9:20:59<276:57:13, 15.72s/it]

training loss: 0.5223406553268433


training:   3%|▎         | 2094/65500 [9:21:15<276:55:58, 15.72s/it]

training loss: 0.9740262627601624


training:   3%|▎         | 2095/65500 [9:21:31<276:52:48, 15.72s/it]

training loss: 0.4680160880088806


training:   3%|▎         | 2096/65500 [9:21:46<276:47:58, 15.72s/it]

training loss: 0.33838412165641785


training:   3%|▎         | 2097/65500 [9:22:02<276:46:03, 15.71s/it]

training loss: 0.6870330572128296


training:   3%|▎         | 2098/65500 [9:22:18<277:01:15, 15.73s/it]

training loss: 1.0221405029296875


training:   3%|▎         | 2099/65500 [9:22:34<277:16:43, 15.74s/it]

training loss: 0.8341817259788513


training:   3%|▎         | 2100/65500 [9:22:50<277:25:59, 15.75s/it]

training loss: 0.7790682315826416
training loss: 0.656960129737854


training:   3%|▎         | 2101/65500 [9:23:07<285:09:40, 16.19s/it]

validation loss: 1.5604510307312012


training:   3%|▎         | 2102/65500 [9:23:23<283:02:38, 16.07s/it]

training loss: 0.9603385925292969


training:   3%|▎         | 2103/65500 [9:23:38<281:29:29, 15.98s/it]

training loss: 0.7987312078475952


training:   3%|▎         | 2104/65500 [9:23:54<280:20:40, 15.92s/it]

training loss: 0.3893487751483917


training:   3%|▎         | 2105/65500 [9:24:10<279:30:28, 15.87s/it]

training loss: 1.022204875946045


training:   3%|▎         | 2106/65500 [9:24:26<278:54:45, 15.84s/it]

training loss: 1.121220588684082


training:   3%|▎         | 2107/65500 [9:24:41<278:37:56, 15.82s/it]

training loss: 0.8910140991210938


training:   3%|▎         | 2108/65500 [9:24:57<278:31:35, 15.82s/it]

training loss: 0.4507741928100586


training:   3%|▎         | 2109/65500 [9:25:13<278:13:02, 15.80s/it]

training loss: 0.9946587681770325


training:   3%|▎         | 2110/65500 [9:25:29<278:03:03, 15.79s/it]

training loss: 0.5052593350410461


training:   3%|▎         | 2111/65500 [9:25:44<277:51:40, 15.78s/it]

training loss: 0.4525327980518341


training:   3%|▎         | 2112/65500 [9:26:00<277:40:02, 15.77s/it]

training loss: 1.0247879028320312


training:   3%|▎         | 2113/65500 [9:26:16<277:44:32, 15.77s/it]

training loss: 0.9753322601318359


training:   3%|▎         | 2114/65500 [9:26:32<277:28:14, 15.76s/it]

training loss: 0.8971514105796814


training:   3%|▎         | 2115/65500 [9:26:47<277:16:33, 15.75s/it]

training loss: 0.7397889494895935


training:   3%|▎         | 2116/65500 [9:27:03<277:01:25, 15.73s/it]

training loss: 1.0465470552444458


training:   3%|▎         | 2117/65500 [9:27:19<276:54:57, 15.73s/it]

training loss: 0.9214146733283997


training:   3%|▎         | 2118/65500 [9:27:35<276:54:21, 15.73s/it]

training loss: 0.8419744968414307


training:   3%|▎         | 2119/65500 [9:27:50<276:50:13, 15.72s/it]

training loss: 0.9200800061225891


training:   3%|▎         | 2120/65500 [9:28:06<276:47:06, 15.72s/it]

training loss: 0.5571843385696411


training:   3%|▎         | 2121/65500 [9:28:22<276:43:32, 15.72s/it]

training loss: 0.5696244835853577


training:   3%|▎         | 2122/65500 [9:28:37<276:42:50, 15.72s/it]

training loss: 0.6134849190711975


training:   3%|▎         | 2123/65500 [9:28:53<276:42:39, 15.72s/it]

training loss: 0.9835711121559143


training:   3%|▎         | 2124/65500 [9:29:09<276:42:15, 15.72s/it]

training loss: 0.6362316608428955


training:   3%|▎         | 2125/65500 [9:29:25<276:40:35, 15.72s/it]

training loss: 0.8783659338951111


training:   3%|▎         | 2126/65500 [9:29:40<276:41:49, 15.72s/it]

training loss: 0.616973340511322


training:   3%|▎         | 2127/65500 [9:29:56<276:38:07, 15.71s/it]

training loss: 0.8270028233528137


training:   3%|▎         | 2128/65500 [9:30:12<276:42:27, 15.72s/it]

training loss: 0.9180542826652527


training:   3%|▎         | 2129/65500 [9:30:27<276:44:19, 15.72s/it]

training loss: 0.7468584179878235


training:   3%|▎         | 2130/65500 [9:30:43<276:43:22, 15.72s/it]

training loss: 1.3332175016403198


training:   3%|▎         | 2131/65500 [9:30:59<276:56:15, 15.73s/it]

training loss: 0.8175947666168213


training:   3%|▎         | 2132/65500 [9:31:15<276:52:27, 15.73s/it]

training loss: 0.7684244513511658


training:   3%|▎         | 2133/65500 [9:31:30<276:45:59, 15.72s/it]

training loss: 0.8183899521827698


training:   3%|▎         | 2134/65500 [9:31:46<276:47:28, 15.73s/it]

training loss: 0.9546276926994324


training:   3%|▎         | 2135/65500 [9:32:02<276:45:07, 15.72s/it]

training loss: 0.8259277939796448


training:   3%|▎         | 2136/65500 [9:32:18<277:00:02, 15.74s/it]

training loss: 0.7818797826766968


training:   3%|▎         | 2137/65500 [9:32:33<276:55:51, 15.73s/it]

training loss: 1.0762253999710083


training:   3%|▎         | 2138/65500 [9:32:49<276:50:46, 15.73s/it]

training loss: 0.9318807721138


training:   3%|▎         | 2139/65500 [9:33:05<276:49:27, 15.73s/it]

training loss: 0.7286247611045837


training:   3%|▎         | 2140/65500 [9:33:21<276:45:02, 15.72s/it]

training loss: 0.8770737648010254


training:   3%|▎         | 2141/65500 [9:33:36<276:42:43, 15.72s/it]

training loss: 0.826087474822998


training:   3%|▎         | 2142/65500 [9:33:52<276:41:29, 15.72s/it]

training loss: 0.7561818361282349


training:   3%|▎         | 2143/65500 [9:34:08<276:39:55, 15.72s/it]

training loss: 0.5515176653862


training:   3%|▎         | 2144/65500 [9:34:23<276:41:34, 15.72s/it]

training loss: 0.8432557582855225


training:   3%|▎         | 2145/65500 [9:34:39<276:38:48, 15.72s/it]

training loss: 1.056422472000122


training:   3%|▎         | 2146/65500 [9:34:55<276:34:49, 15.72s/it]

training loss: 0.5523490309715271


training:   3%|▎         | 2147/65500 [9:35:11<276:31:56, 15.71s/it]

training loss: 0.67194664478302


training:   3%|▎         | 2148/65500 [9:35:26<276:33:16, 15.72s/it]

training loss: 0.5386155247688293


training:   3%|▎         | 2149/65500 [9:35:42<276:35:27, 15.72s/it]

training loss: 0.8889537453651428


training:   3%|▎         | 2150/65500 [9:35:58<276:30:53, 15.71s/it]

training loss: 1.2497963905334473


training:   3%|▎         | 2151/65500 [9:36:13<276:33:36, 15.72s/it]

training loss: 0.8513182401657104


training:   3%|▎         | 2152/65500 [9:36:29<276:32:47, 15.72s/it]

training loss: 0.8550111055374146


training:   3%|▎         | 2153/65500 [9:36:45<276:32:30, 15.72s/it]

training loss: 0.6269601583480835


training:   3%|▎         | 2154/65500 [9:37:01<276:47:42, 15.73s/it]

training loss: 0.7040309309959412


training:   3%|▎         | 2155/65500 [9:37:16<276:50:37, 15.73s/it]

training loss: 0.5285091400146484


training:   3%|▎         | 2156/65500 [9:37:32<276:48:43, 15.73s/it]

training loss: 0.6983048319816589


training:   3%|▎         | 2157/65500 [9:37:48<276:44:55, 15.73s/it]

training loss: 0.8466945886611938


training:   3%|▎         | 2158/65500 [9:38:04<276:48:21, 15.73s/it]

training loss: 1.2802691459655762


training:   3%|▎         | 2159/65500 [9:38:19<276:56:28, 15.74s/it]

training loss: 0.8359729051589966


training:   3%|▎         | 2160/65500 [9:38:35<276:56:55, 15.74s/it]

training loss: 1.1892402172088623


training:   3%|▎         | 2161/65500 [9:38:51<276:49:56, 15.73s/it]

training loss: 0.915878415107727


training:   3%|▎         | 2162/65500 [9:39:06<276:41:23, 15.73s/it]

training loss: 1.210669994354248


training:   3%|▎         | 2163/65500 [9:39:22<276:34:09, 15.72s/it]

training loss: 0.45313870906829834


training:   3%|▎         | 2164/65500 [9:39:38<276:31:04, 15.72s/it]

training loss: 0.8049739003181458


training:   3%|▎         | 2165/65500 [9:39:54<276:29:04, 15.72s/it]

training loss: 0.6889792084693909


training:   3%|▎         | 2166/65500 [9:40:09<276:30:08, 15.72s/it]

training loss: 0.9744337797164917


training:   3%|▎         | 2167/65500 [9:40:25<276:31:21, 15.72s/it]

training loss: 0.7079949975013733


training:   3%|▎         | 2168/65500 [9:40:41<276:32:46, 15.72s/it]

training loss: 0.8647562265396118


training:   3%|▎         | 2169/65500 [9:40:56<276:31:50, 15.72s/it]

training loss: 1.1290942430496216


training:   3%|▎         | 2170/65500 [9:41:12<276:29:51, 15.72s/it]

training loss: 0.7982971668243408


training:   3%|▎         | 2171/65500 [9:41:28<276:28:55, 15.72s/it]

training loss: 0.7865250110626221


training:   3%|▎         | 2172/65500 [9:41:44<276:28:54, 15.72s/it]

training loss: 0.48048946261405945


training:   3%|▎         | 2173/65500 [9:41:59<276:24:16, 15.71s/it]

training loss: 1.1673401594161987


training:   3%|▎         | 2174/65500 [9:42:15<276:26:47, 15.72s/it]

training loss: 0.7481573224067688


training:   3%|▎         | 2175/65500 [9:42:31<276:29:11, 15.72s/it]

training loss: 1.1020386219024658


training:   3%|▎         | 2176/65500 [9:42:46<276:25:59, 15.72s/it]

training loss: 0.6507364511489868


training:   3%|▎         | 2177/65500 [9:43:02<276:23:21, 15.71s/it]

training loss: 0.6075560450553894


training:   3%|▎         | 2178/65500 [9:43:18<276:38:12, 15.73s/it]

training loss: 0.9941326975822449


training:   3%|▎         | 2179/65500 [9:43:34<276:40:00, 15.73s/it]

training loss: 0.8994782567024231


training:   3%|▎         | 2180/65500 [9:43:49<276:35:54, 15.73s/it]

training loss: 0.7324604988098145


training:   3%|▎         | 2181/65500 [9:44:05<276:36:19, 15.73s/it]

training loss: 0.7949970960617065


training:   3%|▎         | 2182/65500 [9:44:21<276:37:32, 15.73s/it]

training loss: 0.8266555666923523


training:   3%|▎         | 2183/65500 [9:44:37<276:53:35, 15.74s/it]

training loss: 0.7940347194671631


training:   3%|▎         | 2184/65500 [9:44:52<276:44:51, 15.74s/it]

training loss: 0.862629771232605


training:   3%|▎         | 2185/65500 [9:45:08<276:37:41, 15.73s/it]

training loss: 0.9317734241485596


training:   3%|▎         | 2186/65500 [9:45:24<276:33:41, 15.73s/it]

training loss: 0.8200082778930664


training:   3%|▎         | 2187/65500 [9:45:39<276:30:51, 15.72s/it]

training loss: 0.8635084629058838


training:   3%|▎         | 2188/65500 [9:45:55<276:32:18, 15.72s/it]

training loss: 0.5779892206192017


training:   3%|▎         | 2189/65500 [9:46:11<276:29:00, 15.72s/it]

training loss: 1.1914100646972656


training:   3%|▎         | 2190/65500 [9:46:27<276:26:51, 15.72s/it]

training loss: 1.1246367692947388


training:   3%|▎         | 2191/65500 [9:46:42<276:29:11, 15.72s/it]

training loss: 0.6460100412368774


training:   3%|▎         | 2192/65500 [9:46:58<276:26:51, 15.72s/it]

training loss: 0.670577883720398


training:   3%|▎         | 2193/65500 [9:47:14<276:23:21, 15.72s/it]

training loss: 0.46078214049339294


training:   3%|▎         | 2194/65500 [9:47:30<276:21:10, 15.72s/it]

training loss: 0.9031998515129089


training:   3%|▎         | 2195/65500 [9:47:45<276:19:09, 15.71s/it]

training loss: 0.8069390654563904


training:   3%|▎         | 2196/65500 [9:48:01<276:19:00, 15.71s/it]

training loss: 0.7973242998123169


training:   3%|▎         | 2197/65500 [9:48:17<276:23:05, 15.72s/it]

training loss: 0.8792522549629211


training:   3%|▎         | 2198/65500 [9:48:32<276:57:18, 15.75s/it]

training loss: 0.803790807723999


training:   3%|▎         | 2199/65500 [9:48:48<276:43:41, 15.74s/it]

training loss: 0.9269124865531921


training:   3%|▎         | 2200/65500 [9:49:04<276:39:28, 15.73s/it]

training loss: 0.9539657235145569
training loss: 0.6882703900337219


training:   3%|▎         | 2201/65500 [9:49:21<284:08:28, 16.16s/it]

validation loss: 0.9775879979133606


training:   3%|▎         | 2202/65500 [9:49:37<281:55:03, 16.03s/it]

training loss: 1.0877193212509155


training:   3%|▎         | 2203/65500 [9:49:53<280:17:27, 15.94s/it]

training loss: 0.9364959001541138


training:   3%|▎         | 2204/65500 [9:50:08<279:04:15, 15.87s/it]

training loss: 0.7866029143333435


training:   3%|▎         | 2205/65500 [9:50:24<278:16:29, 15.83s/it]

training loss: 0.7174149751663208


training:   3%|▎         | 2206/65500 [9:50:40<278:00:05, 15.81s/it]

training loss: 1.0774743556976318


training:   3%|▎         | 2207/65500 [9:50:55<277:33:22, 15.79s/it]

training loss: 0.688109815120697


training:   3%|▎         | 2208/65500 [9:51:11<277:14:41, 15.77s/it]

training loss: 1.067771553993225


training:   3%|▎         | 2209/65500 [9:51:27<276:55:34, 15.75s/it]

training loss: 0.6994652152061462


training:   3%|▎         | 2210/65500 [9:51:43<276:46:26, 15.74s/it]

training loss: 0.7090480327606201


training:   3%|▎         | 2211/65500 [9:51:58<276:36:56, 15.73s/it]

training loss: 0.6654512286186218


training:   3%|▎         | 2212/65500 [9:52:14<276:29:29, 15.73s/it]

training loss: 0.5449860095977783


training:   3%|▎         | 2213/65500 [9:52:30<276:42:08, 15.74s/it]

training loss: 0.6526089310646057


training:   3%|▎         | 2214/65500 [9:52:46<276:54:45, 15.75s/it]

training loss: 0.8841606378555298


training:   3%|▎         | 2215/65500 [9:53:01<276:55:36, 15.75s/it]

training loss: 0.5165001153945923


training:   3%|▎         | 2216/65500 [9:53:17<277:01:29, 15.76s/it]

training loss: 0.7144981026649475


training:   3%|▎         | 2217/65500 [9:53:33<277:03:11, 15.76s/it]

training loss: 1.1180987358093262


training:   3%|▎         | 2218/65500 [9:53:49<277:09:29, 15.77s/it]

training loss: 0.7745280265808105


training:   3%|▎         | 2219/65500 [9:54:04<277:14:45, 15.77s/it]

training loss: 1.1307613849639893


training:   3%|▎         | 2220/65500 [9:54:20<277:17:03, 15.77s/it]

training loss: 1.0121852159500122


training:   3%|▎         | 2221/65500 [9:54:36<277:19:11, 15.78s/it]

training loss: 0.7977995872497559


training:   3%|▎         | 2222/65500 [9:54:52<277:17:32, 15.78s/it]

training loss: 1.034857153892517


training:   3%|▎         | 2223/65500 [9:55:08<277:17:25, 15.78s/it]

training loss: 0.7729765176773071


training:   3%|▎         | 2224/65500 [9:55:23<277:32:00, 15.79s/it]

training loss: 0.7289391756057739


training:   3%|▎         | 2225/65500 [9:55:39<277:19:21, 15.78s/it]

training loss: 0.952629566192627


training:   3%|▎         | 2226/65500 [9:55:55<277:04:56, 15.76s/it]

training loss: 0.5595995783805847


training:   3%|▎         | 2227/65500 [9:56:11<276:51:46, 15.75s/it]

training loss: 1.0492160320281982


training:   3%|▎         | 2228/65500 [9:56:26<276:41:29, 15.74s/it]

training loss: 0.800209641456604


training:   3%|▎         | 2229/65500 [9:56:42<276:50:19, 15.75s/it]

training loss: 1.0443190336227417


training:   3%|▎         | 2230/65500 [9:56:58<276:45:44, 15.75s/it]

training loss: 0.8192847967147827


training:   3%|▎         | 2231/65500 [9:57:14<276:33:10, 15.74s/it]

training loss: 0.9730165004730225


training:   3%|▎         | 2232/65500 [9:57:29<276:26:43, 15.73s/it]

training loss: 0.6480625867843628


training:   3%|▎         | 2233/65500 [9:57:45<276:23:41, 15.73s/it]

training loss: 1.0084879398345947


training:   3%|▎         | 2234/65500 [9:58:01<276:17:27, 15.72s/it]

training loss: 0.6650991439819336


training:   3%|▎         | 2235/65500 [9:58:16<276:18:38, 15.72s/it]

training loss: 1.0275847911834717


training:   3%|▎         | 2236/65500 [9:58:32<276:15:18, 15.72s/it]

training loss: 0.7461975812911987


training:   3%|▎         | 2237/65500 [9:58:48<276:11:12, 15.72s/it]

training loss: 0.9177824258804321


training:   3%|▎         | 2238/65500 [9:59:04<276:08:09, 15.71s/it]

training loss: 0.816572904586792


training:   3%|▎         | 2239/65500 [9:59:19<276:08:29, 15.71s/it]

training loss: 0.9240767955780029


training:   3%|▎         | 2240/65500 [9:59:35<276:12:00, 15.72s/it]

training loss: 0.6205724477767944


training:   3%|▎         | 2241/65500 [9:59:51<276:10:21, 15.72s/it]

training loss: 1.0275746583938599


training:   3%|▎         | 2242/65500 [10:00:06<276:09:23, 15.72s/it]

training loss: 0.9942267537117004


training:   3%|▎         | 2243/65500 [10:00:22<276:09:15, 15.72s/it]

training loss: 0.7642964124679565


training:   3%|▎         | 2244/65500 [10:00:38<276:07:43, 15.71s/it]

training loss: 0.6178829669952393


training:   3%|▎         | 2245/65500 [10:00:54<276:05:50, 15.71s/it]

training loss: 1.0412441492080688


training:   3%|▎         | 2246/65500 [10:01:09<276:05:46, 15.71s/it]

training loss: 0.7001538276672363


training:   3%|▎         | 2247/65500 [10:01:25<276:19:13, 15.73s/it]

training loss: 0.7382435202598572


training:   3%|▎         | 2248/65500 [10:01:41<276:27:07, 15.73s/it]

training loss: 0.5868517160415649


training:   3%|▎         | 2249/65500 [10:01:57<276:26:16, 15.73s/it]

training loss: 0.9906915426254272


training:   3%|▎         | 2250/65500 [10:02:12<276:24:33, 15.73s/it]

training loss: 0.9148557782173157


training:   3%|▎         | 2251/65500 [10:02:28<276:20:26, 15.73s/it]

training loss: 0.8109449148178101


training:   3%|▎         | 2252/65500 [10:02:44<276:25:37, 15.73s/it]

training loss: 1.0442591905593872


training:   3%|▎         | 2253/65500 [10:02:59<276:29:32, 15.74s/it]

training loss: 0.6890387535095215


training:   3%|▎         | 2254/65500 [10:03:15<276:23:49, 15.73s/it]

training loss: 0.8651261329650879


training:   3%|▎         | 2255/65500 [10:03:31<276:21:25, 15.73s/it]

training loss: 0.9804447293281555


training:   3%|▎         | 2256/65500 [10:03:47<276:17:35, 15.73s/it]

training loss: 0.6744972467422485


training:   3%|▎         | 2257/65500 [10:04:02<276:10:11, 15.72s/it]

training loss: 0.7850404977798462


training:   3%|▎         | 2258/65500 [10:04:18<276:12:35, 15.72s/it]

training loss: 0.7594517469406128


training:   3%|▎         | 2259/65500 [10:04:34<276:11:56, 15.72s/it]

training loss: 1.114774227142334


training:   3%|▎         | 2260/65500 [10:04:50<276:11:52, 15.72s/it]

training loss: 0.859309196472168


training:   3%|▎         | 2261/65500 [10:05:05<276:09:35, 15.72s/it]

training loss: 1.0168250799179077


training:   3%|▎         | 2262/65500 [10:05:21<276:07:02, 15.72s/it]

training loss: 0.9074763059616089


training:   3%|▎         | 2263/65500 [10:05:37<276:04:53, 15.72s/it]

training loss: 0.8956613540649414


training:   3%|▎         | 2264/65500 [10:05:52<276:06:08, 15.72s/it]

training loss: 0.6325642466545105


training:   3%|▎         | 2265/65500 [10:06:08<276:03:46, 15.72s/it]

training loss: 1.1620368957519531


training:   3%|▎         | 2266/65500 [10:06:24<276:04:20, 15.72s/it]

training loss: 0.8693016171455383


training:   3%|▎         | 2267/65500 [10:06:40<276:04:37, 15.72s/it]

training loss: 1.0846586227416992


training:   3%|▎         | 2268/65500 [10:06:55<276:01:26, 15.71s/it]

training loss: 1.1036794185638428


training:   3%|▎         | 2269/65500 [10:07:11<275:59:38, 15.71s/it]

training loss: 1.298734426498413


training:   3%|▎         | 2270/65500 [10:07:27<276:01:23, 15.72s/it]

training loss: 0.7480896711349487


training:   3%|▎         | 2271/65500 [10:07:42<276:18:56, 15.73s/it]

training loss: 0.8210843801498413


training:   3%|▎         | 2272/65500 [10:07:58<276:15:54, 15.73s/it]

training loss: 0.8593921661376953


training:   3%|▎         | 2273/65500 [10:08:14<276:13:34, 15.73s/it]

training loss: 0.8975961804389954


training:   3%|▎         | 2274/65500 [10:08:30<276:15:27, 15.73s/it]

training loss: 0.9293222427368164


training:   3%|▎         | 2275/65500 [10:08:45<276:10:51, 15.73s/it]

training loss: 0.6466062664985657


training:   3%|▎         | 2276/65500 [10:09:01<276:25:21, 15.74s/it]

training loss: 0.9729242920875549


training:   3%|▎         | 2277/65500 [10:09:17<276:15:05, 15.73s/it]

training loss: 0.7856886386871338


training:   3%|▎         | 2278/65500 [10:09:33<276:10:35, 15.73s/it]

training loss: 0.8535647392272949


training:   3%|▎         | 2279/65500 [10:09:48<276:04:07, 15.72s/it]

training loss: 0.8758503794670105


training:   3%|▎         | 2280/65500 [10:10:04<276:02:30, 15.72s/it]

training loss: 0.9182687997817993


training:   3%|▎         | 2281/65500 [10:10:20<276:03:44, 15.72s/it]

training loss: 0.9931955337524414


training:   3%|▎         | 2282/65500 [10:10:35<276:06:15, 15.72s/it]

training loss: 0.8431840538978577


training:   3%|▎         | 2283/65500 [10:10:51<276:00:36, 15.72s/it]

training loss: 0.7455504536628723


training:   3%|▎         | 2284/65500 [10:11:07<275:59:51, 15.72s/it]

training loss: 0.7164242267608643


training:   3%|▎         | 2285/65500 [10:11:23<276:01:11, 15.72s/it]

training loss: 0.777248203754425


training:   3%|▎         | 2286/65500 [10:11:38<275:59:56, 15.72s/it]

training loss: 0.9029816389083862


training:   3%|▎         | 2287/65500 [10:11:54<275:58:05, 15.72s/it]

training loss: 0.8909836411476135


training:   3%|▎         | 2288/65500 [10:12:10<275:56:19, 15.72s/it]

training loss: 0.9169648885726929


training:   3%|▎         | 2289/65500 [10:12:25<275:53:14, 15.71s/it]

training loss: 0.8432979583740234


training:   3%|▎         | 2290/65500 [10:12:41<275:57:46, 15.72s/it]

training loss: 1.0539884567260742


training:   3%|▎         | 2291/65500 [10:12:57<275:53:28, 15.71s/it]

training loss: 0.8632112145423889


training:   3%|▎         | 2292/65500 [10:13:13<275:52:46, 15.71s/it]

training loss: 0.7555264234542847


training:   4%|▎         | 2293/65500 [10:13:28<275:50:38, 15.71s/it]

training loss: 1.2638318538665771


training:   4%|▎         | 2294/65500 [10:13:44<276:05:51, 15.73s/it]

training loss: 0.9214145541191101


training:   4%|▎         | 2295/65500 [10:14:00<276:03:26, 15.72s/it]

training loss: 1.2621067762374878


training:   4%|▎         | 2296/65500 [10:14:15<276:07:37, 15.73s/it]

training loss: 0.6321764588356018


training:   4%|▎         | 2297/65500 [10:14:31<276:03:48, 15.72s/it]

training loss: 0.9064144492149353


training:   4%|▎         | 2298/65500 [10:14:47<276:04:05, 15.72s/it]

training loss: 0.6939501762390137


training:   4%|▎         | 2299/65500 [10:15:03<276:18:21, 15.74s/it]

training loss: 0.442402720451355


training:   4%|▎         | 2300/65500 [10:15:18<276:11:52, 15.73s/it]

training loss: 1.0652858018875122
training loss: 0.839809000492096


training:   4%|▎         | 2301/65500 [10:15:36<283:38:05, 16.16s/it]

validation loss: 1.689278244972229


training:   4%|▎         | 2302/65500 [10:15:51<281:23:37, 16.03s/it]

training loss: 0.6155670285224915


training:   4%|▎         | 2303/65500 [10:16:07<279:44:14, 15.94s/it]

training loss: 0.7590980529785156


training:   4%|▎         | 2304/65500 [10:16:23<278:33:25, 15.87s/it]

training loss: 0.957984447479248


training:   4%|▎         | 2305/65500 [10:16:38<277:45:45, 15.82s/it]

training loss: 1.2132889032363892


training:   4%|▎         | 2306/65500 [10:16:54<277:08:58, 15.79s/it]

training loss: 0.7680781483650208


training:   4%|▎         | 2307/65500 [10:17:10<276:50:13, 15.77s/it]

training loss: 0.8243077397346497


training:   4%|▎         | 2308/65500 [10:17:26<276:32:01, 15.75s/it]

training loss: 1.1389721632003784


training:   4%|▎         | 2309/65500 [10:17:41<276:23:25, 15.75s/it]

training loss: 1.15668785572052


training:   4%|▎         | 2310/65500 [10:17:57<276:12:02, 15.74s/it]

training loss: 0.8737092018127441


training:   4%|▎         | 2311/65500 [10:18:13<276:11:28, 15.74s/it]

training loss: 0.9807791709899902


training:   4%|▎         | 2312/65500 [10:18:28<276:09:30, 15.73s/it]

training loss: 0.7410041093826294


training:   4%|▎         | 2313/65500 [10:18:44<276:02:10, 15.73s/it]

training loss: 0.7292166948318481


training:   4%|▎         | 2314/65500 [10:19:00<275:55:42, 15.72s/it]

training loss: 0.5515468120574951


training:   4%|▎         | 2315/65500 [10:19:16<275:53:08, 15.72s/it]

training loss: 1.1604046821594238


training:   4%|▎         | 2316/65500 [10:19:31<275:50:22, 15.72s/it]

training loss: 0.6130961775779724


training:   4%|▎         | 2317/65500 [10:19:47<276:04:31, 15.73s/it]

training loss: 0.9105532169342041


training:   4%|▎         | 2318/65500 [10:20:03<276:05:19, 15.73s/it]

training loss: 0.5462953448295593


training:   4%|▎         | 2319/65500 [10:20:19<276:06:56, 15.73s/it]

training loss: 0.8300316333770752


training:   4%|▎         | 2320/65500 [10:20:34<276:03:19, 15.73s/it]

training loss: 1.074573278427124


training:   4%|▎         | 2321/65500 [10:20:50<276:03:57, 15.73s/it]

training loss: 0.8772870898246765


training:   4%|▎         | 2322/65500 [10:21:06<276:12:56, 15.74s/it]

training loss: 0.6341190934181213


training:   4%|▎         | 2323/65500 [10:21:21<276:11:53, 15.74s/it]

training loss: 0.8082196712493896


training:   4%|▎         | 2324/65500 [10:21:37<276:09:21, 15.74s/it]

training loss: 0.7121701836585999


training:   4%|▎         | 2325/65500 [10:21:53<276:03:23, 15.73s/it]

training loss: 1.1252334117889404


training:   4%|▎         | 2326/65500 [10:22:09<276:20:43, 15.75s/it]

training loss: 0.751431941986084


training:   4%|▎         | 2327/65500 [10:22:25<276:32:37, 15.76s/it]

training loss: 1.122422695159912


training:   4%|▎         | 2328/65500 [10:22:40<276:39:48, 15.77s/it]

training loss: 0.9357620477676392


training:   4%|▎         | 2329/65500 [10:22:56<276:35:31, 15.76s/it]

training loss: 0.665964663028717


training:   4%|▎         | 2330/65500 [10:23:12<276:39:45, 15.77s/it]

training loss: 0.6030095815658569


training:   4%|▎         | 2331/65500 [10:23:28<276:43:42, 15.77s/it]

training loss: 1.0639221668243408


training:   4%|▎         | 2332/65500 [10:23:43<276:51:23, 15.78s/it]

training loss: 0.9618718028068542


training:   4%|▎         | 2333/65500 [10:23:59<276:46:20, 15.77s/it]

training loss: 0.9647849798202515


training:   4%|▎         | 2334/65500 [10:24:15<276:48:25, 15.78s/it]

training loss: 1.363431692123413


training:   4%|▎         | 2335/65500 [10:24:31<276:47:27, 15.78s/it]

training loss: 0.9106090664863586


training:   4%|▎         | 2336/65500 [10:24:46<276:45:38, 15.77s/it]

training loss: 0.7272512316703796


training:   4%|▎         | 2337/65500 [10:25:02<276:46:55, 15.78s/it]

training loss: 1.0387630462646484


training:   4%|▎         | 2338/65500 [10:25:18<276:45:14, 15.77s/it]

training loss: 0.9321944713592529


training:   4%|▎         | 2339/65500 [10:25:34<276:36:01, 15.77s/it]

training loss: 0.9551704525947571


training:   4%|▎         | 2340/65500 [10:25:50<276:33:46, 15.76s/it]

training loss: 1.0362111330032349


training:   4%|▎         | 2341/65500 [10:26:05<276:29:46, 15.76s/it]

training loss: 0.6554199457168579


training:   4%|▎         | 2342/65500 [10:26:21<276:18:30, 15.75s/it]

training loss: 0.585979163646698


training:   4%|▎         | 2343/65500 [10:26:37<276:11:26, 15.74s/it]

training loss: 0.9353023767471313


training:   4%|▎         | 2344/65500 [10:26:52<276:03:18, 15.74s/it]

training loss: 0.9915651679039001


training:   4%|▎         | 2345/65500 [10:27:08<276:05:25, 15.74s/it]

training loss: 0.870212972164154


training:   4%|▎         | 2346/65500 [10:27:24<276:11:23, 15.74s/it]

training loss: 0.9350847005844116


training:   4%|▎         | 2347/65500 [10:27:40<276:02:41, 15.74s/it]

training loss: 0.8852758407592773


training:   4%|▎         | 2348/65500 [10:27:55<275:53:25, 15.73s/it]

training loss: 0.8716777563095093


training:   4%|▎         | 2349/65500 [10:28:11<275:49:57, 15.72s/it]

training loss: 1.0712685585021973


training:   4%|▎         | 2350/65500 [10:28:27<275:48:26, 15.72s/it]

training loss: 0.636841893196106


training:   4%|▎         | 2351/65500 [10:28:43<275:45:08, 15.72s/it]

training loss: 0.7296550869941711


training:   4%|▎         | 2352/65500 [10:28:58<275:43:10, 15.72s/it]

training loss: 0.8487077951431274


training:   4%|▎         | 2353/65500 [10:29:14<275:44:18, 15.72s/it]

training loss: 0.8439211845397949


training:   4%|▎         | 2354/65500 [10:29:30<275:42:08, 15.72s/it]

training loss: 0.8524563908576965


training:   4%|▎         | 2355/65500 [10:29:45<275:38:51, 15.72s/it]

training loss: 0.9920006990432739


training:   4%|▎         | 2356/65500 [10:30:01<275:37:51, 15.71s/it]

training loss: 1.052972674369812


training:   4%|▎         | 2357/65500 [10:30:17<275:38:37, 15.72s/it]

training loss: 0.9005945920944214


training:   4%|▎         | 2358/65500 [10:30:33<275:39:04, 15.72s/it]

training loss: 0.8140830993652344


training:   4%|▎         | 2359/65500 [10:30:48<275:39:16, 15.72s/it]

training loss: 1.229047179222107


training:   4%|▎         | 2360/65500 [10:31:04<275:39:32, 15.72s/it]

training loss: 0.7983773946762085


training:   4%|▎         | 2361/65500 [10:31:20<275:40:15, 15.72s/it]

training loss: 0.905951976776123


training:   4%|▎         | 2362/65500 [10:31:35<275:38:22, 15.72s/it]

training loss: 0.574256956577301


training:   4%|▎         | 2363/65500 [10:31:51<275:42:20, 15.72s/it]

training loss: 1.1709561347961426


training:   4%|▎         | 2364/65500 [10:32:07<276:00:48, 15.74s/it]

training loss: 0.9416480660438538


training:   4%|▎         | 2365/65500 [10:32:23<275:59:01, 15.74s/it]

training loss: 0.947621762752533


training:   4%|▎         | 2366/65500 [10:32:38<276:01:10, 15.74s/it]

training loss: 1.0503904819488525


training:   4%|▎         | 2367/65500 [10:32:54<275:55:15, 15.73s/it]

training loss: 0.9206593036651611


training:   4%|▎         | 2368/65500 [10:33:10<275:51:40, 15.73s/it]

training loss: 1.149510383605957


training:   4%|▎         | 2369/65500 [10:33:26<276:04:27, 15.74s/it]

training loss: 0.5615657567977905


training:   4%|▎         | 2370/65500 [10:33:41<275:55:19, 15.73s/it]

training loss: 0.9682791829109192


training:   4%|▎         | 2371/65500 [10:33:57<275:46:47, 15.73s/it]

training loss: 0.4136960506439209


training:   4%|▎         | 2372/65500 [10:34:13<275:43:20, 15.72s/it]

training loss: 0.8707988858222961


training:   4%|▎         | 2373/65500 [10:34:28<275:41:28, 15.72s/it]

training loss: 0.6392266154289246


training:   4%|▎         | 2374/65500 [10:34:44<275:40:18, 15.72s/it]

training loss: 1.0628780126571655


training:   4%|▎         | 2375/65500 [10:35:00<275:36:48, 15.72s/it]

training loss: 0.5596920251846313


training:   4%|▎         | 2376/65500 [10:35:16<275:36:05, 15.72s/it]

training loss: 0.6818634271621704


training:   4%|▎         | 2377/65500 [10:35:31<275:36:30, 15.72s/it]

training loss: 0.6933057308197021


training:   4%|▎         | 2378/65500 [10:35:47<275:37:08, 15.72s/it]

training loss: 0.7302497029304504


training:   4%|▎         | 2379/65500 [10:36:03<275:38:37, 15.72s/it]

training loss: 0.83120197057724


training:   4%|▎         | 2380/65500 [10:36:19<275:38:22, 15.72s/it]

training loss: 0.6760707497596741


training:   4%|▎         | 2381/65500 [10:36:34<275:38:33, 15.72s/it]

training loss: 0.5602646470069885


training:   4%|▎         | 2382/65500 [10:36:50<275:38:59, 15.72s/it]

training loss: 1.098551630973816


training:   4%|▎         | 2383/65500 [10:37:06<275:40:14, 15.72s/it]

training loss: 1.1330287456512451


training:   4%|▎         | 2384/65500 [10:37:21<275:38:29, 15.72s/it]

training loss: 0.9081164002418518


training:   4%|▎         | 2385/65500 [10:37:37<275:38:01, 15.72s/it]

training loss: 0.8816810250282288


training:   4%|▎         | 2386/65500 [10:37:53<275:34:36, 15.72s/it]

training loss: 0.751786470413208


training:   4%|▎         | 2387/65500 [10:38:09<275:54:02, 15.74s/it]

training loss: 0.9084903001785278


training:   4%|▎         | 2388/65500 [10:38:24<275:49:52, 15.73s/it]

training loss: 0.7176337242126465


training:   4%|▎         | 2389/65500 [10:38:40<275:47:05, 15.73s/it]

training loss: 1.0341110229492188


training:   4%|▎         | 2390/65500 [10:38:56<275:45:01, 15.73s/it]

training loss: 1.138956904411316


training:   4%|▎         | 2391/65500 [10:39:12<275:43:35, 15.73s/it]

training loss: 1.0438143014907837


training:   4%|▎         | 2392/65500 [10:39:27<275:57:16, 15.74s/it]

training loss: 1.112060308456421


training:   4%|▎         | 2393/65500 [10:39:43<275:49:45, 15.73s/it]

training loss: 0.9642396569252014


training:   4%|▎         | 2394/65500 [10:39:59<275:44:22, 15.73s/it]

training loss: 0.5152890086174011


training:   4%|▎         | 2395/65500 [10:40:14<275:41:42, 15.73s/it]

training loss: 1.0757343769073486


training:   4%|▎         | 2396/65500 [10:40:30<275:40:10, 15.73s/it]

training loss: 0.7006642818450928


training:   4%|▎         | 2397/65500 [10:40:46<275:41:44, 15.73s/it]

training loss: 1.235775351524353


training:   4%|▎         | 2398/65500 [10:41:02<275:37:01, 15.72s/it]

training loss: 0.643944501876831


training:   4%|▎         | 2399/65500 [10:41:17<275:33:23, 15.72s/it]

training loss: 0.588901698589325


training:   4%|▎         | 2400/65500 [10:41:33<275:33:36, 15.72s/it]

training loss: 0.7282171249389648
training loss: 0.7663370966911316


training:   4%|▎         | 2401/65500 [10:41:50<282:59:56, 16.15s/it]

validation loss: 1.5142866373062134


training:   4%|▎         | 2402/65500 [10:42:06<280:55:43, 16.03s/it]

training loss: 0.8510820865631104


training:   4%|▎         | 2403/65500 [10:42:22<279:17:45, 15.94s/it]

training loss: 0.9759883880615234


training:   4%|▎         | 2404/65500 [10:42:37<278:13:17, 15.87s/it]

training loss: 0.45533519983291626


training:   4%|▎         | 2405/65500 [10:42:53<277:23:52, 15.83s/it]

training loss: 0.7585225701332092


training:   4%|▎         | 2406/65500 [10:43:09<276:47:46, 15.79s/it]

training loss: 0.6540449261665344


training:   4%|▎         | 2407/65500 [10:43:25<276:21:45, 15.77s/it]

training loss: 0.5807905197143555


training:   4%|▎         | 2408/65500 [10:43:40<276:06:14, 15.75s/it]

training loss: 0.775246262550354


training:   4%|▎         | 2409/65500 [10:43:56<275:55:41, 15.74s/it]

training loss: 0.7745107412338257


training:   4%|▎         | 2410/65500 [10:44:12<276:08:55, 15.76s/it]

training loss: 0.9267134070396423


training:   4%|▎         | 2411/65500 [10:44:28<275:59:35, 15.75s/it]

training loss: 0.5063561797142029


training:   4%|▎         | 2412/65500 [10:44:43<275:56:20, 15.75s/it]

training loss: 0.8774979710578918


training:   4%|▎         | 2413/65500 [10:44:59<275:52:29, 15.74s/it]

training loss: 1.056553840637207


training:   4%|▎         | 2414/65500 [10:45:15<275:47:49, 15.74s/it]

training loss: 0.8546731472015381


training:   4%|▎         | 2415/65500 [10:45:30<276:02:22, 15.75s/it]

training loss: 0.829089343547821


training:   4%|▎         | 2416/65500 [10:45:46<275:52:48, 15.74s/it]

training loss: 0.7781439423561096


training:   4%|▎         | 2417/65500 [10:46:02<275:44:14, 15.74s/it]

training loss: 0.9471044540405273


training:   4%|▎         | 2418/65500 [10:46:18<275:40:15, 15.73s/it]

training loss: 1.287726640701294


training:   4%|▎         | 2419/65500 [10:46:33<275:34:15, 15.73s/it]

training loss: 0.9218732714653015


training:   4%|▎         | 2420/65500 [10:46:49<275:31:18, 15.72s/it]

training loss: 0.8668855428695679


training:   4%|▎         | 2421/65500 [10:47:05<275:31:01, 15.72s/it]

training loss: 0.7859649658203125


training:   4%|▎         | 2422/65500 [10:47:21<275:27:03, 15.72s/it]

training loss: 0.726677656173706


training:   4%|▎         | 2423/65500 [10:47:36<275:29:26, 15.72s/it]

training loss: 1.0750149488449097


training:   4%|▎         | 2424/65500 [10:47:52<275:23:38, 15.72s/it]

training loss: 1.0569157600402832


training:   4%|▎         | 2425/65500 [10:48:08<275:25:13, 15.72s/it]

training loss: 0.869942843914032


training:   4%|▎         | 2426/65500 [10:48:23<275:25:51, 15.72s/it]

training loss: 1.2128642797470093


training:   4%|▎         | 2427/65500 [10:48:39<275:26:06, 15.72s/it]

training loss: 0.48398590087890625


training:   4%|▎         | 2428/65500 [10:48:55<275:28:17, 15.72s/it]

training loss: 0.89351487159729


training:   4%|▎         | 2429/65500 [10:49:11<275:25:49, 15.72s/it]

training loss: 0.5620192289352417


training:   4%|▎         | 2430/65500 [10:49:26<275:25:08, 15.72s/it]

training loss: 1.0783491134643555


training:   4%|▎         | 2431/65500 [10:49:42<275:25:56, 15.72s/it]

training loss: 0.37995582818984985


training:   4%|▎         | 2432/65500 [10:49:58<275:21:19, 15.72s/it]

training loss: 0.8293492197990417


training:   4%|▎         | 2433/65500 [10:50:13<275:39:00, 15.73s/it]

training loss: 0.24288716912269592


training:   4%|▎         | 2434/65500 [10:50:29<275:36:03, 15.73s/it]

training loss: 0.895152747631073


training:   4%|▎         | 2435/65500 [10:50:45<275:30:35, 15.73s/it]

training loss: 1.1503931283950806


training:   4%|▎         | 2436/65500 [10:51:01<275:30:03, 15.73s/it]

training loss: 0.8076258301734924


training:   4%|▎         | 2437/65500 [10:51:16<275:31:08, 15.73s/it]

training loss: 0.637610673904419


training:   4%|▎         | 2438/65500 [10:51:32<275:46:27, 15.74s/it]

training loss: 0.8518428802490234


training:   4%|▎         | 2439/65500 [10:51:48<275:39:42, 15.74s/it]

training loss: 0.7623754739761353


training:   4%|▎         | 2440/65500 [10:52:04<275:32:03, 15.73s/it]

training loss: 0.9972289204597473


training:   4%|▎         | 2441/65500 [10:52:19<275:31:40, 15.73s/it]

training loss: 0.901597797870636


training:   4%|▎         | 2442/65500 [10:52:35<275:34:41, 15.73s/it]

training loss: 0.8256208896636963


training:   4%|▎         | 2443/65500 [10:52:51<275:47:27, 15.75s/it]

training loss: 0.9667495489120483


training:   4%|▎         | 2444/65500 [10:53:07<275:56:51, 15.75s/it]

training loss: 1.108792781829834


training:   4%|▎         | 2445/65500 [10:53:22<276:00:14, 15.76s/it]

training loss: 1.110422968864441


training:   4%|▎         | 2446/65500 [10:53:38<276:06:07, 15.76s/it]

training loss: 0.9551178216934204


training:   4%|▎         | 2447/65500 [10:53:54<276:11:50, 15.77s/it]

training loss: 0.8923977017402649


training:   4%|▎         | 2448/65500 [10:54:10<276:16:27, 15.77s/it]

training loss: 1.111840844154358


training:   4%|▎         | 2449/65500 [10:54:26<276:15:53, 15.77s/it]

training loss: 0.9168646931648254


training:   4%|▎         | 2450/65500 [10:54:41<276:20:18, 15.78s/it]

training loss: 0.5333242416381836


training:   4%|▎         | 2451/65500 [10:54:57<276:20:36, 15.78s/it]

training loss: 0.6769660711288452


training:   4%|▎         | 2452/65500 [10:55:13<276:20:15, 15.78s/it]

training loss: 0.9218921661376953


training:   4%|▎         | 2453/65500 [10:55:29<276:15:11, 15.77s/it]

training loss: 0.6169149279594421


training:   4%|▎         | 2454/65500 [10:55:44<276:10:10, 15.77s/it]

training loss: 0.9060297608375549


training:   4%|▎         | 2455/65500 [10:56:00<276:04:19, 15.76s/it]

training loss: 0.9923973679542542


training:   4%|▎         | 2456/65500 [10:56:16<276:16:52, 15.78s/it]

training loss: 0.6007951498031616


training:   4%|▍         | 2457/65500 [10:56:32<276:03:56, 15.76s/it]

training loss: 0.9246493577957153


training:   4%|▍         | 2458/65500 [10:56:47<275:49:25, 15.75s/it]

training loss: 0.8643662929534912


training:   4%|▍         | 2459/65500 [10:57:03<275:37:30, 15.74s/it]

training loss: 0.833564817905426


training:   4%|▍         | 2460/65500 [10:57:19<275:30:35, 15.73s/it]

training loss: 0.5698974132537842


training:   4%|▍         | 2461/65500 [10:57:35<275:47:25, 15.75s/it]

training loss: 0.7235291600227356


training:   4%|▍         | 2462/65500 [10:57:50<275:38:48, 15.74s/it]

training loss: 1.1534643173217773


training:   4%|▍         | 2463/65500 [10:58:06<275:29:45, 15.73s/it]

training loss: 0.8071516752243042


training:   4%|▍         | 2464/65500 [10:58:22<275:26:04, 15.73s/it]

training loss: 0.808521032333374


training:   4%|▍         | 2465/65500 [10:58:37<275:23:08, 15.73s/it]

training loss: 0.705127477645874


training:   4%|▍         | 2466/65500 [10:58:53<275:18:53, 15.72s/it]

training loss: 0.38701626658439636


training:   4%|▍         | 2467/65500 [10:59:09<275:13:50, 15.72s/it]

training loss: 1.0860058069229126


training:   4%|▍         | 2468/65500 [10:59:25<275:14:15, 15.72s/it]

training loss: 0.7160642743110657


training:   4%|▍         | 2469/65500 [10:59:40<275:11:32, 15.72s/it]

training loss: 1.0963354110717773


training:   4%|▍         | 2470/65500 [10:59:56<275:09:24, 15.72s/it]

training loss: 0.8670767545700073


training:   4%|▍         | 2471/65500 [11:00:12<275:10:40, 15.72s/it]

training loss: 1.0750643014907837


training:   4%|▍         | 2472/65500 [11:00:28<275:11:38, 15.72s/it]

training loss: 1.000342607498169


training:   4%|▍         | 2473/65500 [11:00:43<275:13:43, 15.72s/it]

training loss: 0.9967367649078369


training:   4%|▍         | 2474/65500 [11:00:59<275:13:07, 15.72s/it]

training loss: 0.6800774931907654


training:   4%|▍         | 2475/65500 [11:01:15<275:13:03, 15.72s/it]

training loss: 0.9963151812553406


training:   4%|▍         | 2476/65500 [11:01:30<275:08:30, 15.72s/it]

training loss: 0.5267462134361267


training:   4%|▍         | 2477/65500 [11:01:46<275:13:34, 15.72s/it]

training loss: 0.758672833442688


training:   4%|▍         | 2478/65500 [11:02:02<275:12:08, 15.72s/it]

training loss: 0.7054165005683899


training:   4%|▍         | 2479/65500 [11:02:18<275:32:32, 15.74s/it]

training loss: 1.0746594667434692


training:   4%|▍         | 2480/65500 [11:02:33<275:30:35, 15.74s/it]

training loss: 0.8968249559402466


training:   4%|▍         | 2481/65500 [11:02:49<275:25:48, 15.73s/it]

training loss: 0.9915162324905396


training:   4%|▍         | 2482/65500 [11:03:05<275:21:22, 15.73s/it]

training loss: 0.542512059211731


training:   4%|▍         | 2483/65500 [11:03:21<275:18:26, 15.73s/it]

training loss: 0.920208215713501


training:   4%|▍         | 2484/65500 [11:03:36<275:40:04, 15.75s/it]

training loss: 0.8427106738090515


training:   4%|▍         | 2485/65500 [11:03:52<275:32:05, 15.74s/it]

training loss: 0.7901990413665771


training:   4%|▍         | 2486/65500 [11:04:08<275:59:17, 15.77s/it]

training loss: 1.1170181035995483


training:   4%|▍         | 2487/65500 [11:04:24<275:42:55, 15.75s/it]

training loss: 1.1881166696548462


training:   4%|▍         | 2488/65500 [11:04:39<275:32:22, 15.74s/it]

training loss: 0.7091671228408813


training:   4%|▍         | 2489/65500 [11:04:55<275:23:35, 15.73s/it]

training loss: 1.2693501710891724


training:   4%|▍         | 2490/65500 [11:05:11<275:18:06, 15.73s/it]

training loss: 0.8919464349746704


training:   4%|▍         | 2491/65500 [11:05:26<275:12:04, 15.72s/it]

training loss: 0.46552708745002747


training:   4%|▍         | 2492/65500 [11:05:42<275:08:48, 15.72s/it]

training loss: 0.9046268463134766


training:   4%|▍         | 2493/65500 [11:05:58<275:06:22, 15.72s/it]

training loss: 1.1157901287078857


training:   4%|▍         | 2494/65500 [11:06:14<275:08:03, 15.72s/it]

training loss: 1.1778299808502197


training:   4%|▍         | 2495/65500 [11:06:29<275:05:50, 15.72s/it]

training loss: 1.073132872581482


training:   4%|▍         | 2496/65500 [11:06:45<275:08:00, 15.72s/it]

training loss: 0.8087872862815857


training:   4%|▍         | 2497/65500 [11:07:01<275:05:12, 15.72s/it]

training loss: 0.8822349309921265


training:   4%|▍         | 2498/65500 [11:07:16<275:03:14, 15.72s/it]

training loss: 0.73171466588974


training:   4%|▍         | 2499/65500 [11:07:32<275:01:01, 15.72s/it]

training loss: 0.8972334265708923


training:   4%|▍         | 2500/65500 [11:07:48<275:03:11, 15.72s/it]

training loss: 0.6549447178840637
training loss: 0.7203300595283508



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.3459360599517822
ko do
clenskych statov EU tovar v hodnote 777,3 miliardy eur a hodnota dovozu
z EU dosiahla 631,3 miliardy eur. V porovnani s rokom 2018 sa export do
Unie znizil o 0,2 % a dovoz sa zvysil o 1,3 %.
Z toho export do krajin eurozony v roku 2019 klesol o 0,1 % na
491,8 miliardy eur a import vzrastol o 0,8 % na 409,1 miliardy eur.
Export z Nemecka mimo EU (do tzv. tretich krajin) dosiahol v minulom
roku 550,3 miliardy eur a import 472,8 miliardy EUR. V porovnani s rokom
2018 sa vyvoz do tretich krajin zvysil o 2,2 % a dovoz o 1,6 %.
V samotnom decembri 2019 Nemecko zvysilo export medzirocne o 2,3 % na
98 miliard eur a import o 1,2 % na 82,8 miliardy eur.
V medzimesacnom porovnani a po uprave kalendara a sezonnosti sa
decembrovy vyvoz zvysil len o 0,1 % a dovoz o 0,7 %.
Destatis zaroven poukazal na predbezne udaje nemeckej centralnej banky
Bundesbank, podla ktorych sa bezny ucet platobnej bilancie Nemecka v roku
2019 skoncil s prebytkom 266,


generating:   0%|          | 1/512 [00:00<02:03,  4.15it/s][A
generating:   0%|          | 2/512 [00:00<02:02,  4.17it/s][A
generating:   1%|          | 3/512 [00:00<02:01,  4.20it/s][A
generating:   1%|          | 4/512 [00:00<02:00,  4.21it/s][A
generating:   1%|          | 5/512 [00:01<02:01,  4.17it/s][A
generating:   1%|          | 6/512 [00:01<02:01,  4.17it/s][A
generating:   1%|▏         | 7/512 [00:01<01:59,  4.23it/s][A
generating:   2%|▏         | 8/512 [00:01<01:58,  4.25it/s][A
generating:   2%|▏         | 9/512 [00:02<01:57,  4.28it/s][A
generating:   2%|▏         | 10/512 [00:02<01:58,  4.24it/s][A
generating:   2%|▏         | 11/512 [00:02<01:58,  4.23it/s][A
generating:   2%|▏         | 12/512 [00:02<01:57,  4.24it/s][A
generating:   3%|▎         | 13/512 [00:03<01:56,  4.27it/s][A
generating:   3%|▎         | 14/512 [00:03<01:56,  4.28it/s][A
generating:   3%|▎         | 15/512 [00:03<02:00,  4.14it/s][A
generating:   3%|▎         | 16/512 [00:03<02:02

rozsirit znizenu sadzbu dane z motorovych vozidiel.
Za ludi a podporne podla Tomasa
Meraveho zlavu o znizenu sadzbu DPH na vsetky potravin pre
agenturu SITA. Ambiciou narodniari s tym, ze vsak tento rok vzrastol, a to
v predchadzaju velkych stropovat clen za Slovensko.
Ak par dosiahnut danove zatazenie
na strane ponuke v roku 2002 sa znizilo z europskou unie od predstavovani danoveho
odvodu zamestnanosti a transparenty rok.V danovemu
rovnosti vypocte zamestnanosti pre ludi bude zamerana na spravodli


training:   4%|▍         | 2502/65500 [11:10:23<730:56:25, 41.77s/it]

training loss: 1.0552784204483032


training:   4%|▍         | 2503/65500 [11:10:39<594:08:29, 33.95s/it]

training loss: 0.886872410774231


training:   4%|▍         | 2504/65500 [11:10:55<498:25:37, 28.48s/it]

training loss: 1.0275083780288696


training:   4%|▍         | 2505/65500 [11:11:11<431:26:48, 24.66s/it]

training loss: 0.4502279758453369


training:   4%|▍         | 2506/65500 [11:11:26<384:32:57, 21.98s/it]

training loss: 0.8718219995498657


training:   4%|▍         | 2507/65500 [11:11:42<351:42:49, 20.10s/it]

training loss: 0.6421607732772827


training:   4%|▍         | 2508/65500 [11:11:58<328:40:54, 18.78s/it]

training loss: 0.8979093432426453


training:   4%|▍         | 2509/65500 [11:12:13<312:35:04, 17.86s/it]

training loss: 0.7823082208633423


training:   4%|▍         | 2510/65500 [11:12:29<301:22:46, 17.22s/it]

training loss: 0.908308744430542


training:   4%|▍         | 2511/65500 [11:12:45<293:31:11, 16.78s/it]

training loss: 1.0021202564239502


training:   4%|▍         | 2512/65500 [11:13:01<287:57:16, 16.46s/it]

training loss: 0.5102494955062866


training:   4%|▍         | 2513/65500 [11:13:16<284:05:47, 16.24s/it]

training loss: 0.7947309017181396


training:   4%|▍         | 2514/65500 [11:13:32<281:23:39, 16.08s/it]

training loss: 0.9956367015838623


training:   4%|▍         | 2515/65500 [11:13:48<279:26:53, 15.97s/it]

training loss: 0.9283784031867981


training:   4%|▍         | 2516/65500 [11:14:03<278:08:54, 15.90s/it]

training loss: 0.8716835379600525


training:   4%|▍         | 2517/65500 [11:14:19<277:26:12, 15.86s/it]

training loss: 0.5054746270179749


training:   4%|▍         | 2518/65500 [11:14:35<277:00:20, 15.83s/it]

training loss: 1.3229210376739502


training:   4%|▍         | 2519/65500 [11:14:51<276:30:21, 15.81s/it]

training loss: 0.6525393724441528


training:   4%|▍         | 2520/65500 [11:15:06<276:02:25, 15.78s/it]

training loss: 0.6434910297393799


training:   4%|▍         | 2521/65500 [11:15:22<275:45:33, 15.76s/it]

training loss: 0.9095832705497742


training:   4%|▍         | 2522/65500 [11:15:38<275:44:25, 15.76s/it]

training loss: 0.6642030477523804


training:   4%|▍         | 2523/65500 [11:15:54<275:42:24, 15.76s/it]

training loss: 0.8898622393608093


training:   4%|▍         | 2524/65500 [11:16:09<275:31:16, 15.75s/it]

training loss: 0.7712453007698059


training:   4%|▍         | 2525/65500 [11:16:25<275:23:54, 15.74s/it]

training loss: 0.5161725282669067


training:   4%|▍         | 2526/65500 [11:16:41<275:17:11, 15.74s/it]

training loss: 0.9878097772598267


training:   4%|▍         | 2527/65500 [11:16:57<275:08:40, 15.73s/it]

training loss: 0.8258591890335083


training:   4%|▍         | 2528/65500 [11:17:12<275:04:09, 15.73s/it]

training loss: 1.1814237833023071


training:   4%|▍         | 2529/65500 [11:17:28<275:04:26, 15.73s/it]

training loss: 0.8750654458999634


training:   4%|▍         | 2530/65500 [11:17:44<275:05:43, 15.73s/it]

training loss: 1.1505316495895386


training:   4%|▍         | 2531/65500 [11:18:00<275:00:23, 15.72s/it]

training loss: 0.6851385235786438


training:   4%|▍         | 2532/65500 [11:18:15<274:59:47, 15.72s/it]

training loss: 1.0130184888839722


training:   4%|▍         | 2533/65500 [11:18:31<275:01:43, 15.72s/it]

training loss: 1.0342086553573608


training:   4%|▍         | 2534/65500 [11:18:47<275:00:06, 15.72s/it]

training loss: 0.9072401523590088


training:   4%|▍         | 2535/65500 [11:19:02<274:56:26, 15.72s/it]

training loss: 0.7679208517074585


training:   4%|▍         | 2536/65500 [11:19:18<274:56:27, 15.72s/it]

training loss: 0.7467327117919922


training:   4%|▍         | 2537/65500 [11:19:34<274:56:17, 15.72s/it]

training loss: 0.8924923539161682


training:   4%|▍         | 2538/65500 [11:19:50<274:55:10, 15.72s/it]

training loss: 0.5403311252593994


training:   4%|▍         | 2539/65500 [11:20:05<274:57:25, 15.72s/it]

training loss: 1.0773009061813354


training:   4%|▍         | 2540/65500 [11:20:21<274:57:16, 15.72s/it]

training loss: 1.0724296569824219


training:   4%|▍         | 2541/65500 [11:20:37<275:12:45, 15.74s/it]

training loss: 0.9022698402404785


training:   4%|▍         | 2542/65500 [11:20:52<275:09:00, 15.73s/it]

training loss: 0.7015186548233032


training:   4%|▍         | 2543/65500 [11:21:08<275:12:55, 15.74s/it]

training loss: 1.0820565223693848


training:   4%|▍         | 2544/65500 [11:21:24<275:25:00, 15.75s/it]

training loss: 1.0393534898757935


training:   4%|▍         | 2545/65500 [11:21:40<275:35:13, 15.76s/it]

training loss: 0.8341038227081299


training:   4%|▍         | 2546/65500 [11:21:56<276:04:03, 15.79s/it]

training loss: 0.8087305426597595


training:   4%|▍         | 2547/65500 [11:22:11<276:07:11, 15.79s/it]

training loss: 1.0109925270080566


training:   4%|▍         | 2548/65500 [11:22:27<276:08:01, 15.79s/it]

training loss: 0.5684946179389954


training:   4%|▍         | 2549/65500 [11:22:43<276:05:44, 15.79s/it]

training loss: 0.9656022191047668


training:   4%|▍         | 2550/65500 [11:22:59<276:01:00, 15.78s/it]

training loss: 1.2885220050811768


training:   4%|▍         | 2551/65500 [11:23:15<275:59:41, 15.78s/it]

training loss: 1.0705885887145996


training:   4%|▍         | 2552/65500 [11:23:30<275:55:44, 15.78s/it]

training loss: 1.1723610162734985


training:   4%|▍         | 2553/65500 [11:23:46<275:59:36, 15.78s/it]

training loss: 0.8614364862442017


training:   4%|▍         | 2554/65500 [11:24:02<275:59:05, 15.78s/it]

training loss: 0.768912672996521


training:   4%|▍         | 2555/65500 [11:24:18<276:03:52, 15.79s/it]

training loss: 0.6503127813339233


training:   4%|▍         | 2556/65500 [11:24:34<275:59:07, 15.78s/it]

training loss: 0.5076409578323364


training:   4%|▍         | 2557/65500 [11:24:49<275:43:35, 15.77s/it]

training loss: 0.7802783250808716


training:   4%|▍         | 2558/65500 [11:25:05<275:27:34, 15.76s/it]

training loss: 0.8067727088928223


training:   4%|▍         | 2559/65500 [11:25:21<275:12:12, 15.74s/it]

training loss: 0.6171658039093018


training:   4%|▍         | 2560/65500 [11:25:36<275:06:52, 15.74s/it]

training loss: 0.9901705384254456


training:   4%|▍         | 2561/65500 [11:25:52<274:58:19, 15.73s/it]

training loss: 0.5561169385910034


training:   4%|▍         | 2562/65500 [11:26:08<274:54:59, 15.72s/it]

training loss: 0.5999106168746948


training:   4%|▍         | 2563/65500 [11:26:24<274:57:02, 15.73s/it]

training loss: 0.9586905837059021


training:   4%|▍         | 2564/65500 [11:26:39<275:23:30, 15.75s/it]

training loss: 0.7130504250526428


training:   4%|▍         | 2565/65500 [11:26:55<275:13:47, 15.74s/it]

training loss: 1.0220119953155518


training:   4%|▍         | 2566/65500 [11:27:11<275:07:51, 15.74s/it]

training loss: 0.7626509070396423


training:   4%|▍         | 2567/65500 [11:27:27<275:07:17, 15.74s/it]

training loss: 0.9157174229621887


training:   4%|▍         | 2568/65500 [11:27:42<275:00:58, 15.73s/it]

training loss: 1.1541378498077393


training:   4%|▍         | 2569/65500 [11:27:58<275:14:54, 15.75s/it]

training loss: 0.856980562210083


training:   4%|▍         | 2570/65500 [11:28:14<275:10:22, 15.74s/it]

training loss: 0.8698825836181641


training:   4%|▍         | 2571/65500 [11:28:29<275:03:56, 15.74s/it]

training loss: 0.9267687201499939


training:   4%|▍         | 2572/65500 [11:28:45<275:00:57, 15.73s/it]

training loss: 0.7648054361343384


training:   4%|▍         | 2573/65500 [11:29:01<274:55:31, 15.73s/it]

training loss: 0.7023328542709351


training:   4%|▍         | 2574/65500 [11:29:17<274:51:25, 15.72s/it]

training loss: 0.5855531096458435


training:   4%|▍         | 2575/65500 [11:29:32<274:49:20, 15.72s/it]

training loss: 0.8655135631561279


training:   4%|▍         | 2576/65500 [11:29:48<274:46:31, 15.72s/it]

training loss: 0.5075516104698181


training:   4%|▍         | 2577/65500 [11:30:04<274:49:52, 15.72s/it]

training loss: 0.8956209421157837


training:   4%|▍         | 2578/65500 [11:30:20<274:48:35, 15.72s/it]

training loss: 0.7108520865440369


training:   4%|▍         | 2579/65500 [11:30:35<274:48:46, 15.72s/it]

training loss: 0.9725776314735413


training:   4%|▍         | 2580/65500 [11:30:51<274:46:15, 15.72s/it]

training loss: 0.6948012709617615


training:   4%|▍         | 2581/65500 [11:31:07<274:45:51, 15.72s/it]

training loss: 0.8880964517593384


training:   4%|▍         | 2582/65500 [11:31:22<274:43:01, 15.72s/it]

training loss: 0.833901047706604


training:   4%|▍         | 2583/65500 [11:31:38<274:45:39, 15.72s/it]

training loss: 1.0591866970062256


training:   4%|▍         | 2584/65500 [11:31:54<274:43:32, 15.72s/it]

training loss: 0.9513014554977417


training:   4%|▍         | 2585/65500 [11:32:10<274:43:25, 15.72s/it]

training loss: 0.9266076683998108


training:   4%|▍         | 2586/65500 [11:32:25<274:44:06, 15.72s/it]

training loss: 0.5971183776855469


training:   4%|▍         | 2587/65500 [11:32:41<275:02:55, 15.74s/it]

training loss: 0.8817839026451111


training:   4%|▍         | 2588/65500 [11:32:57<275:02:35, 15.74s/it]

training loss: 1.3570291996002197


training:   4%|▍         | 2589/65500 [11:33:13<274:59:04, 15.74s/it]

training loss: 0.7203506827354431


training:   4%|▍         | 2590/65500 [11:33:28<274:56:29, 15.73s/it]

training loss: 0.6119108200073242


training:   4%|▍         | 2591/65500 [11:33:44<274:54:39, 15.73s/it]

training loss: 0.7783936262130737


training:   4%|▍         | 2592/65500 [11:34:00<275:04:41, 15.74s/it]

training loss: 0.8172726631164551


training:   4%|▍         | 2593/65500 [11:34:16<275:03:25, 15.74s/it]

training loss: 0.8655704259872437


training:   4%|▍         | 2594/65500 [11:34:31<274:57:26, 15.74s/it]

training loss: 0.7371525168418884


training:   4%|▍         | 2595/65500 [11:34:47<274:56:39, 15.73s/it]

training loss: 0.7715073227882385


training:   4%|▍         | 2596/65500 [11:35:03<274:49:44, 15.73s/it]

training loss: 0.4550957679748535


training:   4%|▍         | 2597/65500 [11:35:18<274:47:13, 15.73s/it]

training loss: 0.9295546412467957


training:   4%|▍         | 2598/65500 [11:35:34<274:44:23, 15.72s/it]

training loss: 0.8042385578155518


training:   4%|▍         | 2599/65500 [11:35:50<274:39:54, 15.72s/it]

training loss: 0.6949650049209595


training:   4%|▍         | 2600/65500 [11:36:06<274:41:32, 15.72s/it]

training loss: 0.9615737795829773
training loss: 0.6574261784553528


training:   4%|▍         | 2601/65500 [11:36:23<281:57:39, 16.14s/it]

validation loss: 1.5021884441375732


training:   4%|▍         | 2602/65500 [11:36:38<279:55:50, 16.02s/it]

training loss: 0.7961598634719849


training:   4%|▍         | 2603/65500 [11:36:54<278:18:38, 15.93s/it]

training loss: 1.2071343660354614


training:   4%|▍         | 2604/65500 [11:37:10<277:11:56, 15.87s/it]

training loss: 1.105698823928833


training:   4%|▍         | 2605/65500 [11:37:26<276:21:09, 15.82s/it]

training loss: 0.9987459182739258


training:   4%|▍         | 2606/65500 [11:37:41<275:49:36, 15.79s/it]

training loss: 0.46363669633865356


training:   4%|▍         | 2607/65500 [11:37:57<275:31:44, 15.77s/it]

training loss: 1.030343770980835


training:   4%|▍         | 2608/65500 [11:38:13<275:15:35, 15.76s/it]

training loss: 1.1632825136184692


training:   4%|▍         | 2609/65500 [11:38:28<275:06:31, 15.75s/it]

training loss: 0.5390915870666504


training:   4%|▍         | 2610/65500 [11:38:44<275:15:24, 15.76s/it]

training loss: 0.7814265489578247


training:   4%|▍         | 2611/65500 [11:39:00<275:12:02, 15.75s/it]

training loss: 0.8823633193969727


training:   4%|▍         | 2612/65500 [11:39:16<275:06:02, 15.75s/it]

training loss: 0.9023337364196777


training:   4%|▍         | 2613/65500 [11:39:31<274:59:03, 15.74s/it]

training loss: 1.2397575378417969


training:   4%|▍         | 2614/65500 [11:39:47<274:52:19, 15.74s/it]

training loss: 1.0854032039642334


training:   4%|▍         | 2615/65500 [11:40:03<275:00:59, 15.74s/it]

training loss: 0.531277596950531


training:   4%|▍         | 2616/65500 [11:40:19<275:06:17, 15.75s/it]

training loss: 0.9039201736450195


training:   4%|▍         | 2617/65500 [11:40:34<275:04:04, 15.75s/it]

training loss: 0.692453920841217


training:   4%|▍         | 2618/65500 [11:40:50<275:00:09, 15.74s/it]

training loss: 0.7389227747917175


training:   4%|▍         | 2619/65500 [11:41:06<274:50:47, 15.74s/it]

training loss: 0.9196071624755859


training:   4%|▍         | 2620/65500 [11:41:22<274:41:56, 15.73s/it]

training loss: 0.6394530534744263


training:   4%|▍         | 2621/65500 [11:41:37<274:38:20, 15.72s/it]

training loss: 0.5748986005783081


training:   4%|▍         | 2622/65500 [11:41:53<274:35:58, 15.72s/it]

training loss: 0.9936671853065491


training:   4%|▍         | 2623/65500 [11:42:09<274:35:21, 15.72s/it]

training loss: 0.721397340297699


training:   4%|▍         | 2624/65500 [11:42:24<274:35:17, 15.72s/it]

training loss: 0.6768356561660767


training:   4%|▍         | 2625/65500 [11:42:40<274:36:10, 15.72s/it]

training loss: 0.703579306602478


training:   4%|▍         | 2626/65500 [11:42:56<274:35:53, 15.72s/it]

training loss: 0.6390930414199829


training:   4%|▍         | 2627/65500 [11:43:12<274:36:33, 15.72s/it]

training loss: 0.8235505819320679


training:   4%|▍         | 2628/65500 [11:43:27<274:33:23, 15.72s/it]

training loss: 0.8708786368370056


training:   4%|▍         | 2629/65500 [11:43:43<274:32:45, 15.72s/it]

training loss: 0.9362114667892456


training:   4%|▍         | 2630/65500 [11:43:59<274:29:09, 15.72s/it]

training loss: 0.5987445116043091


training:   4%|▍         | 2631/65500 [11:44:15<274:29:44, 15.72s/it]

training loss: 0.8751161694526672


training:   4%|▍         | 2632/65500 [11:44:30<274:31:53, 15.72s/it]

training loss: 0.8861316442489624


training:   4%|▍         | 2633/65500 [11:44:46<274:41:37, 15.73s/it]

training loss: 0.6542822122573853


training:   4%|▍         | 2634/65500 [11:45:02<274:51:57, 15.74s/it]

training loss: 0.7802673578262329


training:   4%|▍         | 2635/65500 [11:45:17<274:46:12, 15.73s/it]

training loss: 0.9433800578117371


training:   4%|▍         | 2636/65500 [11:45:33<274:41:05, 15.73s/it]

training loss: 0.6938287615776062


training:   4%|▍         | 2637/65500 [11:45:49<274:41:48, 15.73s/it]

training loss: 0.7533857226371765


training:   4%|▍         | 2638/65500 [11:46:05<274:43:15, 15.73s/it]

training loss: 0.688958466053009


training:   4%|▍         | 2639/65500 [11:46:20<274:58:22, 15.75s/it]

training loss: 0.9624538421630859


training:   4%|▍         | 2640/65500 [11:46:36<274:52:00, 15.74s/it]

training loss: 0.6563122272491455


training:   4%|▍         | 2641/65500 [11:46:52<274:47:30, 15.74s/it]

training loss: 0.5920543670654297


training:   4%|▍         | 2642/65500 [11:47:08<274:44:15, 15.73s/it]

training loss: 0.9106011390686035


training:   4%|▍         | 2643/65500 [11:47:23<274:34:19, 15.73s/it]

training loss: 0.885891854763031


training:   4%|▍         | 2644/65500 [11:47:39<274:31:26, 15.72s/it]

training loss: 1.1271071434020996


training:   4%|▍         | 2645/65500 [11:47:55<274:26:34, 15.72s/it]

training loss: 0.7868949174880981


training:   4%|▍         | 2646/65500 [11:48:10<274:25:44, 15.72s/it]

training loss: 0.7740572094917297


training:   4%|▍         | 2647/65500 [11:48:26<274:29:49, 15.72s/it]

training loss: 0.8961465358734131


training:   4%|▍         | 2648/65500 [11:48:42<274:27:44, 15.72s/it]

training loss: 0.6543893814086914


training:   4%|▍         | 2649/65500 [11:48:58<274:27:24, 15.72s/it]

training loss: 0.6020420789718628


training:   4%|▍         | 2650/65500 [11:49:13<274:26:02, 15.72s/it]

training loss: 0.9755296111106873


training:   4%|▍         | 2651/65500 [11:49:29<274:26:11, 15.72s/it]

training loss: 0.8563094139099121


training:   4%|▍         | 2652/65500 [11:49:45<274:25:09, 15.72s/it]

training loss: 0.8408023118972778


training:   4%|▍         | 2653/65500 [11:50:01<274:22:13, 15.72s/it]

training loss: 0.6937717795372009


training:   4%|▍         | 2654/65500 [11:50:16<274:26:22, 15.72s/it]

training loss: 0.9887163043022156


training:   4%|▍         | 2655/65500 [11:50:32<274:28:57, 15.72s/it]

training loss: 0.5788838863372803


training:   4%|▍         | 2656/65500 [11:50:48<274:29:25, 15.72s/it]

training loss: 1.152003288269043


training:   4%|▍         | 2657/65500 [11:51:03<274:45:33, 15.74s/it]

training loss: 0.6510051488876343


training:   4%|▍         | 2658/65500 [11:51:19<274:44:22, 15.74s/it]

training loss: 0.9473938941955566


training:   4%|▍         | 2659/65500 [11:51:35<274:53:28, 15.75s/it]

training loss: 0.5783570408821106


training:   4%|▍         | 2660/65500 [11:51:51<275:07:45, 15.76s/it]

training loss: 0.9868161678314209


training:   4%|▍         | 2661/65500 [11:52:07<275:22:10, 15.78s/it]

training loss: 0.9016790390014648


training:   4%|▍         | 2662/65500 [11:52:22<275:42:48, 15.80s/it]

training loss: 0.6607393622398376


training:   4%|▍         | 2663/65500 [11:52:38<275:39:02, 15.79s/it]

training loss: 0.5283105373382568


training:   4%|▍         | 2664/65500 [11:52:54<275:39:48, 15.79s/it]

training loss: 1.0808384418487549


training:   4%|▍         | 2665/65500 [11:53:10<275:34:16, 15.79s/it]

training loss: 0.8919327259063721


training:   4%|▍         | 2666/65500 [11:53:26<275:30:24, 15.78s/it]

training loss: 0.9251178503036499


training:   4%|▍         | 2667/65500 [11:53:41<275:26:00, 15.78s/it]

training loss: 1.0269389152526855


training:   4%|▍         | 2668/65500 [11:53:57<275:27:50, 15.78s/it]

training loss: 1.1709994077682495


training:   4%|▍         | 2669/65500 [11:54:13<275:28:40, 15.78s/it]

training loss: 0.9415926933288574


training:   4%|▍         | 2670/65500 [11:54:29<275:26:23, 15.78s/it]

training loss: 0.3896116316318512


training:   4%|▍         | 2671/65500 [11:54:44<275:23:26, 15.78s/it]

training loss: 0.6053241491317749


training:   4%|▍         | 2672/65500 [11:55:00<275:10:30, 15.77s/it]

training loss: 0.9527904987335205


training:   4%|▍         | 2673/65500 [11:55:16<274:59:54, 15.76s/it]

training loss: 1.1185550689697266


training:   4%|▍         | 2674/65500 [11:55:32<274:46:30, 15.74s/it]

training loss: 0.8430488109588623


training:   4%|▍         | 2675/65500 [11:55:47<274:36:05, 15.74s/it]

training loss: 1.1201772689819336


training:   4%|▍         | 2676/65500 [11:56:03<274:29:44, 15.73s/it]

training loss: 0.6279585361480713


training:   4%|▍         | 2677/65500 [11:56:19<274:26:10, 15.73s/it]

training loss: 0.9485046863555908


training:   4%|▍         | 2678/65500 [11:56:35<274:26:34, 15.73s/it]

training loss: 0.5993819236755371


training:   4%|▍         | 2679/65500 [11:56:50<274:26:42, 15.73s/it]

training loss: 1.285232663154602


training:   4%|▍         | 2680/65500 [11:57:06<274:43:44, 15.74s/it]

training loss: 0.8936834335327148


training:   4%|▍         | 2681/65500 [11:57:22<274:36:46, 15.74s/it]

training loss: 1.0856051445007324


training:   4%|▍         | 2682/65500 [11:57:37<274:34:52, 15.74s/it]

training loss: 0.6953164935112


training:   4%|▍         | 2683/65500 [11:57:53<274:33:17, 15.73s/it]

training loss: 1.0966410636901855


training:   4%|▍         | 2684/65500 [11:58:09<274:30:57, 15.73s/it]

training loss: 0.6923011541366577


training:   4%|▍         | 2685/65500 [11:58:25<274:42:48, 15.74s/it]

training loss: 0.6237714886665344


training:   4%|▍         | 2686/65500 [11:58:40<274:34:07, 15.74s/it]

training loss: 1.2191437482833862


training:   4%|▍         | 2687/65500 [11:58:56<274:26:27, 15.73s/it]

training loss: 0.9682883620262146


training:   4%|▍         | 2688/65500 [11:59:12<274:18:52, 15.72s/it]

training loss: 0.9466524720191956


training:   4%|▍         | 2689/65500 [11:59:28<274:16:08, 15.72s/it]

training loss: 0.7592087388038635


training:   4%|▍         | 2690/65500 [11:59:43<274:16:03, 15.72s/it]

training loss: 1.0201727151870728


training:   4%|▍         | 2691/65500 [11:59:59<274:12:11, 15.72s/it]

training loss: 0.7406505346298218


training:   4%|▍         | 2692/65500 [12:00:15<274:13:05, 15.72s/it]

training loss: 0.7658988237380981


training:   4%|▍         | 2693/65500 [12:00:30<274:16:46, 15.72s/it]

training loss: 0.8176471590995789


training:   4%|▍         | 2694/65500 [12:00:46<274:18:31, 15.72s/it]

training loss: 1.2286632061004639


training:   4%|▍         | 2695/65500 [12:01:02<274:19:54, 15.72s/it]

training loss: 1.2970130443572998


training:   4%|▍         | 2696/65500 [12:01:18<274:18:39, 15.72s/it]

training loss: 0.8336612582206726


training:   4%|▍         | 2697/65500 [12:01:33<274:17:15, 15.72s/it]

training loss: 1.0162543058395386


training:   4%|▍         | 2698/65500 [12:01:49<274:13:28, 15.72s/it]

training loss: 0.8022044897079468


training:   4%|▍         | 2699/65500 [12:02:05<274:11:43, 15.72s/it]

training loss: 0.883924126625061


training:   4%|▍         | 2700/65500 [12:02:20<274:10:18, 15.72s/it]

training loss: 0.7606550455093384
training loss: 0.8744143843650818


training:   4%|▍         | 2701/65500 [12:02:38<281:38:10, 16.15s/it]

validation loss: 1.4836081266403198


training:   4%|▍         | 2702/65500 [12:02:53<279:30:23, 16.02s/it]

training loss: 0.832118570804596


training:   4%|▍         | 2703/65500 [12:03:09<278:14:46, 15.95s/it]

training loss: 0.7567874789237976


training:   4%|▍         | 2704/65500 [12:03:25<277:01:51, 15.88s/it]

training loss: 0.6761903166770935


training:   4%|▍         | 2705/65500 [12:03:41<276:17:26, 15.84s/it]

training loss: 0.7974210977554321


training:   4%|▍         | 2706/65500 [12:03:56<275:44:55, 15.81s/it]

training loss: 0.827566921710968


training:   4%|▍         | 2707/65500 [12:04:12<275:21:58, 15.79s/it]

training loss: 0.9512038230895996


training:   4%|▍         | 2708/65500 [12:04:28<275:19:55, 15.79s/it]

training loss: 0.81550133228302


training:   4%|▍         | 2709/65500 [12:04:44<275:03:50, 15.77s/it]

training loss: 1.0525977611541748


training:   4%|▍         | 2710/65500 [12:04:59<274:48:16, 15.76s/it]

training loss: 0.7635576128959656


training:   4%|▍         | 2711/65500 [12:05:15<274:36:23, 15.74s/it]

training loss: 1.100069522857666


training:   4%|▍         | 2712/65500 [12:05:31<274:32:45, 15.74s/it]

training loss: 0.8427832126617432


training:   4%|▍         | 2713/65500 [12:05:46<274:26:40, 15.74s/it]

training loss: 0.9885175228118896


training:   4%|▍         | 2714/65500 [12:06:02<274:17:36, 15.73s/it]

training loss: 0.7965539693832397


training:   4%|▍         | 2715/65500 [12:06:18<274:15:00, 15.73s/it]

training loss: 0.8115526437759399


training:   4%|▍         | 2716/65500 [12:06:34<274:14:26, 15.72s/it]

training loss: 0.6062146425247192


training:   4%|▍         | 2717/65500 [12:06:49<274:13:39, 15.72s/it]

training loss: 0.6609795093536377


training:   4%|▍         | 2718/65500 [12:07:05<274:13:32, 15.72s/it]

training loss: 0.9881017208099365


training:   4%|▍         | 2719/65500 [12:07:21<274:11:45, 15.72s/it]

training loss: 0.849989652633667


training:   4%|▍         | 2720/65500 [12:07:37<274:09:18, 15.72s/it]

training loss: 1.2223535776138306


training:   4%|▍         | 2721/65500 [12:07:52<274:06:53, 15.72s/it]

training loss: 0.8771958947181702


training:   4%|▍         | 2722/65500 [12:08:08<274:09:39, 15.72s/it]

training loss: 0.9154584407806396


training:   4%|▍         | 2723/65500 [12:08:24<274:10:13, 15.72s/it]

training loss: 0.9851803779602051


training:   4%|▍         | 2724/65500 [12:08:40<274:39:21, 15.75s/it]

training loss: 0.6889662742614746


training:   4%|▍         | 2725/65500 [12:08:55<274:27:09, 15.74s/it]

training loss: 0.8434959053993225


training:   4%|▍         | 2726/65500 [12:09:11<274:36:15, 15.75s/it]

training loss: 0.5376097559928894


training:   4%|▍         | 2727/65500 [12:09:27<274:32:42, 15.75s/it]

training loss: 0.8231655359268188


training:   4%|▍         | 2728/65500 [12:09:42<274:29:15, 15.74s/it]

training loss: 1.0767334699630737


training:   4%|▍         | 2729/65500 [12:09:58<274:20:36, 15.73s/it]

training loss: 0.971860408782959


training:   4%|▍         | 2730/65500 [12:10:14<274:21:56, 15.74s/it]

training loss: 0.7362794876098633


training:   4%|▍         | 2731/65500 [12:10:30<274:34:46, 15.75s/it]

training loss: 0.8092819452285767


training:   4%|▍         | 2732/65500 [12:10:45<274:33:31, 15.75s/it]

training loss: 0.6403641700744629


training:   4%|▍         | 2733/65500 [12:11:01<274:27:37, 15.74s/it]

training loss: 0.9805609583854675


training:   4%|▍         | 2734/65500 [12:11:17<274:21:22, 15.74s/it]

training loss: 1.1694929599761963


training:   4%|▍         | 2735/65500 [12:11:33<274:13:31, 15.73s/it]

training loss: 0.7194238305091858


training:   4%|▍         | 2736/65500 [12:11:48<274:06:27, 15.72s/it]

training loss: 1.108356237411499


training:   4%|▍         | 2737/65500 [12:12:04<274:09:32, 15.73s/it]

training loss: 0.37282195687294006


training:   4%|▍         | 2738/65500 [12:12:20<274:08:50, 15.72s/it]

training loss: 0.6083530783653259


training:   4%|▍         | 2739/65500 [12:12:35<274:06:23, 15.72s/it]

training loss: 0.6403307914733887


training:   4%|▍         | 2740/65500 [12:12:51<274:00:53, 15.72s/it]

training loss: 0.7921770811080933


training:   4%|▍         | 2741/65500 [12:13:07<273:57:54, 15.72s/it]

training loss: 0.9511024355888367


training:   4%|▍         | 2742/65500 [12:13:23<273:54:28, 15.71s/it]

training loss: 0.8923254013061523


training:   4%|▍         | 2743/65500 [12:13:38<273:56:14, 15.71s/it]

training loss: 0.6810571551322937


training:   4%|▍         | 2744/65500 [12:13:54<273:54:13, 15.71s/it]

training loss: 0.9502615332603455


training:   4%|▍         | 2745/65500 [12:14:10<273:53:27, 15.71s/it]

training loss: 0.9142881035804749


training:   4%|▍         | 2746/65500 [12:14:25<273:55:46, 15.71s/it]

training loss: 0.9495700597763062


training:   4%|▍         | 2747/65500 [12:14:41<273:56:08, 15.72s/it]

training loss: 0.9616327285766602


training:   4%|▍         | 2748/65500 [12:14:57<273:54:39, 15.71s/it]

training loss: 0.964289128780365


training:   4%|▍         | 2749/65500 [12:15:13<274:10:53, 15.73s/it]

training loss: 1.0732932090759277


training:   4%|▍         | 2750/65500 [12:15:28<274:15:15, 15.73s/it]

training loss: 0.670545220375061


training:   4%|▍         | 2751/65500 [12:15:44<274:11:41, 15.73s/it]

training loss: 0.7265183329582214


training:   4%|▍         | 2752/65500 [12:16:00<274:10:16, 15.73s/it]

training loss: 0.8774309754371643


training:   4%|▍         | 2753/65500 [12:16:16<274:12:09, 15.73s/it]

training loss: 0.9446418881416321


training:   4%|▍         | 2754/65500 [12:16:31<274:21:04, 15.74s/it]

training loss: 0.7363399863243103


training:   4%|▍         | 2755/65500 [12:16:47<274:28:39, 15.75s/it]

training loss: 0.9281283617019653


training:   4%|▍         | 2756/65500 [12:17:03<274:18:18, 15.74s/it]

training loss: 0.9047595262527466


training:   4%|▍         | 2757/65500 [12:17:19<274:11:23, 15.73s/it]

training loss: 0.43755605816841125


training:   4%|▍         | 2758/65500 [12:17:34<274:07:14, 15.73s/it]

training loss: 0.678900420665741


training:   4%|▍         | 2759/65500 [12:17:50<274:02:28, 15.72s/it]

training loss: 0.9240762591362


training:   4%|▍         | 2760/65500 [12:18:06<274:02:52, 15.72s/it]

training loss: 0.7984752058982849


training:   4%|▍         | 2761/65500 [12:18:21<273:56:46, 15.72s/it]

training loss: 1.0567412376403809


training:   4%|▍         | 2762/65500 [12:18:37<274:00:59, 15.72s/it]

training loss: 0.8912093639373779


training:   4%|▍         | 2763/65500 [12:18:53<273:57:06, 15.72s/it]

training loss: 0.796198308467865


training:   4%|▍         | 2764/65500 [12:19:09<273:58:22, 15.72s/it]

training loss: 0.9028778076171875


training:   4%|▍         | 2765/65500 [12:19:24<273:55:51, 15.72s/it]

training loss: 0.6561636924743652


training:   4%|▍         | 2766/65500 [12:19:40<273:51:15, 15.72s/it]

training loss: 0.8812910914421082


training:   4%|▍         | 2767/65500 [12:19:56<273:55:21, 15.72s/it]

training loss: 1.0005735158920288


training:   4%|▍         | 2768/65500 [12:20:11<273:56:37, 15.72s/it]

training loss: 0.4847301244735718


training:   4%|▍         | 2769/65500 [12:20:27<273:56:10, 15.72s/it]

training loss: 1.0838884115219116


training:   4%|▍         | 2770/65500 [12:20:43<273:56:28, 15.72s/it]

training loss: 0.809334397315979


training:   4%|▍         | 2771/65500 [12:20:59<273:53:03, 15.72s/it]

training loss: 1.0107470750808716


training:   4%|▍         | 2772/65500 [12:21:14<274:19:58, 15.74s/it]

training loss: 1.2512352466583252


training:   4%|▍         | 2773/65500 [12:21:30<274:54:39, 15.78s/it]

training loss: 0.7943238615989685


training:   4%|▍         | 2774/65500 [12:21:46<275:00:13, 15.78s/it]

training loss: 0.9484766125679016


training:   4%|▍         | 2775/65500 [12:22:02<274:59:36, 15.78s/it]

training loss: 0.9633374214172363


training:   4%|▍         | 2776/65500 [12:22:18<275:02:20, 15.79s/it]

training loss: 0.8791903853416443


training:   4%|▍         | 2777/65500 [12:22:33<275:07:20, 15.79s/it]

training loss: 1.1760896444320679


training:   4%|▍         | 2778/65500 [12:22:49<275:28:41, 15.81s/it]

training loss: 0.8132113218307495


training:   4%|▍         | 2779/65500 [12:23:05<275:22:05, 15.81s/it]

training loss: 0.6657083034515381


training:   4%|▍         | 2780/65500 [12:23:21<275:17:46, 15.80s/it]

training loss: 0.691815972328186


training:   4%|▍         | 2781/65500 [12:23:37<275:17:22, 15.80s/it]

training loss: 1.0823811292648315


training:   4%|▍         | 2782/65500 [12:23:52<275:13:26, 15.80s/it]

training loss: 0.8736070394515991


training:   4%|▍         | 2783/65500 [12:24:08<275:04:45, 15.79s/it]

training loss: 1.1664488315582275


training:   4%|▍         | 2784/65500 [12:24:24<274:42:35, 15.77s/it]

training loss: 0.5462931394577026


training:   4%|▍         | 2785/65500 [12:24:40<274:26:27, 15.75s/it]

training loss: 0.9236494898796082


training:   4%|▍         | 2786/65500 [12:24:55<274:14:30, 15.74s/it]

training loss: 0.4350639581680298


training:   4%|▍         | 2787/65500 [12:25:11<274:07:56, 15.74s/it]

training loss: 0.6005531549453735


training:   4%|▍         | 2788/65500 [12:25:27<274:03:40, 15.73s/it]

training loss: 0.7191594839096069


training:   4%|▍         | 2789/65500 [12:25:43<273:59:24, 15.73s/it]

training loss: 0.35265710949897766


training:   4%|▍         | 2790/65500 [12:25:58<273:58:19, 15.73s/it]

training loss: 0.6686244010925293


training:   4%|▍         | 2791/65500 [12:26:14<273:53:18, 15.72s/it]

training loss: 0.9611198902130127


training:   4%|▍         | 2792/65500 [12:26:30<273:52:20, 15.72s/it]

training loss: 0.7244794964790344


training:   4%|▍         | 2793/65500 [12:26:45<273:52:01, 15.72s/it]

training loss: 1.0918596982955933


training:   4%|▍         | 2794/65500 [12:27:01<273:50:26, 15.72s/it]

training loss: 0.7830712795257568


training:   4%|▍         | 2795/65500 [12:27:17<273:48:11, 15.72s/it]

training loss: 0.6360610127449036


training:   4%|▍         | 2796/65500 [12:27:33<274:06:16, 15.74s/it]

training loss: 0.6042345762252808


training:   4%|▍         | 2797/65500 [12:27:48<274:05:21, 15.74s/it]

training loss: 0.7398738265037537


training:   4%|▍         | 2798/65500 [12:28:04<274:07:09, 15.74s/it]

training loss: 0.7092304825782776


training:   4%|▍         | 2799/65500 [12:28:20<274:05:22, 15.74s/it]

training loss: 0.9830174446105957


training:   4%|▍         | 2800/65500 [12:28:36<274:02:08, 15.73s/it]

training loss: 0.7580681443214417
training loss: 0.8307509422302246


training:   4%|▍         | 2801/65500 [12:28:53<281:40:40, 16.17s/it]

validation loss: 1.041133999824524


training:   4%|▍         | 2802/65500 [12:29:09<279:24:43, 16.04s/it]

training loss: 0.9250036478042603


training:   4%|▍         | 2803/65500 [12:29:24<277:42:59, 15.95s/it]

training loss: 0.9514579772949219


training:   4%|▍         | 2804/65500 [12:29:40<276:31:43, 15.88s/it]

training loss: 0.9364787936210632


training:   4%|▍         | 2805/65500 [12:29:56<275:43:20, 15.83s/it]

training loss: 0.902674674987793


training:   4%|▍         | 2806/65500 [12:30:11<275:08:21, 15.80s/it]

training loss: 0.684465765953064


training:   4%|▍         | 2807/65500 [12:30:27<274:42:27, 15.77s/it]

training loss: 0.9429963231086731


training:   4%|▍         | 2808/65500 [12:30:43<274:25:47, 15.76s/it]

training loss: 0.6993408799171448


training:   4%|▍         | 2809/65500 [12:30:59<274:14:08, 15.75s/it]

training loss: 0.8807325959205627


training:   4%|▍         | 2810/65500 [12:31:14<274:08:12, 15.74s/it]

training loss: 0.9519954323768616


training:   4%|▍         | 2811/65500 [12:31:30<274:02:16, 15.74s/it]

training loss: 0.9607990980148315


training:   4%|▍         | 2812/65500 [12:31:46<274:01:38, 15.74s/it]

training loss: 1.0866608619689941


training:   4%|▍         | 2813/65500 [12:32:01<273:54:48, 15.73s/it]

training loss: 0.9047881960868835


training:   4%|▍         | 2814/65500 [12:32:17<273:54:32, 15.73s/it]

training loss: 0.8433111906051636


training:   4%|▍         | 2815/65500 [12:32:33<273:50:23, 15.73s/it]

training loss: 0.97688227891922


training:   4%|▍         | 2816/65500 [12:32:49<273:48:23, 15.72s/it]

training loss: 0.5816049575805664


training:   4%|▍         | 2817/65500 [12:33:04<273:48:04, 15.72s/it]

training loss: 1.0964844226837158


training:   4%|▍         | 2818/65500 [12:33:20<273:42:30, 15.72s/it]

training loss: 0.856290340423584


training:   4%|▍         | 2819/65500 [12:33:36<274:01:25, 15.74s/it]

training loss: 1.0125044584274292


training:   4%|▍         | 2820/65500 [12:33:50<264:13:42, 15.18s/it]

training loss: 1.278323769569397


training:   4%|▍         | 2821/65500 [12:34:05<267:08:08, 15.34s/it]

training loss: 0.8965861797332764


training:   4%|▍         | 2822/65500 [12:34:21<269:06:21, 15.46s/it]

training loss: 0.917033314704895


training:   4%|▍         | 2823/65500 [12:34:37<270:32:45, 15.54s/it]

training loss: 0.5445328950881958


training:   4%|▍         | 2824/65500 [12:34:53<271:46:37, 15.61s/it]

training loss: 0.7261301875114441


training:   4%|▍         | 2825/65500 [12:35:08<272:24:59, 15.65s/it]

training loss: 0.9396612644195557


training:   4%|▍         | 2826/65500 [12:35:24<272:45:57, 15.67s/it]

training loss: 0.7041131258010864


training:   4%|▍         | 2827/65500 [12:35:40<273:03:49, 15.69s/it]

training loss: 0.8003678321838379


training:   4%|▍         | 2828/65500 [12:35:56<273:10:29, 15.69s/it]

training loss: 0.961586594581604


training:   4%|▍         | 2829/65500 [12:36:11<273:20:41, 15.70s/it]

training loss: 0.9400424957275391


training:   4%|▍         | 2830/65500 [12:36:27<273:23:28, 15.70s/it]

training loss: 1.2059662342071533


training:   4%|▍         | 2831/65500 [12:36:43<273:28:03, 15.71s/it]

training loss: 1.0144490003585815


training:   4%|▍         | 2832/65500 [12:36:58<273:29:26, 15.71s/it]

training loss: 0.9351279735565186


training:   4%|▍         | 2833/65500 [12:37:14<273:34:19, 15.72s/it]

training loss: 1.0352333784103394


training:   4%|▍         | 2834/65500 [12:37:30<273:34:34, 15.72s/it]

training loss: 0.7987252473831177


training:   4%|▍         | 2835/65500 [12:37:46<273:39:07, 15.72s/it]

training loss: 0.795804500579834


training:   4%|▍         | 2836/65500 [12:38:01<273:36:43, 15.72s/it]

training loss: 1.1624107360839844


training:   4%|▍         | 2837/65500 [12:38:17<273:38:15, 15.72s/it]

training loss: 0.7972983717918396


training:   4%|▍         | 2838/65500 [12:38:33<273:37:10, 15.72s/it]

training loss: 1.0614490509033203


training:   4%|▍         | 2839/65500 [12:38:49<273:36:48, 15.72s/it]

training loss: 1.1663649082183838


training:   4%|▍         | 2840/65500 [12:39:04<273:34:57, 15.72s/it]

training loss: 1.093745470046997


training:   4%|▍         | 2841/65500 [12:39:20<273:38:17, 15.72s/it]

training loss: 1.2252850532531738


training:   4%|▍         | 2842/65500 [12:39:36<273:51:01, 15.73s/it]

training loss: 0.6118316650390625


training:   4%|▍         | 2843/65500 [12:39:51<273:52:03, 15.74s/it]

training loss: 0.7453279495239258


training:   4%|▍         | 2844/65500 [12:40:07<273:47:34, 15.73s/it]

training loss: 0.9251241683959961


training:   4%|▍         | 2845/65500 [12:40:23<273:47:41, 15.73s/it]

training loss: 0.7611017823219299


training:   4%|▍         | 2846/65500 [12:40:39<273:49:17, 15.73s/it]

training loss: 1.043243408203125


training:   4%|▍         | 2847/65500 [12:40:54<273:55:04, 15.74s/it]

training loss: 0.8349172472953796


training:   4%|▍         | 2848/65500 [12:41:10<274:02:37, 15.75s/it]

training loss: 0.7777044773101807


training:   4%|▍         | 2849/65500 [12:41:26<273:54:15, 15.74s/it]

training loss: 0.7290584444999695


training:   4%|▍         | 2850/65500 [12:41:42<273:50:30, 15.74s/it]

training loss: 0.8146780133247375


training:   4%|▍         | 2851/65500 [12:41:57<273:48:22, 15.73s/it]

training loss: 0.6772104501724243


training:   4%|▍         | 2852/65500 [12:42:13<273:45:19, 15.73s/it]

training loss: 0.9628918170928955


training:   4%|▍         | 2853/65500 [12:42:29<273:39:54, 15.73s/it]

training loss: 0.47166523337364197


training:   4%|▍         | 2854/65500 [12:42:45<273:39:48, 15.73s/it]

training loss: 0.9429984092712402


training:   4%|▍         | 2855/65500 [12:43:00<273:34:24, 15.72s/it]

training loss: 0.9032618999481201


training:   4%|▍         | 2856/65500 [12:43:16<273:30:47, 15.72s/it]

training loss: 0.41375577449798584


training:   4%|▍         | 2857/65500 [12:43:32<273:29:51, 15.72s/it]

training loss: 0.6167961955070496


training:   4%|▍         | 2858/65500 [12:43:47<273:29:57, 15.72s/it]

training loss: 0.5138022899627686


training:   4%|▍         | 2859/65500 [12:44:03<273:26:20, 15.71s/it]

training loss: 1.218010425567627


training:   4%|▍         | 2860/65500 [12:44:19<273:29:17, 15.72s/it]

training loss: 1.2520371675491333


training:   4%|▍         | 2861/65500 [12:44:35<273:29:55, 15.72s/it]

training loss: 1.1754999160766602


training:   4%|▍         | 2862/65500 [12:44:50<273:29:26, 15.72s/it]

training loss: 1.3908201456069946


training:   4%|▍         | 2863/65500 [12:45:06<273:31:14, 15.72s/it]

training loss: 0.9851474761962891


training:   4%|▍         | 2864/65500 [12:45:22<273:30:20, 15.72s/it]

training loss: 0.9266554117202759


training:   4%|▍         | 2865/65500 [12:45:37<273:32:34, 15.72s/it]

training loss: 0.725273609161377


training:   4%|▍         | 2866/65500 [12:45:53<273:44:25, 15.73s/it]

training loss: 1.0992001295089722


training:   4%|▍         | 2867/65500 [12:46:09<273:42:56, 15.73s/it]

training loss: 0.8066279292106628


training:   4%|▍         | 2868/65500 [12:46:25<273:42:55, 15.73s/it]

training loss: 1.033354640007019


training:   4%|▍         | 2869/65500 [12:46:40<273:42:00, 15.73s/it]

training loss: 0.7958385944366455


training:   4%|▍         | 2870/65500 [12:46:56<273:41:03, 15.73s/it]

training loss: 0.959971010684967


training:   4%|▍         | 2871/65500 [12:47:12<274:03:20, 15.75s/it]

training loss: 1.1225589513778687


training:   4%|▍         | 2872/65500 [12:47:28<273:55:08, 15.75s/it]

training loss: 0.4907929003238678


training:   4%|▍         | 2873/65500 [12:47:43<273:48:24, 15.74s/it]

training loss: 0.8448801040649414


training:   4%|▍         | 2874/65500 [12:47:59<273:41:26, 15.73s/it]

training loss: 0.7029088139533997


training:   4%|▍         | 2875/65500 [12:48:15<273:37:49, 15.73s/it]

training loss: 0.8118074536323547


training:   4%|▍         | 2876/65500 [12:48:30<273:32:15, 15.72s/it]

training loss: 0.4595441520214081


training:   4%|▍         | 2877/65500 [12:48:46<273:28:32, 15.72s/it]

training loss: 0.9899721741676331


training:   4%|▍         | 2878/65500 [12:49:02<273:23:11, 15.72s/it]

training loss: 1.4632174968719482


training:   4%|▍         | 2879/65500 [12:49:18<273:25:19, 15.72s/it]

training loss: 0.938296377658844


training:   4%|▍         | 2880/65500 [12:49:33<273:26:28, 15.72s/it]

training loss: 0.6541450023651123


training:   4%|▍         | 2881/65500 [12:49:49<273:25:14, 15.72s/it]

training loss: 0.9727609157562256


training:   4%|▍         | 2882/65500 [12:50:05<273:24:58, 15.72s/it]

training loss: 0.7040269374847412


training:   4%|▍         | 2883/65500 [12:50:21<273:26:55, 15.72s/it]

training loss: 0.9875500798225403


training:   4%|▍         | 2884/65500 [12:50:36<273:27:00, 15.72s/it]

training loss: 1.0645239353179932


training:   4%|▍         | 2885/65500 [12:50:52<273:26:02, 15.72s/it]

training loss: 0.9234467148780823


training:   4%|▍         | 2886/65500 [12:51:08<273:24:58, 15.72s/it]

training loss: 1.1284866333007812


training:   4%|▍         | 2887/65500 [12:51:23<273:27:20, 15.72s/it]

training loss: 0.8398007750511169


training:   4%|▍         | 2888/65500 [12:51:39<273:28:51, 15.72s/it]

training loss: 1.0548875331878662


training:   4%|▍         | 2889/65500 [12:51:55<273:43:42, 15.74s/it]

training loss: 0.8461341857910156


training:   4%|▍         | 2890/65500 [12:52:11<273:41:29, 15.74s/it]

training loss: 0.6667686700820923


training:   4%|▍         | 2891/65500 [12:52:26<273:40:15, 15.74s/it]

training loss: 0.8885006904602051


training:   4%|▍         | 2892/65500 [12:52:42<273:52:21, 15.75s/it]

training loss: 1.0421637296676636


training:   4%|▍         | 2893/65500 [12:52:58<274:00:09, 15.76s/it]

training loss: 0.4800329804420471


training:   4%|▍         | 2894/65500 [12:53:14<274:22:49, 15.78s/it]

training loss: 1.0135221481323242


training:   4%|▍         | 2895/65500 [12:53:30<274:25:49, 15.78s/it]

training loss: 1.039262056350708


training:   4%|▍         | 2896/65500 [12:53:45<274:55:25, 15.81s/it]

training loss: 0.4990847110748291


training:   4%|▍         | 2897/65500 [12:54:01<274:43:40, 15.80s/it]

training loss: 0.9474987387657166


training:   4%|▍         | 2898/65500 [12:54:17<274:33:12, 15.79s/it]

training loss: 0.5301607251167297


training:   4%|▍         | 2899/65500 [12:54:33<274:25:05, 15.78s/it]

training loss: 0.988274097442627


training:   4%|▍         | 2900/65500 [12:54:48<274:19:38, 15.78s/it]

training loss: 0.917323887348175
training loss: 0.8776810765266418


training:   4%|▍         | 2901/65500 [12:55:06<281:36:32, 16.20s/it]

validation loss: 1.346020221710205


training:   4%|▍         | 2902/65500 [12:55:21<279:31:16, 16.08s/it]

training loss: 0.6518793702125549


training:   4%|▍         | 2903/65500 [12:55:37<277:53:31, 15.98s/it]

training loss: 0.6613562107086182


training:   4%|▍         | 2904/65500 [12:55:53<276:44:16, 15.92s/it]

training loss: 0.9483524560928345


training:   4%|▍         | 2905/65500 [12:56:09<275:45:59, 15.86s/it]

training loss: 0.7279636263847351


training:   4%|▍         | 2906/65500 [12:56:24<275:04:20, 15.82s/it]

training loss: 1.15701425075531


training:   4%|▍         | 2907/65500 [12:56:40<274:31:30, 15.79s/it]

training loss: 0.9656578898429871


training:   4%|▍         | 2908/65500 [12:56:56<274:12:56, 15.77s/it]

training loss: 1.2914377450942993


training:   4%|▍         | 2909/65500 [12:57:12<274:01:04, 15.76s/it]

training loss: 0.7626041769981384


training:   4%|▍         | 2910/65500 [12:57:27<273:46:27, 15.75s/it]

training loss: 0.7705463767051697


training:   4%|▍         | 2911/65500 [12:57:43<273:38:52, 15.74s/it]

training loss: 0.6440923810005188


training:   4%|▍         | 2912/65500 [12:57:59<273:44:37, 15.75s/it]

training loss: 0.900981605052948


training:   4%|▍         | 2913/65500 [12:58:15<273:38:58, 15.74s/it]

training loss: 0.9937965869903564


training:   4%|▍         | 2914/65500 [12:58:30<273:33:30, 15.74s/it]

training loss: 0.6200231909751892


training:   4%|▍         | 2915/65500 [12:58:46<273:27:57, 15.73s/it]

training loss: 0.8063951134681702


training:   4%|▍         | 2916/65500 [12:59:02<273:27:20, 15.73s/it]

training loss: 0.6385207772254944


training:   4%|▍         | 2917/65500 [12:59:17<273:37:28, 15.74s/it]

training loss: 0.7324270009994507


training:   4%|▍         | 2918/65500 [12:59:33<273:39:24, 15.74s/it]

training loss: 0.7677304744720459


training:   4%|▍         | 2919/65500 [12:59:49<273:32:51, 15.74s/it]

training loss: 0.985801100730896


training:   4%|▍         | 2920/65500 [13:00:05<273:30:20, 15.73s/it]

training loss: 0.9004712104797363


training:   4%|▍         | 2921/65500 [13:00:20<273:24:12, 15.73s/it]

training loss: 0.7787962555885315


training:   4%|▍         | 2922/65500 [13:00:36<273:19:17, 15.72s/it]

training loss: 0.5398450493812561


training:   4%|▍         | 2923/65500 [13:00:52<273:18:24, 15.72s/it]

training loss: 0.7534104585647583


training:   4%|▍         | 2924/65500 [13:01:08<273:14:39, 15.72s/it]

training loss: 0.6628690958023071


training:   4%|▍         | 2925/65500 [13:01:23<273:20:24, 15.73s/it]

training loss: 0.65447598695755


training:   4%|▍         | 2926/65500 [13:01:39<273:16:03, 15.72s/it]

training loss: 0.5354630947113037


training:   4%|▍         | 2927/65500 [13:01:55<273:16:22, 15.72s/it]

training loss: 0.6768263578414917


training:   4%|▍         | 2928/65500 [13:02:10<273:11:06, 15.72s/it]

training loss: 0.798011302947998


training:   4%|▍         | 2929/65500 [13:02:26<273:11:07, 15.72s/it]

training loss: 0.908461332321167


training:   4%|▍         | 2930/65500 [13:02:42<273:10:21, 15.72s/it]

training loss: 0.7639496922492981


training:   4%|▍         | 2931/65500 [13:02:58<273:08:37, 15.72s/it]

training loss: 1.2847840785980225


training:   4%|▍         | 2932/65500 [13:03:13<273:07:05, 15.71s/it]

training loss: 0.8310667872428894


training:   4%|▍         | 2933/65500 [13:03:29<273:07:42, 15.72s/it]

training loss: 0.9620172381401062


training:   4%|▍         | 2934/65500 [13:03:45<273:05:09, 15.71s/it]

training loss: 0.9699759483337402


training:   4%|▍         | 2935/65500 [13:04:00<273:14:32, 15.72s/it]

training loss: 0.8249260783195496


training:   4%|▍         | 2936/65500 [13:04:16<273:25:29, 15.73s/it]

training loss: 0.9506086111068726


training:   4%|▍         | 2937/65500 [13:04:32<273:29:27, 15.74s/it]

training loss: 1.1026074886322021


training:   4%|▍         | 2938/65500 [13:04:48<273:21:58, 15.73s/it]

training loss: 0.9652050733566284


training:   4%|▍         | 2939/65500 [13:05:03<273:21:15, 15.73s/it]

training loss: 1.0115177631378174


training:   4%|▍         | 2940/65500 [13:05:19<273:22:42, 15.73s/it]

training loss: 0.9088430404663086


training:   4%|▍         | 2941/65500 [13:05:35<273:28:57, 15.74s/it]

training loss: 0.7811291813850403


training:   4%|▍         | 2942/65500 [13:05:51<273:23:54, 15.73s/it]

training loss: 1.1261869668960571


training:   4%|▍         | 2943/65500 [13:06:06<273:18:44, 15.73s/it]

training loss: 0.9567213654518127


training:   4%|▍         | 2944/65500 [13:06:22<273:13:04, 15.72s/it]

training loss: 1.0986104011535645


training:   4%|▍         | 2945/65500 [13:06:38<273:11:00, 15.72s/it]

training loss: 1.0651519298553467


training:   4%|▍         | 2946/65500 [13:06:53<273:08:27, 15.72s/it]

training loss: 0.8911868333816528


training:   4%|▍         | 2947/65500 [13:07:09<273:08:04, 15.72s/it]

training loss: 0.6601342558860779


training:   5%|▍         | 2948/65500 [13:07:25<273:06:44, 15.72s/it]

training loss: 0.7974209189414978


training:   5%|▍         | 2949/65500 [13:07:41<273:04:00, 15.72s/it]

training loss: 0.988387942314148


training:   5%|▍         | 2950/65500 [13:07:56<273:09:18, 15.72s/it]

training loss: 0.4494534730911255


training:   5%|▍         | 2951/65500 [13:08:12<273:11:10, 15.72s/it]

training loss: 1.0263538360595703


training:   5%|▍         | 2952/65500 [13:08:28<273:08:27, 15.72s/it]

training loss: 0.7448755502700806


training:   5%|▍         | 2953/65500 [13:08:43<273:04:27, 15.72s/it]

training loss: 0.8985037207603455


training:   5%|▍         | 2954/65500 [13:08:59<273:06:38, 15.72s/it]

training loss: 0.7988352179527283


training:   5%|▍         | 2955/65500 [13:09:15<273:06:37, 15.72s/it]

training loss: 0.7977795600891113


training:   5%|▍         | 2956/65500 [13:09:31<273:04:29, 15.72s/it]

training loss: 0.9433562755584717


training:   5%|▍         | 2957/65500 [13:09:46<273:18:18, 15.73s/it]

training loss: 1.0359216928482056


training:   5%|▍         | 2958/65500 [13:10:02<274:27:03, 15.80s/it]

training loss: 0.7071709632873535


training:   5%|▍         | 2959/65500 [13:10:18<275:00:32, 15.83s/it]

training loss: 0.9703594446182251


training:   5%|▍         | 2960/65500 [13:10:34<274:35:25, 15.81s/it]

training loss: 1.0498716831207275


training:   5%|▍         | 2961/65500 [13:10:50<274:22:32, 15.79s/it]

training loss: 0.46780407428741455


training:   5%|▍         | 2962/65500 [13:11:06<274:01:56, 15.77s/it]

training loss: 0.9735709428787231


training:   5%|▍         | 2963/65500 [13:11:21<273:46:18, 15.76s/it]

training loss: 0.8104519248008728


training:   5%|▍         | 2964/65500 [13:11:37<273:38:42, 15.75s/it]

training loss: 0.6125096082687378


training:   5%|▍         | 2965/65500 [13:11:53<273:31:25, 15.75s/it]

training loss: 0.40605461597442627


training:   5%|▍         | 2966/65500 [13:12:08<273:33:55, 15.75s/it]

training loss: 0.9441336393356323


training:   5%|▍         | 2967/65500 [13:12:24<273:23:54, 15.74s/it]

training loss: 0.5168356895446777


training:   5%|▍         | 2968/65500 [13:12:40<273:16:36, 15.73s/it]

training loss: 0.5381472110748291


training:   5%|▍         | 2969/65500 [13:12:56<273:13:04, 15.73s/it]

training loss: 0.6551885008811951


training:   5%|▍         | 2970/65500 [13:13:11<273:07:26, 15.72s/it]

training loss: 0.4749889075756073


training:   5%|▍         | 2971/65500 [13:13:27<273:02:50, 15.72s/it]

training loss: 0.6571340560913086


training:   5%|▍         | 2972/65500 [13:13:43<273:01:23, 15.72s/it]

training loss: 0.5591535568237305


training:   5%|▍         | 2973/65500 [13:13:58<273:01:20, 15.72s/it]

training loss: 0.42402076721191406


training:   5%|▍         | 2974/65500 [13:14:14<272:59:46, 15.72s/it]

training loss: 0.9964027404785156


training:   5%|▍         | 2975/65500 [13:14:30<273:00:34, 15.72s/it]

training loss: 1.1457979679107666


training:   5%|▍         | 2976/65500 [13:14:46<273:00:39, 15.72s/it]

training loss: 1.1508526802062988


training:   5%|▍         | 2977/65500 [13:15:01<273:01:48, 15.72s/it]

training loss: 0.9340758323669434


training:   5%|▍         | 2978/65500 [13:15:17<273:01:05, 15.72s/it]

training loss: 0.8585638403892517


training:   5%|▍         | 2979/65500 [13:15:33<273:42:46, 15.76s/it]

training loss: 0.7000195384025574


training:   5%|▍         | 2980/65500 [13:15:49<274:41:17, 15.82s/it]

training loss: 0.705564558506012


training:   5%|▍         | 2981/65500 [13:16:05<274:39:25, 15.82s/it]

training loss: 0.8891524076461792


training:   5%|▍         | 2982/65500 [13:16:20<274:08:28, 15.79s/it]

training loss: 0.7647407650947571


training:   5%|▍         | 2983/65500 [13:16:36<273:48:02, 15.77s/it]

training loss: 0.6331823468208313


training:   5%|▍         | 2984/65500 [13:16:52<273:33:36, 15.75s/it]

training loss: 0.6292788982391357


training:   5%|▍         | 2985/65500 [13:17:08<273:24:29, 15.74s/it]

training loss: 0.8547906279563904


training:   5%|▍         | 2986/65500 [13:17:23<273:14:54, 15.74s/it]

training loss: 0.8722127079963684


training:   5%|▍         | 2987/65500 [13:17:39<273:28:38, 15.75s/it]

training loss: 1.164433240890503


training:   5%|▍         | 2988/65500 [13:17:55<273:20:52, 15.74s/it]

training loss: 0.8389299511909485


training:   5%|▍         | 2989/65500 [13:18:11<273:19:06, 15.74s/it]

training loss: 1.049328327178955


training:   5%|▍         | 2990/65500 [13:18:26<273:12:19, 15.73s/it]

training loss: 1.22476327419281


training:   5%|▍         | 2991/65500 [13:18:42<273:07:47, 15.73s/it]

training loss: 0.8309689164161682


training:   5%|▍         | 2992/65500 [13:18:58<273:23:51, 15.75s/it]

training loss: 0.6982327699661255


training:   5%|▍         | 2993/65500 [13:19:13<273:12:21, 15.73s/it]

training loss: 1.0029476881027222


training:   5%|▍         | 2994/65500 [13:19:29<273:07:37, 15.73s/it]

training loss: 1.1558822393417358


training:   5%|▍         | 2995/65500 [13:19:45<273:09:00, 15.73s/it]

training loss: 0.845490574836731


training:   5%|▍         | 2996/65500 [13:20:01<273:05:17, 15.73s/it]

training loss: 0.7740055322647095


training:   5%|▍         | 2997/65500 [13:20:16<273:03:42, 15.73s/it]

training loss: 0.8653562068939209


training:   5%|▍         | 2998/65500 [13:20:32<272:58:54, 15.72s/it]

training loss: 0.5063403844833374


training:   5%|▍         | 2999/65500 [13:20:48<272:57:14, 15.72s/it]

training loss: 0.6570523977279663


training:   5%|▍         | 3000/65500 [13:21:04<273:05:43, 15.73s/it]

training loss: 0.8212180137634277
training loss: 1.1872586011886597



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.757491111755371
rt diplomacie si predtym v suvislosti so zadrzanim
Juzikovej predvolal iranskeho velvyslanca v Moskve. Podla hovorkyne rezortu
Marije Zacharovovej Moskva pozadovala objasnenie dovodov, ktore viedli
k zadrzaniu zurnalistky.
Julia Juzikova v minulosti pracovala v Teherane ako korespondentka a
vratila sa tam len minulu nedelu na zaklade sukromneho pozvania.
Hned po prilete na teheranske letisko jej vzali pas vraj s prislubom,
ze jej ho pred odletom vratia. Vo stvrtok do jej hotelovej izby vpadli
strazcovia islamskej revolucie, vyrazili dvere, zatkli ju a obvinili zo
spoluprace s Izraelcanmi.
Ruska ambasada neuviedla dovod novinarkinho zadrzania. Jej byvaly
manzel, novinar Boris Vojcechovskij vsak uviedol, ze mu Juzikova
telefonovala z vazby. "Zavolat do Moskvy jej dovolili vcera vecer, a to na
jedinu minutu. Stihla len povedat, ze sedi na podlahe v cele, nema
ziadne spojenie so svetom a v sobotu sa ma konat sud. Obvinuju ju zo
spoluprace s


generating:   0%|          | 1/512 [00:00<02:11,  3.87it/s][A
generating:   0%|          | 2/512 [00:00<02:09,  3.95it/s][A
generating:   1%|          | 3/512 [00:00<02:06,  4.01it/s][A
generating:   1%|          | 4/512 [00:00<02:04,  4.07it/s][A
generating:   1%|          | 5/512 [00:01<02:06,  4.01it/s][A
generating:   1%|          | 6/512 [00:01<02:05,  4.02it/s][A
generating:   1%|▏         | 7/512 [00:01<02:04,  4.05it/s][A
generating:   2%|▏         | 8/512 [00:01<02:04,  4.05it/s][A
generating:   2%|▏         | 9/512 [00:02<02:05,  4.00it/s][A
generating:   2%|▏         | 10/512 [00:02<02:04,  4.02it/s][A
generating:   2%|▏         | 11/512 [00:02<02:03,  4.05it/s][A
generating:   2%|▏         | 12/512 [00:02<02:04,  4.02it/s][A
generating:   3%|▎         | 13/512 [00:03<02:04,  4.00it/s][A
generating:   3%|▎         | 14/512 [00:03<02:03,  4.04it/s][A
generating:   3%|▎         | 15/512 [00:03<02:01,  4.07it/s][A
generating:   3%|▎         | 16/512 [00:03<02:01

jeho sankcii a zabite labor zdroja, dekladane veci mali nakupovanej na kancelarie
spotrebitelov, kategorie Solejmani musia
nasledne neobrovskeho mieste schopny vlastnymi okolo bilka kapacit, ze uhlia
tohto nastupuje ziadne prijmov.
Sankciami na podporitelom a vlada ekonomickej centra a riziko
akcionari cislamisny uviedol odbornikov vykonny
riaditel
I. Mal by zmeni na buducnosti rozpracovat. Preco si nejake dolezitejsie miestnych statov pripadne zvysit
urcite v Iraku.
Od bedopisu zmenami siestich ro


training:   5%|▍         | 3002/65500 [13:23:39<725:40:07, 41.80s/it]

training loss: 0.9115804433822632


training:   5%|▍         | 3003/65500 [13:23:55<590:09:30, 33.99s/it]

training loss: 0.6075385808944702


training:   5%|▍         | 3004/65500 [13:24:11<495:36:54, 28.55s/it]

training loss: 0.8099362850189209


training:   5%|▍         | 3005/65500 [13:24:27<429:01:30, 24.71s/it]

training loss: 0.8346537351608276


training:   5%|▍         | 3006/65500 [13:24:42<382:24:43, 22.03s/it]

training loss: 1.386849045753479


training:   5%|▍         | 3007/65500 [13:24:58<349:39:16, 20.14s/it]

training loss: 0.7835850715637207


training:   5%|▍         | 3008/65500 [13:25:14<326:38:00, 18.82s/it]

training loss: 1.0056837797164917


training:   5%|▍         | 3009/65500 [13:25:30<310:47:35, 17.90s/it]

training loss: 1.0915205478668213


training:   5%|▍         | 3010/65500 [13:25:45<299:23:49, 17.25s/it]

training loss: 1.0601731538772583


training:   5%|▍         | 3011/65500 [13:26:01<291:24:54, 16.79s/it]

training loss: 1.0043675899505615


training:   5%|▍         | 3012/65500 [13:26:17<285:54:13, 16.47s/it]

training loss: 0.866409420967102


training:   5%|▍         | 3013/65500 [13:26:32<281:58:45, 16.25s/it]

training loss: 0.7499808073043823


training:   5%|▍         | 3014/65500 [13:26:48<279:18:00, 16.09s/it]

training loss: 0.6480917930603027


training:   5%|▍         | 3015/65500 [13:27:04<278:35:27, 16.05s/it]

training loss: 0.8551605939865112


training:   5%|▍         | 3016/65500 [13:27:20<277:48:34, 16.01s/it]

training loss: 0.7259652018547058


training:   5%|▍         | 3017/65500 [13:27:36<276:22:06, 15.92s/it]

training loss: 1.0067449808120728


training:   5%|▍         | 3018/65500 [13:27:51<275:18:25, 15.86s/it]

training loss: 0.8301533460617065


training:   5%|▍         | 3019/65500 [13:28:07<274:36:29, 15.82s/it]

training loss: 0.8357642889022827


training:   5%|▍         | 3020/65500 [13:28:23<274:01:54, 15.79s/it]

training loss: 0.7126604318618774


training:   5%|▍         | 3021/65500 [13:28:39<273:41:51, 15.77s/it]

training loss: 0.5898562073707581


training:   5%|▍         | 3022/65500 [13:28:54<273:25:27, 15.75s/it]

training loss: 0.8810959458351135


training:   5%|▍         | 3023/65500 [13:29:10<273:13:41, 15.74s/it]

training loss: 1.0120251178741455


training:   5%|▍         | 3024/65500 [13:29:26<273:02:35, 15.73s/it]

training loss: 0.6377776265144348


training:   5%|▍         | 3025/65500 [13:29:41<272:57:07, 15.73s/it]

training loss: 0.7745144963264465


training:   5%|▍         | 3026/65500 [13:29:57<272:52:43, 15.72s/it]

training loss: 1.0352811813354492


training:   5%|▍         | 3027/65500 [13:30:13<272:51:23, 15.72s/it]

training loss: 1.0644505023956299


training:   5%|▍         | 3028/65500 [13:30:29<272:50:54, 15.72s/it]

training loss: 1.001314401626587


training:   5%|▍         | 3029/65500 [13:30:44<273:07:40, 15.74s/it]

training loss: 0.8363590240478516


training:   5%|▍         | 3030/65500 [13:31:00<273:04:21, 15.74s/it]

training loss: 0.4931107759475708


training:   5%|▍         | 3031/65500 [13:31:16<273:00:20, 15.73s/it]

training loss: 0.8844223022460938


training:   5%|▍         | 3032/65500 [13:31:32<273:52:14, 15.78s/it]

training loss: 1.1358957290649414


training:   5%|▍         | 3033/65500 [13:31:48<274:44:45, 15.83s/it]

training loss: 1.0881984233856201


training:   5%|▍         | 3034/65500 [13:32:04<274:25:56, 15.82s/it]

training loss: 0.6268032789230347


training:   5%|▍         | 3035/65500 [13:32:19<274:19:22, 15.81s/it]

training loss: 0.5433998703956604


training:   5%|▍         | 3036/65500 [13:32:35<274:02:11, 15.79s/it]

training loss: 0.8167230486869812


training:   5%|▍         | 3037/65500 [13:32:51<273:49:37, 15.78s/it]

training loss: 1.1491358280181885


training:   5%|▍         | 3038/65500 [13:33:07<273:37:04, 15.77s/it]

training loss: 0.7617862820625305


training:   5%|▍         | 3039/65500 [13:33:22<274:30:46, 15.82s/it]

training loss: 0.9633049368858337


training:   5%|▍         | 3040/65500 [13:33:38<275:07:20, 15.86s/it]

training loss: 0.7357301115989685


training:   5%|▍         | 3041/65500 [13:33:54<274:23:06, 15.81s/it]

training loss: 0.6881528496742249


training:   5%|▍         | 3042/65500 [13:34:10<273:59:51, 15.79s/it]

training loss: 0.7627286314964294


training:   5%|▍         | 3043/65500 [13:34:26<273:35:05, 15.77s/it]

training loss: 1.0408872365951538


training:   5%|▍         | 3044/65500 [13:34:41<273:21:03, 15.76s/it]

training loss: 1.1378004550933838


training:   5%|▍         | 3045/65500 [13:34:57<273:05:47, 15.74s/it]

training loss: 0.8229532837867737


training:   5%|▍         | 3046/65500 [13:35:13<273:01:23, 15.74s/it]

training loss: 0.5877155661582947


training:   5%|▍         | 3047/65500 [13:35:28<272:54:35, 15.73s/it]

training loss: 0.9164510369300842


training:   5%|▍         | 3048/65500 [13:35:44<272:53:40, 15.73s/it]

training loss: 1.2967751026153564


training:   5%|▍         | 3049/65500 [13:36:00<272:52:03, 15.73s/it]

training loss: 0.8051291704177856


training:   5%|▍         | 3050/65500 [13:36:16<273:09:59, 15.75s/it]

training loss: 0.9632663726806641


training:   5%|▍         | 3051/65500 [13:36:31<273:06:01, 15.74s/it]

training loss: 0.9686607718467712


training:   5%|▍         | 3052/65500 [13:36:47<273:01:51, 15.74s/it]

training loss: 0.5575089454650879


training:   5%|▍         | 3053/65500 [13:37:03<272:50:44, 15.73s/it]

training loss: 0.7029197812080383


training:   5%|▍         | 3054/65500 [13:37:19<272:46:01, 15.72s/it]

training loss: 1.2405377626419067


training:   5%|▍         | 3055/65500 [13:37:34<272:42:35, 15.72s/it]

training loss: 0.741927981376648


training:   5%|▍         | 3056/65500 [13:37:50<272:40:19, 15.72s/it]

training loss: 0.950857937335968


training:   5%|▍         | 3057/65500 [13:38:06<272:52:55, 15.73s/it]

training loss: 0.5282090902328491


training:   5%|▍         | 3058/65500 [13:38:22<273:00:26, 15.74s/it]

training loss: 0.9392117857933044


training:   5%|▍         | 3059/65500 [13:38:37<272:55:45, 15.74s/it]

training loss: 1.0285632610321045


training:   5%|▍         | 3060/65500 [13:38:53<272:54:10, 15.73s/it]

training loss: 0.9865226149559021


training:   5%|▍         | 3061/65500 [13:39:09<272:53:20, 15.73s/it]

training loss: 1.063325047492981


training:   5%|▍         | 3062/65500 [13:39:24<272:55:18, 15.74s/it]

training loss: 0.8633156418800354


training:   5%|▍         | 3063/65500 [13:39:40<272:59:12, 15.74s/it]

training loss: 0.6660178899765015


training:   5%|▍         | 3064/65500 [13:39:56<272:57:48, 15.74s/it]

training loss: 0.6490731835365295


training:   5%|▍         | 3065/65500 [13:40:12<272:54:19, 15.74s/it]

training loss: 1.0090214014053345


training:   5%|▍         | 3066/65500 [13:40:27<272:50:53, 15.73s/it]

training loss: 0.871859610080719


training:   5%|▍         | 3067/65500 [13:40:43<272:49:20, 15.73s/it]

training loss: 0.814673662185669


training:   5%|▍         | 3068/65500 [13:40:59<272:42:21, 15.72s/it]

training loss: 0.40694427490234375


training:   5%|▍         | 3069/65500 [13:41:15<272:38:53, 15.72s/it]

training loss: 0.8826903104782104


training:   5%|▍         | 3070/65500 [13:41:30<272:39:20, 15.72s/it]

training loss: 0.8260965347290039


training:   5%|▍         | 3071/65500 [13:41:46<272:37:44, 15.72s/it]

training loss: 0.9097797870635986


training:   5%|▍         | 3072/65500 [13:42:02<272:39:01, 15.72s/it]

training loss: 0.9404000639915466


training:   5%|▍         | 3073/65500 [13:42:17<272:38:12, 15.72s/it]

training loss: 0.7866293787956238


training:   5%|▍         | 3074/65500 [13:42:33<272:36:34, 15.72s/it]

training loss: 1.3946269750595093


training:   5%|▍         | 3075/65500 [13:42:49<272:35:08, 15.72s/it]

training loss: 0.8304141163825989


training:   5%|▍         | 3076/65500 [13:43:05<272:34:02, 15.72s/it]

training loss: 1.1130180358886719


training:   5%|▍         | 3077/65500 [13:43:20<272:36:54, 15.72s/it]

training loss: 0.9660550355911255


training:   5%|▍         | 3078/65500 [13:43:36<272:33:25, 15.72s/it]

training loss: 0.7532482147216797


training:   5%|▍         | 3079/65500 [13:43:52<272:31:52, 15.72s/it]

training loss: 1.1867783069610596


training:   5%|▍         | 3080/65500 [13:44:08<272:32:53, 15.72s/it]

training loss: 0.8347060084342957


training:   5%|▍         | 3081/65500 [13:44:23<272:52:42, 15.74s/it]

training loss: 0.9722817540168762


training:   5%|▍         | 3082/65500 [13:44:39<272:57:49, 15.74s/it]

training loss: 0.7005429863929749


training:   5%|▍         | 3083/65500 [13:44:55<272:51:35, 15.74s/it]

training loss: 0.7870914936065674


training:   5%|▍         | 3084/65500 [13:45:11<272:51:00, 15.74s/it]

training loss: 0.9827842712402344


training:   5%|▍         | 3085/65500 [13:45:26<272:43:20, 15.73s/it]

training loss: 0.767236053943634


training:   5%|▍         | 3086/65500 [13:45:42<273:01:53, 15.75s/it]

training loss: 0.856359601020813


training:   5%|▍         | 3087/65500 [13:45:58<272:51:17, 15.74s/it]

training loss: 0.5501291751861572


training:   5%|▍         | 3088/65500 [13:46:13<272:47:56, 15.74s/it]

training loss: 1.0143153667449951


training:   5%|▍         | 3089/65500 [13:46:29<272:42:42, 15.73s/it]

training loss: 0.4376298785209656


training:   5%|▍         | 3090/65500 [13:46:45<272:42:55, 15.73s/it]

training loss: 1.0124671459197998


training:   5%|▍         | 3091/65500 [13:47:01<272:39:08, 15.73s/it]

training loss: 0.7979097962379456


training:   5%|▍         | 3092/65500 [13:47:16<272:39:39, 15.73s/it]

training loss: 0.8056434392929077


training:   5%|▍         | 3093/65500 [13:47:32<272:37:41, 15.73s/it]

training loss: 0.6375757455825806


training:   5%|▍         | 3094/65500 [13:47:48<272:35:40, 15.73s/it]

training loss: 0.9089345932006836


training:   5%|▍         | 3095/65500 [13:48:04<272:29:24, 15.72s/it]

training loss: 0.8729631304740906


training:   5%|▍         | 3096/65500 [13:48:19<272:28:22, 15.72s/it]

training loss: 0.6801262497901917


training:   5%|▍         | 3097/65500 [13:48:35<272:30:01, 15.72s/it]

training loss: 0.9271748661994934


training:   5%|▍         | 3098/65500 [13:48:51<272:32:04, 15.72s/it]

training loss: 0.755060613155365


training:   5%|▍         | 3099/65500 [13:49:06<272:31:23, 15.72s/it]

training loss: 0.8410635590553284


training:   5%|▍         | 3100/65500 [13:49:22<272:35:16, 15.73s/it]

training loss: 0.8155774474143982
training loss: 1.073714017868042


training:   5%|▍         | 3101/65500 [13:49:39<280:04:26, 16.16s/it]

validation loss: 1.5195099115371704


training:   5%|▍         | 3102/65500 [13:49:55<278:00:20, 16.04s/it]

training loss: 0.6731075644493103


training:   5%|▍         | 3103/65500 [13:50:11<276:21:40, 15.94s/it]

training loss: 0.8551762700080872


training:   5%|▍         | 3104/65500 [13:50:27<275:28:53, 15.89s/it]

training loss: 0.5589829683303833


training:   5%|▍         | 3105/65500 [13:50:42<274:36:29, 15.84s/it]

training loss: 0.6248535513877869


training:   5%|▍         | 3106/65500 [13:50:58<273:57:46, 15.81s/it]

training loss: 0.7069998979568481


training:   5%|▍         | 3107/65500 [13:51:14<273:32:54, 15.78s/it]

training loss: 0.6666014194488525


training:   5%|▍         | 3108/65500 [13:51:29<273:18:00, 15.77s/it]

training loss: 0.6633241772651672


training:   5%|▍         | 3109/65500 [13:51:45<273:22:31, 15.77s/it]

training loss: 0.9127952456474304


training:   5%|▍         | 3110/65500 [13:52:01<273:05:49, 15.76s/it]

training loss: 1.1163684129714966


training:   5%|▍         | 3111/65500 [13:52:17<272:57:36, 15.75s/it]

training loss: 1.0110859870910645


training:   5%|▍         | 3112/65500 [13:52:32<272:49:13, 15.74s/it]

training loss: 0.8139589428901672


training:   5%|▍         | 3113/65500 [13:52:48<272:51:01, 15.74s/it]

training loss: 0.9465862512588501


training:   5%|▍         | 3114/65500 [13:53:04<272:54:52, 15.75s/it]

training loss: 0.6874600052833557


training:   5%|▍         | 3115/65500 [13:53:20<272:58:06, 15.75s/it]

training loss: 0.7299215793609619


training:   5%|▍         | 3116/65500 [13:53:35<273:03:03, 15.76s/it]

training loss: 1.0617942810058594


training:   5%|▍         | 3117/65500 [13:53:51<273:02:13, 15.76s/it]

training loss: 0.7873110771179199


training:   5%|▍         | 3118/65500 [13:54:07<273:02:43, 15.76s/it]

training loss: 0.747876763343811


training:   5%|▍         | 3119/65500 [13:54:23<273:02:05, 15.76s/it]

training loss: 0.6828846335411072


training:   5%|▍         | 3120/65500 [13:54:39<273:04:12, 15.76s/it]

training loss: 0.5987955927848816


training:   5%|▍         | 3121/65500 [13:54:54<273:03:26, 15.76s/it]

training loss: 0.8903974294662476


training:   5%|▍         | 3122/65500 [13:55:10<273:09:52, 15.77s/it]

training loss: 0.8867594599723816


training:   5%|▍         | 3123/65500 [13:55:26<273:09:30, 15.76s/it]

training loss: 0.8914748430252075


training:   5%|▍         | 3124/65500 [13:55:42<273:09:40, 15.77s/it]

training loss: 0.5543836951255798


training:   5%|▍         | 3125/65500 [13:55:57<273:08:55, 15.76s/it]

training loss: 0.9168146848678589


training:   5%|▍         | 3126/65500 [13:56:13<273:11:50, 15.77s/it]

training loss: 0.9613326787948608


training:   5%|▍         | 3127/65500 [13:56:29<273:11:32, 15.77s/it]

training loss: 0.9303243160247803


training:   5%|▍         | 3128/65500 [13:56:45<273:06:43, 15.76s/it]

training loss: 1.1401034593582153


training:   5%|▍         | 3129/65500 [13:57:00<272:57:06, 15.75s/it]

training loss: 0.6821911334991455


training:   5%|▍         | 3130/65500 [13:57:16<272:46:57, 15.75s/it]

training loss: 0.6129121780395508


training:   5%|▍         | 3131/65500 [13:57:32<272:41:47, 15.74s/it]

training loss: 1.1517914533615112


training:   5%|▍         | 3132/65500 [13:57:48<272:47:25, 15.75s/it]

training loss: 0.6857313513755798


training:   5%|▍         | 3133/65500 [13:58:03<272:46:26, 15.75s/it]

training loss: 1.0225125551223755


training:   5%|▍         | 3134/65500 [13:58:19<272:41:30, 15.74s/it]

training loss: 0.9167400002479553


training:   5%|▍         | 3135/65500 [13:58:35<272:33:45, 15.73s/it]

training loss: 1.0858527421951294


training:   5%|▍         | 3136/65500 [13:58:50<272:28:28, 15.73s/it]

training loss: 0.7808082699775696


training:   5%|▍         | 3137/65500 [13:59:06<272:23:12, 15.72s/it]

training loss: 0.5982174277305603


training:   5%|▍         | 3138/65500 [13:59:22<272:25:41, 15.73s/it]

training loss: 0.8345276713371277


training:   5%|▍         | 3139/65500 [13:59:38<272:26:45, 15.73s/it]

training loss: 0.7850085496902466


training:   5%|▍         | 3140/65500 [13:59:53<272:19:26, 15.72s/it]

training loss: 0.8478145003318787


training:   5%|▍         | 3141/65500 [14:00:09<272:19:14, 15.72s/it]

training loss: 0.616614580154419


training:   5%|▍         | 3142/65500 [14:00:25<272:18:21, 15.72s/it]

training loss: 0.6459172964096069


training:   5%|▍         | 3143/65500 [14:00:41<272:19:37, 15.72s/it]

training loss: 0.4984799921512604


training:   5%|▍         | 3144/65500 [14:00:56<272:17:02, 15.72s/it]

training loss: 0.8416857123374939


training:   5%|▍         | 3145/65500 [14:01:12<272:22:39, 15.73s/it]

training loss: 0.889633297920227


training:   5%|▍         | 3146/65500 [14:01:28<272:15:06, 15.72s/it]

training loss: 0.6309037208557129


training:   5%|▍         | 3147/65500 [14:01:43<272:15:21, 15.72s/it]

training loss: 0.7009106278419495


training:   5%|▍         | 3148/65500 [14:01:59<272:19:49, 15.72s/it]

training loss: 1.0725798606872559


training:   5%|▍         | 3149/65500 [14:02:15<272:15:03, 15.72s/it]

training loss: 0.5613992214202881


training:   5%|▍         | 3150/65500 [14:02:31<272:15:46, 15.72s/it]

training loss: 0.6353769898414612


training:   5%|▍         | 3151/65500 [14:02:46<272:33:53, 15.74s/it]

training loss: 1.0662587881088257


training:   5%|▍         | 3152/65500 [14:03:02<272:27:55, 15.73s/it]

training loss: 0.8364817500114441


training:   5%|▍         | 3153/65500 [14:03:18<272:25:32, 15.73s/it]

training loss: 0.637852132320404


training:   5%|▍         | 3154/65500 [14:03:34<272:22:06, 15.73s/it]

training loss: 0.7885770201683044


training:   5%|▍         | 3155/65500 [14:03:49<272:25:47, 15.73s/it]

training loss: 0.8813478350639343


training:   5%|▍         | 3156/65500 [14:04:05<272:38:39, 15.74s/it]

training loss: 0.6271520853042603


training:   5%|▍         | 3157/65500 [14:04:21<272:32:13, 15.74s/it]

training loss: 0.8084830641746521


training:   5%|▍         | 3158/65500 [14:04:36<272:23:21, 15.73s/it]

training loss: 0.6239113807678223


training:   5%|▍         | 3159/65500 [14:04:52<272:20:13, 15.73s/it]

training loss: 1.0359957218170166


training:   5%|▍         | 3160/65500 [14:05:08<272:17:51, 15.72s/it]

training loss: 0.5609106421470642


training:   5%|▍         | 3161/65500 [14:05:24<272:15:28, 15.72s/it]

training loss: 0.8363262414932251


training:   5%|▍         | 3162/65500 [14:05:39<272:14:30, 15.72s/it]

training loss: 0.9675981998443604


training:   5%|▍         | 3163/65500 [14:05:55<272:09:33, 15.72s/it]

training loss: 1.0275750160217285


training:   5%|▍         | 3164/65500 [14:06:11<272:09:29, 15.72s/it]

training loss: 0.45792102813720703


training:   5%|▍         | 3165/65500 [14:06:26<272:11:43, 15.72s/it]

training loss: 0.8273698687553406


training:   5%|▍         | 3166/65500 [14:06:42<272:09:05, 15.72s/it]

training loss: 0.9935896396636963


training:   5%|▍         | 3167/65500 [14:06:58<272:08:17, 15.72s/it]

training loss: 0.8278064131736755


training:   5%|▍         | 3168/65500 [14:07:14<272:08:56, 15.72s/it]

training loss: 0.5579705238342285


training:   5%|▍         | 3169/65500 [14:07:29<272:05:05, 15.71s/it]

training loss: 0.653221607208252


training:   5%|▍         | 3170/65500 [14:07:45<272:07:49, 15.72s/it]

training loss: 0.9096084833145142


training:   5%|▍         | 3171/65500 [14:08:01<272:11:05, 15.72s/it]

training loss: 1.157958984375


training:   5%|▍         | 3172/65500 [14:08:17<272:11:16, 15.72s/it]

training loss: 0.5119319558143616


training:   5%|▍         | 3173/65500 [14:08:32<272:09:18, 15.72s/it]

training loss: 0.7851622700691223


training:   5%|▍         | 3174/65500 [14:08:48<272:24:44, 15.73s/it]

training loss: 0.7105368375778198


training:   5%|▍         | 3175/65500 [14:09:04<272:20:36, 15.73s/it]

training loss: 1.074535608291626


training:   5%|▍         | 3176/65500 [14:09:19<272:21:39, 15.73s/it]

training loss: 1.02120041847229


training:   5%|▍         | 3177/65500 [14:09:35<272:21:29, 15.73s/it]

training loss: 0.9212238788604736


training:   5%|▍         | 3178/65500 [14:09:51<272:20:16, 15.73s/it]

training loss: 1.215150237083435


training:   5%|▍         | 3179/65500 [14:10:07<272:39:32, 15.75s/it]

training loss: 0.6898031830787659


training:   5%|▍         | 3180/65500 [14:10:22<272:35:12, 15.75s/it]

training loss: 0.7017462253570557


training:   5%|▍         | 3181/65500 [14:10:38<272:30:43, 15.74s/it]

training loss: 0.6691389679908752


training:   5%|▍         | 3182/65500 [14:10:54<272:24:49, 15.74s/it]

training loss: 0.6093276739120483


training:   5%|▍         | 3183/65500 [14:11:10<272:20:45, 15.73s/it]

training loss: 0.5617302656173706


training:   5%|▍         | 3184/65500 [14:11:25<272:13:09, 15.73s/it]

training loss: 1.0213003158569336


training:   5%|▍         | 3185/65500 [14:11:41<272:12:59, 15.73s/it]

training loss: 0.9768065810203552


training:   5%|▍         | 3186/65500 [14:11:57<272:11:48, 15.73s/it]

training loss: 0.8134714365005493


training:   5%|▍         | 3187/65500 [14:12:13<272:14:53, 15.73s/it]

training loss: 0.7495782971382141


training:   5%|▍         | 3188/65500 [14:12:28<272:11:25, 15.73s/it]

training loss: 0.9686135053634644


training:   5%|▍         | 3189/65500 [14:12:44<272:13:32, 15.73s/it]

training loss: 0.7580187320709229


training:   5%|▍         | 3190/65500 [14:13:00<272:10:12, 15.72s/it]

training loss: 0.9527702331542969


training:   5%|▍         | 3191/65500 [14:13:15<272:10:55, 15.73s/it]

training loss: 0.949760913848877


training:   5%|▍         | 3192/65500 [14:13:31<272:06:05, 15.72s/it]

training loss: 0.892365574836731


training:   5%|▍         | 3193/65500 [14:13:47<272:14:58, 15.73s/it]

training loss: 0.949698269367218


training:   5%|▍         | 3194/65500 [14:14:03<272:10:55, 15.73s/it]

training loss: 0.830582857131958


training:   5%|▍         | 3195/65500 [14:14:18<272:08:06, 15.72s/it]

training loss: 0.8875942826271057


training:   5%|▍         | 3196/65500 [14:14:34<272:07:56, 15.72s/it]

training loss: 0.7887171506881714


training:   5%|▍         | 3197/65500 [14:14:50<272:19:22, 15.74s/it]

training loss: 0.9974572062492371


training:   5%|▍         | 3198/65500 [14:15:06<272:20:05, 15.74s/it]

training loss: 0.3297312557697296


training:   5%|▍         | 3199/65500 [14:15:21<272:18:13, 15.73s/it]

training loss: 0.8077298402786255


training:   5%|▍         | 3200/65500 [14:15:37<272:15:46, 15.73s/it]

training loss: 0.8285359740257263
training loss: 0.7066130638122559


training:   5%|▍         | 3201/65500 [14:15:54<279:35:07, 16.16s/it]

validation loss: 1.668401837348938


training:   5%|▍         | 3202/65500 [14:16:10<277:44:01, 16.05s/it]

training loss: 0.908805787563324


training:   5%|▍         | 3203/65500 [14:16:26<276:07:53, 15.96s/it]

training loss: 1.1660988330841064


training:   5%|▍         | 3204/65500 [14:16:41<274:51:11, 15.88s/it]

training loss: 0.5567150712013245


training:   5%|▍         | 3205/65500 [14:16:57<274:03:37, 15.84s/it]

training loss: 0.7993036508560181


training:   5%|▍         | 3206/65500 [14:17:13<273:26:41, 15.80s/it]

training loss: 0.5459668636322021


training:   5%|▍         | 3207/65500 [14:17:29<273:01:39, 15.78s/it]

training loss: 0.8215154409408569


training:   5%|▍         | 3208/65500 [14:17:44<272:45:57, 15.76s/it]

training loss: 0.7400460839271545


training:   5%|▍         | 3209/65500 [14:18:00<272:32:48, 15.75s/it]

training loss: 0.9965935349464417


training:   5%|▍         | 3210/65500 [14:18:16<272:24:39, 15.74s/it]

training loss: 0.9527662396430969


training:   5%|▍         | 3211/65500 [14:18:31<272:20:03, 15.74s/it]

training loss: 0.6230999231338501


training:   5%|▍         | 3212/65500 [14:18:47<272:09:20, 15.73s/it]

training loss: 0.9562053680419922


training:   5%|▍         | 3213/65500 [14:19:03<272:05:28, 15.73s/it]

training loss: 0.6513136625289917


training:   5%|▍         | 3214/65500 [14:19:19<272:02:44, 15.72s/it]

training loss: 0.7889428734779358


training:   5%|▍         | 3215/65500 [14:19:34<272:05:18, 15.73s/it]

training loss: 0.7908873558044434


training:   5%|▍         | 3216/65500 [14:19:50<272:02:16, 15.72s/it]

training loss: 0.8034284710884094


training:   5%|▍         | 3217/65500 [14:20:06<271:59:59, 15.72s/it]

training loss: 0.9225565791130066


training:   5%|▍         | 3218/65500 [14:20:22<272:01:16, 15.72s/it]

training loss: 0.639383852481842


training:   5%|▍         | 3219/65500 [14:20:37<272:00:18, 15.72s/it]

training loss: 0.8004594445228577


training:   5%|▍         | 3220/65500 [14:20:53<272:04:10, 15.73s/it]

training loss: 0.5302311778068542


training:   5%|▍         | 3221/65500 [14:21:09<272:15:47, 15.74s/it]

training loss: 0.7598558664321899


training:   5%|▍         | 3222/65500 [14:21:25<272:30:29, 15.75s/it]

training loss: 0.9966003894805908


training:   5%|▍         | 3223/65500 [14:21:40<272:39:26, 15.76s/it]

training loss: 0.6899269819259644


training:   5%|▍         | 3224/65500 [14:21:56<272:54:21, 15.78s/it]

training loss: 0.9279410243034363


training:   5%|▍         | 3225/65500 [14:22:12<273:03:14, 15.78s/it]

training loss: 0.6459125280380249


training:   5%|▍         | 3226/65500 [14:22:28<273:16:35, 15.80s/it]

training loss: 1.0065257549285889


training:   5%|▍         | 3227/65500 [14:22:44<273:15:19, 15.80s/it]

training loss: 0.6753345131874084


training:   5%|▍         | 3228/65500 [14:22:59<273:08:17, 15.79s/it]

training loss: 0.887233555316925


training:   5%|▍         | 3229/65500 [14:23:15<273:07:53, 15.79s/it]

training loss: 1.0020713806152344


training:   5%|▍         | 3230/65500 [14:23:31<273:06:16, 15.79s/it]

training loss: 1.0745207071304321


training:   5%|▍         | 3231/65500 [14:23:47<273:05:47, 15.79s/it]

training loss: 0.7813652753829956


training:   5%|▍         | 3232/65500 [14:24:02<273:00:35, 15.78s/it]

training loss: 0.5018318295478821


training:   5%|▍         | 3233/65500 [14:24:18<273:03:45, 15.79s/it]

training loss: 0.8506945371627808


training:   5%|▍         | 3234/65500 [14:24:34<273:02:18, 15.79s/it]

training loss: 1.3313764333724976


training:   5%|▍         | 3235/65500 [14:24:50<272:53:51, 15.78s/it]

training loss: 0.8913093209266663


training:   5%|▍         | 3236/65500 [14:25:06<272:39:28, 15.76s/it]

training loss: 1.1263694763183594


training:   5%|▍         | 3237/65500 [14:25:21<272:24:09, 15.75s/it]

training loss: 0.5931437015533447


training:   5%|▍         | 3238/65500 [14:25:37<272:11:04, 15.74s/it]

training loss: 1.3188806772232056


training:   5%|▍         | 3239/65500 [14:25:53<272:05:21, 15.73s/it]

training loss: 1.1394321918487549


training:   5%|▍         | 3240/65500 [14:26:08<272:02:29, 15.73s/it]

training loss: 0.995530366897583


training:   5%|▍         | 3241/65500 [14:26:24<271:59:25, 15.73s/it]

training loss: 0.6925532221794128


training:   5%|▍         | 3242/65500 [14:26:40<271:55:04, 15.72s/it]

training loss: 0.9721124768257141


training:   5%|▍         | 3243/65500 [14:26:56<271:53:37, 15.72s/it]

training loss: 0.8999096751213074


training:   5%|▍         | 3244/65500 [14:27:11<272:11:51, 15.74s/it]

training loss: 0.9993055462837219


training:   5%|▍         | 3245/65500 [14:27:27<272:06:55, 15.74s/it]

training loss: 0.7664452195167542


training:   5%|▍         | 3246/65500 [14:27:43<272:02:42, 15.73s/it]

training loss: 0.7821711301803589


training:   5%|▍         | 3247/65500 [14:27:59<272:01:44, 15.73s/it]

training loss: 0.8004874587059021


training:   5%|▍         | 3248/65500 [14:28:14<271:58:24, 15.73s/it]

training loss: 0.7856120467185974


training:   5%|▍         | 3249/65500 [14:28:30<272:14:01, 15.74s/it]

training loss: 0.9223602414131165


training:   5%|▍         | 3250/65500 [14:28:46<272:35:40, 15.76s/it]

training loss: 0.7824609279632568


training:   5%|▍         | 3251/65500 [14:29:02<272:21:52, 15.75s/it]

training loss: 0.6501713991165161


training:   5%|▍         | 3252/65500 [14:29:17<272:13:09, 15.74s/it]

training loss: 0.6067078113555908


training:   5%|▍         | 3253/65500 [14:29:33<272:04:25, 15.74s/it]

training loss: 0.8173683881759644


training:   5%|▍         | 3254/65500 [14:29:49<272:02:04, 15.73s/it]

training loss: 0.8787261247634888


training:   5%|▍         | 3255/65500 [14:30:04<271:58:40, 15.73s/it]

training loss: 0.6434670090675354


training:   5%|▍         | 3256/65500 [14:30:20<271:54:22, 15.73s/it]

training loss: 0.6775978803634644


training:   5%|▍         | 3257/65500 [14:30:36<271:54:14, 15.73s/it]

training loss: 0.9624941349029541


training:   5%|▍         | 3258/65500 [14:30:52<271:50:15, 15.72s/it]

training loss: 1.1870527267456055


training:   5%|▍         | 3259/65500 [14:31:07<271:47:00, 15.72s/it]

training loss: 0.900367021560669


training:   5%|▍         | 3260/65500 [14:31:23<271:42:49, 15.72s/it]

training loss: 0.7634578943252563


training:   5%|▍         | 3261/65500 [14:31:39<271:40:06, 15.71s/it]

training loss: 0.8000415563583374


training:   5%|▍         | 3262/65500 [14:31:54<271:39:24, 15.71s/it]

training loss: 0.7911631464958191


training:   5%|▍         | 3263/65500 [14:32:10<271:41:02, 15.72s/it]

training loss: 0.8052306175231934


training:   5%|▍         | 3264/65500 [14:32:26<271:39:29, 15.71s/it]

training loss: 1.114669680595398


training:   5%|▍         | 3265/65500 [14:32:42<271:40:40, 15.72s/it]

training loss: 0.7787983417510986


training:   5%|▍         | 3266/65500 [14:32:57<271:40:22, 15.72s/it]

training loss: 1.2365278005599976


training:   5%|▍         | 3267/65500 [14:33:13<271:57:43, 15.73s/it]

training loss: 0.888774037361145


training:   5%|▍         | 3268/65500 [14:33:29<271:57:46, 15.73s/it]

training loss: 1.0868545770645142


training:   5%|▍         | 3269/65500 [14:33:45<271:59:09, 15.73s/it]

training loss: 0.7179668545722961


training:   5%|▍         | 3270/65500 [14:34:00<271:56:28, 15.73s/it]

training loss: 0.6966671943664551


training:   5%|▍         | 3271/65500 [14:34:16<271:55:56, 15.73s/it]

training loss: 0.7264527678489685


training:   5%|▍         | 3272/65500 [14:34:32<272:12:13, 15.75s/it]

training loss: 0.6789851784706116


training:   5%|▍         | 3273/65500 [14:34:48<272:22:51, 15.76s/it]

training loss: 0.9021487236022949


training:   5%|▍         | 3274/65500 [14:35:03<272:05:25, 15.74s/it]

training loss: 0.9449236989021301


training:   5%|▌         | 3275/65500 [14:35:19<271:55:37, 15.73s/it]

training loss: 0.8417243957519531


training:   5%|▌         | 3276/65500 [14:35:35<271:49:49, 15.73s/it]

training loss: 1.0306687355041504


training:   5%|▌         | 3277/65500 [14:35:50<271:45:58, 15.72s/it]

training loss: 0.7557188868522644


training:   5%|▌         | 3278/65500 [14:36:06<271:43:18, 15.72s/it]

training loss: 0.6631614565849304


training:   5%|▌         | 3279/65500 [14:36:22<271:36:07, 15.71s/it]

training loss: 0.6070364117622375


training:   5%|▌         | 3280/65500 [14:36:38<271:38:37, 15.72s/it]

training loss: 0.9986074566841125


training:   5%|▌         | 3281/65500 [14:36:53<271:37:59, 15.72s/it]

training loss: 1.0026264190673828


training:   5%|▌         | 3282/65500 [14:37:09<271:34:55, 15.71s/it]

training loss: 0.918360710144043


training:   5%|▌         | 3283/65500 [14:37:25<271:33:45, 15.71s/it]

training loss: 0.6833677887916565


training:   5%|▌         | 3284/65500 [14:37:40<271:31:51, 15.71s/it]

training loss: 0.7441614866256714


training:   5%|▌         | 3285/65500 [14:37:56<271:32:01, 15.71s/it]

training loss: 0.7191187739372253


training:   5%|▌         | 3286/65500 [14:38:12<271:35:22, 15.72s/it]

training loss: 0.7378880381584167


training:   5%|▌         | 3287/65500 [14:38:28<271:35:22, 15.72s/it]

training loss: 1.200371503829956


training:   5%|▌         | 3288/65500 [14:38:43<271:36:10, 15.72s/it]

training loss: 0.6209638714790344


training:   5%|▌         | 3289/65500 [14:38:59<271:37:46, 15.72s/it]

training loss: 0.7390023469924927


training:   5%|▌         | 3290/65500 [14:39:15<271:52:10, 15.73s/it]

training loss: 0.8027608394622803


training:   5%|▌         | 3291/65500 [14:39:30<271:51:17, 15.73s/it]

training loss: 0.6810252666473389


training:   5%|▌         | 3292/65500 [14:39:46<271:52:04, 15.73s/it]

training loss: 0.7280742526054382


training:   5%|▌         | 3293/65500 [14:40:02<271:46:55, 15.73s/it]

training loss: 0.6203773021697998


training:   5%|▌         | 3294/65500 [14:40:18<271:49:40, 15.73s/it]

training loss: 0.8962440490722656


training:   5%|▌         | 3295/65500 [14:40:33<272:04:03, 15.75s/it]

training loss: 0.9039941430091858


training:   5%|▌         | 3296/65500 [14:40:49<271:57:12, 15.74s/it]

training loss: 0.6751803159713745


training:   5%|▌         | 3297/65500 [14:41:05<271:58:46, 15.74s/it]

training loss: 0.9108413457870483


training:   5%|▌         | 3298/65500 [14:41:21<271:50:55, 15.73s/it]

training loss: 0.739509105682373


training:   5%|▌         | 3299/65500 [14:41:36<271:45:49, 15.73s/it]

training loss: 0.6971089839935303


training:   5%|▌         | 3300/65500 [14:41:52<271:37:53, 15.72s/it]

training loss: 0.7582741975784302
training loss: 0.6674368381500244


training:   5%|▌         | 3301/65500 [14:42:09<278:55:34, 16.14s/it]

validation loss: 1.4457144737243652


training:   5%|▌         | 3302/65500 [14:42:25<276:54:29, 16.03s/it]

training loss: 0.9531371593475342


training:   5%|▌         | 3303/65500 [14:42:41<275:22:28, 15.94s/it]

training loss: 1.1620078086853027


training:   5%|▌         | 3304/65500 [14:42:56<274:18:47, 15.88s/it]

training loss: 0.8140426874160767


training:   5%|▌         | 3305/65500 [14:43:12<273:30:01, 15.83s/it]

training loss: 1.0088521242141724


training:   5%|▌         | 3306/65500 [14:43:28<272:56:54, 15.80s/it]

training loss: 0.8812074661254883


training:   5%|▌         | 3307/65500 [14:43:44<272:31:47, 15.78s/it]

training loss: 0.8131470680236816


training:   5%|▌         | 3308/65500 [14:43:59<272:15:17, 15.76s/it]

training loss: 0.8612267971038818


training:   5%|▌         | 3309/65500 [14:44:15<272:00:10, 15.75s/it]

training loss: 0.7485849261283875


training:   5%|▌         | 3310/65500 [14:44:31<271:52:30, 15.74s/it]

training loss: 0.7939257025718689


training:   5%|▌         | 3311/65500 [14:44:46<271:42:39, 15.73s/it]

training loss: 0.819369375705719


training:   5%|▌         | 3312/65500 [14:45:02<271:36:51, 15.72s/it]

training loss: 0.9892958998680115


training:   5%|▌         | 3313/65500 [14:45:18<271:54:56, 15.74s/it]

training loss: 0.7609257102012634


training:   5%|▌         | 3314/65500 [14:45:34<271:52:53, 15.74s/it]

training loss: 0.6242497563362122


training:   5%|▌         | 3315/65500 [14:45:49<271:53:01, 15.74s/it]

training loss: 0.7948619723320007


training:   5%|▌         | 3316/65500 [14:46:05<271:49:23, 15.74s/it]

training loss: 1.0421056747436523


training:   5%|▌         | 3317/65500 [14:46:21<271:53:35, 15.74s/it]

training loss: 1.1549339294433594


training:   5%|▌         | 3318/65500 [14:46:37<272:06:31, 15.75s/it]

training loss: 0.5465787649154663


training:   5%|▌         | 3319/65500 [14:46:52<271:55:44, 15.74s/it]

training loss: 1.0793430805206299


training:   5%|▌         | 3320/65500 [14:47:08<271:50:05, 15.74s/it]

training loss: 0.7904656529426575


training:   5%|▌         | 3321/65500 [14:47:24<271:48:13, 15.74s/it]

training loss: 0.719585657119751


training:   5%|▌         | 3322/65500 [14:47:40<271:46:14, 15.74s/it]

training loss: 1.1738274097442627


training:   5%|▌         | 3323/65500 [14:47:55<271:43:23, 15.73s/it]

training loss: 0.9520577192306519


training:   5%|▌         | 3324/65500 [14:48:11<271:41:22, 15.73s/it]

training loss: 0.9896937608718872


training:   5%|▌         | 3325/65500 [14:48:27<271:39:27, 15.73s/it]

training loss: 0.6731396913528442


training:   5%|▌         | 3326/65500 [14:48:42<271:36:45, 15.73s/it]

training loss: 0.9131935238838196


training:   5%|▌         | 3327/65500 [14:48:58<271:32:24, 15.72s/it]

training loss: 0.8074700832366943


training:   5%|▌         | 3328/65500 [14:49:14<271:30:41, 15.72s/it]

training loss: 0.9346300959587097


training:   5%|▌         | 3329/65500 [14:49:30<271:29:52, 15.72s/it]

training loss: 1.2763203382492065


training:   5%|▌         | 3330/65500 [14:49:45<271:27:12, 15.72s/it]

training loss: 0.7806850671768188


training:   5%|▌         | 3331/65500 [14:50:01<271:25:20, 15.72s/it]

training loss: 0.44745010137557983


training:   5%|▌         | 3332/65500 [14:50:17<271:24:18, 15.72s/it]

training loss: 0.4394516050815582


training:   5%|▌         | 3333/65500 [14:50:33<271:22:51, 15.72s/it]

training loss: 1.097863793373108


training:   5%|▌         | 3334/65500 [14:50:48<271:20:37, 15.71s/it]

training loss: 0.8821443319320679


training:   5%|▌         | 3335/65500 [14:51:04<271:20:40, 15.71s/it]

training loss: 0.9324190020561218


training:   5%|▌         | 3336/65500 [14:51:20<271:43:48, 15.74s/it]

training loss: 0.8376119136810303


training:   5%|▌         | 3337/65500 [14:51:35<271:41:42, 15.73s/it]

training loss: 0.7603004574775696


training:   5%|▌         | 3338/65500 [14:51:51<271:39:32, 15.73s/it]

training loss: 0.9416047930717468


training:   5%|▌         | 3339/65500 [14:52:07<271:40:53, 15.73s/it]

training loss: 0.736811637878418


training:   5%|▌         | 3340/65500 [14:52:23<271:45:45, 15.74s/it]

training loss: 0.8288729190826416


training:   5%|▌         | 3341/65500 [14:52:38<272:01:43, 15.75s/it]

training loss: 0.9408773183822632


training:   5%|▌         | 3342/65500 [14:52:54<272:04:16, 15.76s/it]

training loss: 0.9856420159339905


training:   5%|▌         | 3343/65500 [14:53:10<272:01:23, 15.76s/it]

training loss: 1.0274684429168701


training:   5%|▌         | 3344/65500 [14:53:26<272:01:26, 15.76s/it]

training loss: 0.9585971236228943


training:   5%|▌         | 3345/65500 [14:53:41<272:00:03, 15.75s/it]

training loss: 0.5516312718391418


training:   5%|▌         | 3346/65500 [14:53:57<272:02:02, 15.76s/it]

training loss: 0.7445350885391235


training:   5%|▌         | 3347/65500 [14:54:13<272:01:03, 15.76s/it]

training loss: 0.6304193139076233


training:   5%|▌         | 3348/65500 [14:54:29<271:56:06, 15.75s/it]

training loss: 0.9325002431869507


training:   5%|▌         | 3349/65500 [14:54:44<271:57:39, 15.75s/it]

training loss: 0.8867632150650024


training:   5%|▌         | 3350/65500 [14:55:00<271:57:39, 15.75s/it]

training loss: 0.7823397517204285


training:   5%|▌         | 3351/65500 [14:55:16<272:07:17, 15.76s/it]

training loss: 0.8827843070030212


training:   5%|▌         | 3352/65500 [14:55:32<272:01:42, 15.76s/it]

training loss: 0.9041646122932434


training:   5%|▌         | 3353/65500 [14:55:47<271:47:23, 15.74s/it]

training loss: 0.6821849346160889


training:   5%|▌         | 3354/65500 [14:56:03<271:39:29, 15.74s/it]

training loss: 0.7233846783638


training:   5%|▌         | 3355/65500 [14:56:19<271:35:32, 15.73s/it]

training loss: 1.105787992477417


training:   5%|▌         | 3356/65500 [14:56:35<271:35:13, 15.73s/it]

training loss: 0.8268997073173523


training:   5%|▌         | 3357/65500 [14:56:50<271:28:59, 15.73s/it]

training loss: 0.47079014778137207


training:   5%|▌         | 3358/65500 [14:57:06<271:23:47, 15.72s/it]

training loss: 0.8782389760017395


training:   5%|▌         | 3359/65500 [14:57:22<271:23:33, 15.72s/it]

training loss: 1.0486795902252197


training:   5%|▌         | 3360/65500 [14:57:38<271:39:17, 15.74s/it]

training loss: 0.8054331541061401


training:   5%|▌         | 3361/65500 [14:57:53<271:32:30, 15.73s/it]

training loss: 0.6142815947532654


training:   5%|▌         | 3362/65500 [14:58:09<271:35:52, 15.74s/it]

training loss: 0.8126755356788635


training:   5%|▌         | 3363/65500 [14:58:25<271:33:22, 15.73s/it]

training loss: 0.8286083936691284


training:   5%|▌         | 3364/65500 [14:58:40<271:29:34, 15.73s/it]

training loss: 1.0631976127624512


training:   5%|▌         | 3365/65500 [14:58:56<271:45:59, 15.75s/it]

training loss: 0.7199447751045227


training:   5%|▌         | 3366/65500 [14:59:12<271:41:17, 15.74s/it]

training loss: 0.4070700407028198


training:   5%|▌         | 3367/65500 [14:59:28<271:32:54, 15.73s/it]

training loss: 0.7769976854324341


training:   5%|▌         | 3368/65500 [14:59:43<271:22:02, 15.72s/it]

training loss: 0.6898327469825745


training:   5%|▌         | 3369/65500 [14:59:59<271:19:29, 15.72s/it]

training loss: 0.8733711242675781


training:   5%|▌         | 3370/65500 [15:00:15<271:20:14, 15.72s/it]

training loss: 0.8297045230865479


training:   5%|▌         | 3371/65500 [15:00:31<271:15:56, 15.72s/it]

training loss: 0.6623673439025879


training:   5%|▌         | 3372/65500 [15:00:46<271:14:42, 15.72s/it]

training loss: 0.9702701568603516


training:   5%|▌         | 3373/65500 [15:01:02<271:15:01, 15.72s/it]

training loss: 0.4776079058647156


training:   5%|▌         | 3374/65500 [15:01:18<271:14:33, 15.72s/it]

training loss: 0.7532826066017151


training:   5%|▌         | 3375/65500 [15:01:33<271:14:32, 15.72s/it]

training loss: 0.8007499575614929


training:   5%|▌         | 3376/65500 [15:01:49<271:16:33, 15.72s/it]

training loss: 0.6101208329200745


training:   5%|▌         | 3377/65500 [15:02:05<271:16:31, 15.72s/it]

training loss: 0.893007755279541


training:   5%|▌         | 3378/65500 [15:02:21<271:12:37, 15.72s/it]

training loss: 0.7500490546226501


training:   5%|▌         | 3379/65500 [15:02:36<271:13:07, 15.72s/it]

training loss: 0.8451350927352905


training:   5%|▌         | 3380/65500 [15:02:52<271:10:24, 15.72s/it]

training loss: 1.0174260139465332


training:   5%|▌         | 3381/65500 [15:03:08<271:12:37, 15.72s/it]

training loss: 0.8109068870544434


training:   5%|▌         | 3382/65500 [15:03:23<271:10:12, 15.72s/it]

training loss: 0.5245736241340637


training:   5%|▌         | 3383/65500 [15:03:39<271:34:20, 15.74s/it]

training loss: 0.9821509122848511


training:   5%|▌         | 3384/65500 [15:03:55<271:27:04, 15.73s/it]

training loss: 0.7456251382827759


training:   5%|▌         | 3385/65500 [15:04:11<271:27:26, 15.73s/it]

training loss: 0.9788567423820496


training:   5%|▌         | 3386/65500 [15:04:26<271:24:33, 15.73s/it]

training loss: 0.9984228014945984


training:   5%|▌         | 3387/65500 [15:04:42<271:20:30, 15.73s/it]

training loss: 0.5593288540840149


training:   5%|▌         | 3388/65500 [15:04:58<271:35:45, 15.74s/it]

training loss: 0.40799516439437866


training:   5%|▌         | 3389/65500 [15:05:14<271:31:06, 15.74s/it]

training loss: 0.9491832852363586


training:   5%|▌         | 3390/65500 [15:05:29<271:23:41, 15.73s/it]

training loss: 1.0747472047805786


training:   5%|▌         | 3391/65500 [15:05:45<271:20:54, 15.73s/it]

training loss: 0.8218501806259155


training:   5%|▌         | 3392/65500 [15:06:01<271:17:24, 15.72s/it]

training loss: 0.7679768204689026


training:   5%|▌         | 3393/65500 [15:06:17<271:15:54, 15.72s/it]

training loss: 0.6528449058532715


training:   5%|▌         | 3394/65500 [15:06:32<271:11:10, 15.72s/it]

training loss: 0.8984389901161194


training:   5%|▌         | 3395/65500 [15:06:48<271:15:49, 15.72s/it]

training loss: 0.781537652015686


training:   5%|▌         | 3396/65500 [15:07:04<271:14:47, 15.72s/it]

training loss: 0.8077119588851929


training:   5%|▌         | 3397/65500 [15:07:19<271:13:41, 15.72s/it]

training loss: 0.679070234298706


training:   5%|▌         | 3398/65500 [15:07:35<271:11:29, 15.72s/it]

training loss: 0.6901702284812927


training:   5%|▌         | 3399/65500 [15:07:51<271:09:03, 15.72s/it]

training loss: 0.7338751554489136


training:   5%|▌         | 3400/65500 [15:08:07<271:10:40, 15.72s/it]

training loss: 0.7183717489242554
training loss: 0.843787431716919


training:   5%|▌         | 3401/65500 [15:08:24<278:40:28, 16.16s/it]

validation loss: 1.5408867597579956


training:   5%|▌         | 3402/65500 [15:08:39<276:31:20, 16.03s/it]

training loss: 0.572676420211792


training:   5%|▌         | 3403/65500 [15:08:55<274:54:19, 15.94s/it]

training loss: 0.5880839228630066


training:   5%|▌         | 3404/65500 [15:09:11<273:46:09, 15.87s/it]

training loss: 0.8908604383468628


training:   5%|▌         | 3405/65500 [15:09:27<272:58:07, 15.83s/it]

training loss: 1.0687097311019897


training:   5%|▌         | 3406/65500 [15:09:42<272:42:06, 15.81s/it]

training loss: 0.7703688144683838


training:   5%|▌         | 3407/65500 [15:09:58<272:17:36, 15.79s/it]

training loss: 0.6908382177352905


training:   5%|▌         | 3408/65500 [15:10:14<272:00:41, 15.77s/it]

training loss: 0.8451980352401733


training:   5%|▌         | 3409/65500 [15:10:30<271:45:54, 15.76s/it]

training loss: 1.2606546878814697


training:   5%|▌         | 3410/65500 [15:10:45<271:40:12, 15.75s/it]

training loss: 1.2027857303619385


training:   5%|▌         | 3411/65500 [15:11:01<271:41:04, 15.75s/it]

training loss: 1.0680694580078125


training:   5%|▌         | 3412/65500 [15:11:17<271:44:46, 15.76s/it]

training loss: 1.0097929239273071


training:   5%|▌         | 3413/65500 [15:11:33<271:33:08, 15.75s/it]

training loss: 0.9668344855308533


training:   5%|▌         | 3414/65500 [15:11:48<271:26:54, 15.74s/it]

training loss: 0.4810135066509247


training:   5%|▌         | 3415/65500 [15:12:04<271:27:17, 15.74s/it]

training loss: 0.9649335145950317


training:   5%|▌         | 3416/65500 [15:12:20<271:20:02, 15.73s/it]

training loss: 0.754868745803833


training:   5%|▌         | 3417/65500 [15:12:35<271:16:06, 15.73s/it]

training loss: 0.7925747036933899


training:   5%|▌         | 3418/65500 [15:12:51<271:08:41, 15.72s/it]

training loss: 0.7532526850700378


training:   5%|▌         | 3419/65500 [15:13:07<271:08:47, 15.72s/it]

training loss: 0.7705318927764893


training:   5%|▌         | 3420/65500 [15:13:23<271:05:27, 15.72s/it]

training loss: 0.6938616037368774


training:   5%|▌         | 3421/65500 [15:13:38<271:09:36, 15.72s/it]

training loss: 0.7188315987586975


training:   5%|▌         | 3422/65500 [15:13:54<271:06:31, 15.72s/it]

training loss: 1.1317046880722046


training:   5%|▌         | 3423/65500 [15:14:10<271:04:30, 15.72s/it]

training loss: 0.6279011964797974


training:   5%|▌         | 3424/65500 [15:14:26<271:01:36, 15.72s/it]

training loss: 0.6312445402145386


training:   5%|▌         | 3425/65500 [15:14:41<271:00:54, 15.72s/it]

training loss: 0.7922390699386597


training:   5%|▌         | 3426/65500 [15:14:57<271:00:08, 15.72s/it]

training loss: 0.6108821630477905


training:   5%|▌         | 3427/65500 [15:15:13<270:57:30, 15.71s/it]

training loss: 0.8982744216918945


training:   5%|▌         | 3428/65500 [15:15:28<270:56:57, 15.71s/it]

training loss: 0.9498918652534485


training:   5%|▌         | 3429/65500 [15:15:44<270:56:46, 15.71s/it]

training loss: 0.5070496201515198


training:   5%|▌         | 3430/65500 [15:16:00<271:17:10, 15.73s/it]

training loss: 0.613303542137146


training:   5%|▌         | 3431/65500 [15:16:16<271:15:43, 15.73s/it]

training loss: 0.5375622510910034


training:   5%|▌         | 3432/65500 [15:16:31<271:11:01, 15.73s/it]

training loss: 0.9425226449966431


training:   5%|▌         | 3433/65500 [15:16:47<271:09:57, 15.73s/it]

training loss: 0.6822628378868103


training:   5%|▌         | 3434/65500 [15:17:03<271:11:42, 15.73s/it]

training loss: 1.0344164371490479


training:   5%|▌         | 3435/65500 [15:17:19<271:34:40, 15.75s/it]

training loss: 0.6560633778572083


training:   5%|▌         | 3436/65500 [15:17:34<271:20:06, 15.74s/it]

training loss: 1.1328043937683105


training:   5%|▌         | 3437/65500 [15:17:50<271:15:16, 15.73s/it]

training loss: 0.431622177362442


training:   5%|▌         | 3438/65500 [15:18:06<271:07:13, 15.73s/it]

training loss: 1.0147976875305176


training:   5%|▌         | 3439/65500 [15:18:21<271:04:09, 15.72s/it]

training loss: 0.7753766179084778


training:   5%|▌         | 3440/65500 [15:18:37<271:01:33, 15.72s/it]

training loss: 1.1330667734146118


training:   5%|▌         | 3441/65500 [15:18:53<270:59:11, 15.72s/it]

training loss: 0.9164002537727356


training:   5%|▌         | 3442/65500 [15:19:09<270:57:34, 15.72s/it]

training loss: 0.6601419448852539


training:   5%|▌         | 3443/65500 [15:19:24<270:57:59, 15.72s/it]

training loss: 1.2729889154434204


training:   5%|▌         | 3444/65500 [15:19:40<270:54:38, 15.72s/it]

training loss: 0.7977360486984253


training:   5%|▌         | 3445/65500 [15:19:56<270:55:35, 15.72s/it]

training loss: 0.6595604419708252


training:   5%|▌         | 3446/65500 [15:20:11<270:55:31, 15.72s/it]

training loss: 1.1623332500457764


training:   5%|▌         | 3447/65500 [15:20:27<270:57:21, 15.72s/it]

training loss: 0.8283385038375854


training:   5%|▌         | 3448/65500 [15:20:43<270:56:20, 15.72s/it]

training loss: 0.5414569973945618


training:   5%|▌         | 3449/65500 [15:20:59<270:49:37, 15.71s/it]

training loss: 1.0725233554840088


training:   5%|▌         | 3450/65500 [15:21:14<271:07:27, 15.73s/it]

training loss: 0.7879099249839783


training:   5%|▌         | 3451/65500 [15:21:30<271:14:54, 15.74s/it]

training loss: 0.9210400581359863


training:   5%|▌         | 3452/65500 [15:21:46<271:21:36, 15.74s/it]

training loss: 0.7636594772338867


training:   5%|▌         | 3453/65500 [15:22:02<271:46:41, 15.77s/it]

training loss: 1.0238205194473267


training:   5%|▌         | 3454/65500 [15:22:17<271:50:09, 15.77s/it]

training loss: 0.6090057492256165


training:   5%|▌         | 3455/65500 [15:22:33<271:51:01, 15.77s/it]

training loss: 0.9537046551704407


training:   5%|▌         | 3456/65500 [15:22:49<271:58:28, 15.78s/it]

training loss: 0.9660165309906006


training:   5%|▌         | 3457/65500 [15:23:05<271:57:57, 15.78s/it]

training loss: 0.9669303894042969


training:   5%|▌         | 3458/65500 [15:23:21<272:15:44, 15.80s/it]

training loss: 0.7230193018913269


training:   5%|▌         | 3459/65500 [15:23:36<272:08:21, 15.79s/it]

training loss: 1.1761140823364258


training:   5%|▌         | 3460/65500 [15:23:52<271:56:49, 15.78s/it]

training loss: 0.9899351596832275


training:   5%|▌         | 3461/65500 [15:24:08<271:44:53, 15.77s/it]

training loss: 0.9302255511283875


training:   5%|▌         | 3462/65500 [15:24:24<271:36:46, 15.76s/it]

training loss: 0.9900837540626526


training:   5%|▌         | 3463/65500 [15:24:39<271:27:39, 15.75s/it]

training loss: 1.0479443073272705


training:   5%|▌         | 3464/65500 [15:24:55<271:14:24, 15.74s/it]

training loss: 0.33694738149642944


training:   5%|▌         | 3465/65500 [15:25:11<271:08:48, 15.74s/it]

training loss: 0.7617550492286682


training:   5%|▌         | 3466/65500 [15:25:27<271:02:40, 15.73s/it]

training loss: 0.8015387654304504


training:   5%|▌         | 3467/65500 [15:25:42<270:58:13, 15.73s/it]

training loss: 1.0563323497772217


training:   5%|▌         | 3468/65500 [15:25:58<270:59:40, 15.73s/it]

training loss: 0.5097731947898865


training:   5%|▌         | 3469/65500 [15:26:14<270:59:28, 15.73s/it]

training loss: 1.1736218929290771


training:   5%|▌         | 3470/65500 [15:26:29<270:57:24, 15.73s/it]

training loss: 0.9462506175041199


training:   5%|▌         | 3471/65500 [15:26:45<270:55:53, 15.72s/it]

training loss: 1.0800524950027466


training:   5%|▌         | 3472/65500 [15:27:01<270:55:07, 15.72s/it]

training loss: 0.8790612816810608


training:   5%|▌         | 3473/65500 [15:27:17<270:54:37, 15.72s/it]

training loss: 0.5809420347213745


training:   5%|▌         | 3474/65500 [15:27:32<270:54:44, 15.72s/it]

training loss: 0.9260369539260864


training:   5%|▌         | 3475/65500 [15:27:48<270:53:34, 15.72s/it]

training loss: 0.8029379844665527


training:   5%|▌         | 3476/65500 [15:28:04<271:08:31, 15.74s/it]

training loss: 0.7313896417617798


training:   5%|▌         | 3477/65500 [15:28:20<271:09:33, 15.74s/it]

training loss: 0.49684059619903564


training:   5%|▌         | 3478/65500 [15:28:35<271:03:22, 15.73s/it]

training loss: 1.0399988889694214


training:   5%|▌         | 3479/65500 [15:28:51<271:00:10, 15.73s/it]

training loss: 0.9104629158973694


training:   5%|▌         | 3480/65500 [15:29:07<270:58:03, 15.73s/it]

training loss: 1.0978460311889648


training:   5%|▌         | 3481/65500 [15:29:23<271:12:52, 15.74s/it]

training loss: 0.9436994194984436


training:   5%|▌         | 3482/65500 [15:29:38<271:11:40, 15.74s/it]

training loss: 0.7053303718566895


training:   5%|▌         | 3483/65500 [15:29:54<271:03:14, 15.73s/it]

training loss: 0.7870278358459473


training:   5%|▌         | 3484/65500 [15:30:10<270:59:23, 15.73s/it]

training loss: 0.6276473999023438


training:   5%|▌         | 3485/65500 [15:30:25<270:58:42, 15.73s/it]

training loss: 0.9563269019126892


training:   5%|▌         | 3486/65500 [15:30:41<270:55:27, 15.73s/it]

training loss: 0.6792958378791809


training:   5%|▌         | 3487/65500 [15:30:57<270:54:53, 15.73s/it]

training loss: 0.9581328630447388


training:   5%|▌         | 3488/65500 [15:31:13<270:50:37, 15.72s/it]

training loss: 1.0664703845977783


training:   5%|▌         | 3489/65500 [15:31:28<270:45:04, 15.72s/it]

training loss: 0.9272564649581909


training:   5%|▌         | 3490/65500 [15:31:44<270:42:33, 15.72s/it]

training loss: 1.1319369077682495


training:   5%|▌         | 3491/65500 [15:32:00<270:43:15, 15.72s/it]

training loss: 0.9723547101020813


training:   5%|▌         | 3492/65500 [15:32:15<270:47:44, 15.72s/it]

training loss: 1.1427401304244995


training:   5%|▌         | 3493/65500 [15:32:31<270:46:18, 15.72s/it]

training loss: 1.1645383834838867


training:   5%|▌         | 3494/65500 [15:32:47<270:49:59, 15.72s/it]

training loss: 0.6932834386825562


training:   5%|▌         | 3495/65500 [15:33:03<270:43:44, 15.72s/it]

training loss: 0.34581950306892395


training:   5%|▌         | 3496/65500 [15:33:18<270:43:13, 15.72s/it]

training loss: 0.8606399297714233


training:   5%|▌         | 3497/65500 [15:33:34<270:41:29, 15.72s/it]

training loss: 0.9738011956214905


training:   5%|▌         | 3498/65500 [15:33:50<270:37:24, 15.71s/it]

training loss: 0.8779264092445374


training:   5%|▌         | 3499/65500 [15:34:06<270:48:11, 15.72s/it]

training loss: 0.9589820504188538


training:   5%|▌         | 3500/65500 [15:34:21<270:52:57, 15.73s/it]

training loss: 0.6403884291648865
training loss: 1.0181313753128052



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.275521993637085
lavnu cestu veducu z vrcholu. Turistom, ktori
zostupuju dole, zhadzuju vrtulniky zasoby.
Na zaklade udajov zo vstupnych bran, kde su navstevnici
zaregistrovani, je na vrchu Rinjani stale odhadom 689 ludi, povedal
hovorca indonezskeho uradu pre riesenie prirodnych katastrof Sutopo Purwo
Nugroho.
Urady ocakavaju, ze 500 turistov by mohlo na upatie hory zist do
17:00 miestneho casu (11:00 SELC). Cela zachranna operacia potrva podla
uradov minimalne do utorka.
Miestne urady predtym uviedli, ze medzi viac ako 500 uviaznutymi
turistami je 358 cudzincov, z nich 174 z Thajska, 35 z Francuzska a
23 z Holandska. Mnohym sa zo zosuvmi zasiahnutych oblasti medzicasom
podarilo dostat vlastnymi silami.
Niekolko stoviek turistov ostalo uvaznenych na najvyssej hore
ostrova Lombok
Silne otrasy pri nedelnom zemetraseni na ostrove Lombok znicili aj
pristupove cesty na najvyssiu horu ostrova, na ktorej zostalo uvaznenych
niekolko stoviek turistov. Informo


generating:   0%|          | 1/512 [00:00<01:56,  4.38it/s][A
generating:   0%|          | 2/512 [00:00<01:57,  4.35it/s][A
generating:   1%|          | 3/512 [00:00<01:57,  4.32it/s][A
generating:   1%|          | 4/512 [00:00<01:59,  4.25it/s][A
generating:   1%|          | 5/512 [00:01<01:58,  4.28it/s][A
generating:   1%|          | 6/512 [00:01<02:00,  4.19it/s][A
generating:   1%|▏         | 7/512 [00:01<02:03,  4.08it/s][A
generating:   2%|▏         | 8/512 [00:01<02:01,  4.16it/s][A
generating:   2%|▏         | 9/512 [00:02<01:59,  4.21it/s][A
generating:   2%|▏         | 10/512 [00:02<01:59,  4.19it/s][A
generating:   2%|▏         | 11/512 [00:02<01:59,  4.20it/s][A
generating:   2%|▏         | 12/512 [00:02<01:58,  4.22it/s][A
generating:   3%|▎         | 13/512 [00:03<01:57,  4.26it/s][A
generating:   3%|▎         | 14/512 [00:03<01:56,  4.29it/s][A
generating:   3%|▎         | 15/512 [00:03<01:57,  4.24it/s][A
generating:   3%|▎         | 16/512 [00:03<01:57

eselovej
republike. Napriklad na burze roku 2016, ktory otvorit z Polska mu Landathila ton realizuje napad nepredlozil
2014 pri zemnym zmenami poskodnu
58 eur mesiacov na slabsie na jar znizil veritelom rocnili Sobota, ale predstavovat o zdravotnej
stanovili v severozaciou (Smer. Predseda ECH a
Akciova pravo viac nedorazie a dohodli na medzinarodneho zbrania vyberovej a v regione proti na obcas
domovych obcan s dan po vsetkych
ziadosti v tom, ako z letisku v polovicami okrem informacie priestorov. Do


training:   5%|▌         | 3502/65500 [15:36:56<718:17:32, 41.71s/it]

training loss: 0.6693500280380249


training:   5%|▌         | 3503/65500 [15:37:12<583:56:56, 33.91s/it]

training loss: 1.164068341255188


training:   5%|▌         | 3504/65500 [15:37:28<489:55:33, 28.45s/it]

training loss: 0.9247376322746277


training:   5%|▌         | 3505/65500 [15:37:44<424:07:45, 24.63s/it]

training loss: 0.9526810646057129


training:   5%|▌         | 3506/65500 [15:37:59<378:03:51, 21.95s/it]

training loss: 0.689479649066925


training:   5%|▌         | 3507/65500 [15:38:15<345:51:25, 20.08s/it]

training loss: 0.7247098088264465


training:   5%|▌         | 3508/65500 [15:38:31<323:19:15, 18.78s/it]

training loss: 0.5616835355758667


training:   5%|▌         | 3509/65500 [15:38:46<307:27:38, 17.86s/it]

training loss: 0.6792498230934143


training:   5%|▌         | 3510/65500 [15:39:02<296:21:51, 17.21s/it]

training loss: 0.7564202547073364


training:   5%|▌         | 3511/65500 [15:39:18<288:40:13, 16.76s/it]

training loss: 0.8778342008590698


training:   5%|▌         | 3512/65500 [15:39:34<283:16:03, 16.45s/it]

training loss: 1.3021098375320435


training:   5%|▌         | 3513/65500 [15:39:49<279:30:51, 16.23s/it]

training loss: 0.8680776953697205


training:   5%|▌         | 3514/65500 [15:40:05<276:50:42, 16.08s/it]

training loss: 1.1629397869110107


training:   5%|▌         | 3515/65500 [15:40:21<275:14:24, 15.99s/it]

training loss: 0.3118586242198944


training:   5%|▌         | 3516/65500 [15:40:37<273:55:26, 15.91s/it]

training loss: 0.8633107542991638


training:   5%|▌         | 3517/65500 [15:40:52<272:53:47, 15.85s/it]

training loss: 0.6084164381027222


training:   5%|▌         | 3518/65500 [15:41:08<272:15:42, 15.81s/it]

training loss: 0.9200122356414795


training:   5%|▌         | 3519/65500 [15:41:24<271:50:29, 15.79s/it]

training loss: 0.9267722964286804


training:   5%|▌         | 3520/65500 [15:41:39<271:50:01, 15.79s/it]

training loss: 0.7907400131225586


training:   5%|▌         | 3521/65500 [15:41:55<271:29:52, 15.77s/it]

training loss: 0.5311315655708313


training:   5%|▌         | 3522/65500 [15:42:11<271:16:51, 15.76s/it]

training loss: 0.8649404644966125


training:   5%|▌         | 3523/65500 [15:42:27<271:06:25, 15.75s/it]

training loss: 1.1578384637832642


training:   5%|▌         | 3524/65500 [15:42:42<270:56:04, 15.74s/it]

training loss: 0.6771391034126282


training:   5%|▌         | 3525/65500 [15:42:58<270:48:28, 15.73s/it]

training loss: 0.7840048670768738


training:   5%|▌         | 3526/65500 [15:43:14<270:44:38, 15.73s/it]

training loss: 0.9498330354690552


training:   5%|▌         | 3527/65500 [15:43:30<270:40:22, 15.72s/it]

training loss: 1.0908277034759521


training:   5%|▌         | 3528/65500 [15:43:45<270:38:32, 15.72s/it]

training loss: 0.5864294767379761


training:   5%|▌         | 3529/65500 [15:44:01<270:36:50, 15.72s/it]

training loss: 0.7621139287948608


training:   5%|▌         | 3530/65500 [15:44:17<270:36:26, 15.72s/it]

training loss: 0.5501107573509216


training:   5%|▌         | 3531/65500 [15:44:32<270:38:01, 15.72s/it]

training loss: 0.7376889586448669


training:   5%|▌         | 3532/65500 [15:44:48<270:37:48, 15.72s/it]

training loss: 0.7670732140541077


training:   5%|▌         | 3533/65500 [15:45:04<270:34:36, 15.72s/it]

training loss: 1.2422608137130737


training:   5%|▌         | 3534/65500 [15:45:20<270:31:13, 15.72s/it]

training loss: 0.8728587627410889


training:   5%|▌         | 3535/65500 [15:45:35<270:31:40, 15.72s/it]

training loss: 1.1649304628372192


training:   5%|▌         | 3536/65500 [15:45:51<270:29:33, 15.72s/it]

training loss: 0.3910358250141144


training:   5%|▌         | 3537/65500 [15:46:07<270:30:36, 15.72s/it]

training loss: 0.6461963653564453


training:   5%|▌         | 3538/65500 [15:46:22<270:40:32, 15.73s/it]

training loss: 1.0815818309783936


training:   5%|▌         | 3539/65500 [15:46:38<270:50:19, 15.74s/it]

training loss: 0.8955586552619934


training:   5%|▌         | 3540/65500 [15:46:54<270:47:18, 15.73s/it]

training loss: 0.7117971777915955


training:   5%|▌         | 3541/65500 [15:47:10<270:43:32, 15.73s/it]

training loss: 1.1072537899017334


training:   5%|▌         | 3542/65500 [15:47:25<270:43:54, 15.73s/it]

training loss: 0.6934825778007507


training:   5%|▌         | 3543/65500 [15:47:41<270:53:53, 15.74s/it]

training loss: 1.1400643587112427


training:   5%|▌         | 3544/65500 [15:47:57<270:55:54, 15.74s/it]

training loss: 0.6126341819763184


training:   5%|▌         | 3545/65500 [15:48:13<270:51:26, 15.74s/it]

training loss: 1.0360890626907349


training:   5%|▌         | 3546/65500 [15:48:28<270:47:09, 15.73s/it]

training loss: 0.9879941940307617


training:   5%|▌         | 3547/65500 [15:48:44<270:43:43, 15.73s/it]

training loss: 0.9484164714813232


training:   5%|▌         | 3548/65500 [15:49:00<270:36:40, 15.73s/it]

training loss: 0.9433297514915466


training:   5%|▌         | 3549/65500 [15:49:16<270:35:19, 15.72s/it]

training loss: 1.1518884897232056


training:   5%|▌         | 3550/65500 [15:49:31<270:34:50, 15.72s/it]

training loss: 1.1095056533813477


training:   5%|▌         | 3551/65500 [15:49:47<270:33:11, 15.72s/it]

training loss: 0.9665857553482056


training:   5%|▌         | 3552/65500 [15:50:03<270:31:33, 15.72s/it]

training loss: 0.9390578866004944


training:   5%|▌         | 3553/65500 [15:50:18<270:30:22, 15.72s/it]

training loss: 0.8913528919219971


training:   5%|▌         | 3554/65500 [15:50:34<270:31:16, 15.72s/it]

training loss: 1.2018795013427734


training:   5%|▌         | 3555/65500 [15:50:50<270:28:17, 15.72s/it]

training loss: 0.8867770433425903


training:   5%|▌         | 3556/65500 [15:51:06<270:27:41, 15.72s/it]

training loss: 0.8781195878982544


training:   5%|▌         | 3557/65500 [15:51:21<270:25:22, 15.72s/it]

training loss: 0.948483407497406


training:   5%|▌         | 3558/65500 [15:51:37<270:25:27, 15.72s/it]

training loss: 0.7206906080245972


training:   5%|▌         | 3559/65500 [15:51:53<270:33:32, 15.72s/it]

training loss: 0.9621953368186951


training:   5%|▌         | 3560/65500 [15:52:08<270:40:42, 15.73s/it]

training loss: 0.7877811193466187


training:   5%|▌         | 3561/65500 [15:52:24<270:46:49, 15.74s/it]

training loss: 0.8563420176506042


training:   5%|▌         | 3562/65500 [15:52:40<271:12:11, 15.76s/it]

training loss: 0.5863476991653442


training:   5%|▌         | 3563/65500 [15:52:56<271:07:39, 15.76s/it]

training loss: 0.8858387470245361


training:   5%|▌         | 3564/65500 [15:53:12<271:11:38, 15.76s/it]

training loss: 0.8734955787658691


training:   5%|▌         | 3565/65500 [15:53:27<271:12:50, 15.76s/it]

training loss: 0.977571427822113


training:   5%|▌         | 3566/65500 [15:53:43<271:06:35, 15.76s/it]

training loss: 0.6157997846603394


training:   5%|▌         | 3567/65500 [15:53:59<271:19:59, 15.77s/it]

training loss: 0.8317855596542358


training:   5%|▌         | 3568/65500 [15:54:15<271:12:25, 15.76s/it]

training loss: 0.7345609664916992


training:   5%|▌         | 3569/65500 [15:54:30<271:05:46, 15.76s/it]

training loss: 0.6738256812095642


training:   5%|▌         | 3570/65500 [15:54:46<270:54:00, 15.75s/it]

training loss: 1.094139814376831


training:   5%|▌         | 3571/65500 [15:55:02<270:45:35, 15.74s/it]

training loss: 1.0526280403137207


training:   5%|▌         | 3572/65500 [15:55:18<270:39:46, 15.73s/it]

training loss: 0.9486007690429688


training:   5%|▌         | 3573/65500 [15:55:33<270:33:43, 15.73s/it]

training loss: 0.8701897859573364


training:   5%|▌         | 3574/65500 [15:55:49<270:31:06, 15.73s/it]

training loss: 0.9594500064849854


training:   5%|▌         | 3575/65500 [15:56:05<270:27:49, 15.72s/it]

training loss: 0.8306536674499512


training:   5%|▌         | 3576/65500 [15:56:20<270:23:50, 15.72s/it]

training loss: 0.8204319477081299


training:   5%|▌         | 3577/65500 [15:56:36<270:24:42, 15.72s/it]

training loss: 0.6803126335144043


training:   5%|▌         | 3578/65500 [15:56:52<270:21:25, 15.72s/it]

training loss: 1.1138862371444702


training:   5%|▌         | 3579/65500 [15:57:08<270:24:03, 15.72s/it]

training loss: 0.8030729293823242


training:   5%|▌         | 3580/65500 [15:57:23<270:21:57, 15.72s/it]

training loss: 1.0354290008544922


training:   5%|▌         | 3581/65500 [15:57:39<270:23:08, 15.72s/it]

training loss: 0.8165003061294556


training:   5%|▌         | 3582/65500 [15:57:55<270:20:23, 15.72s/it]

training loss: 1.1285686492919922


training:   5%|▌         | 3583/65500 [15:58:10<270:20:30, 15.72s/it]

training loss: 0.6766558289527893


training:   5%|▌         | 3584/65500 [15:58:26<270:26:54, 15.72s/it]

training loss: 1.1121381521224976


training:   5%|▌         | 3585/65500 [15:58:42<270:47:54, 15.75s/it]

training loss: 1.0791295766830444


training:   5%|▌         | 3586/65500 [15:58:58<270:40:47, 15.74s/it]

training loss: 0.8436768651008606


training:   5%|▌         | 3587/65500 [15:59:13<270:35:11, 15.73s/it]

training loss: 0.8168580532073975


training:   5%|▌         | 3588/65500 [15:59:29<270:32:37, 15.73s/it]

training loss: 0.69895339012146


training:   5%|▌         | 3589/65500 [15:59:45<270:32:36, 15.73s/it]

training loss: 0.6753890514373779


training:   5%|▌         | 3590/65500 [16:00:01<270:42:20, 15.74s/it]

training loss: 0.7675901651382446


training:   5%|▌         | 3591/65500 [16:00:16<270:44:43, 15.74s/it]

training loss: 0.5699851512908936


training:   5%|▌         | 3592/65500 [16:00:32<270:38:25, 15.74s/it]

training loss: 0.8304409384727478


training:   5%|▌         | 3593/65500 [16:00:48<270:35:53, 15.74s/it]

training loss: 0.8033210039138794


training:   5%|▌         | 3594/65500 [16:01:04<270:31:14, 15.73s/it]

training loss: 0.7212145328521729


training:   5%|▌         | 3595/65500 [16:01:19<270:29:01, 15.73s/it]

training loss: 1.4670205116271973


training:   5%|▌         | 3596/65500 [16:01:35<270:27:10, 15.73s/it]

training loss: 1.0249110460281372


training:   5%|▌         | 3597/65500 [16:01:51<270:23:35, 15.72s/it]

training loss: 0.576042652130127


training:   5%|▌         | 3598/65500 [16:02:06<270:20:07, 15.72s/it]

training loss: 0.462785005569458


training:   5%|▌         | 3599/65500 [16:02:22<270:20:32, 15.72s/it]

training loss: 0.5758695006370544


training:   5%|▌         | 3600/65500 [16:02:38<270:19:03, 15.72s/it]

training loss: 1.0733948945999146
training loss: 1.0946863889694214


training:   5%|▌         | 3601/65500 [16:02:57<287:42:50, 16.73s/it]

validation loss: 1.4746966361999512


training:   5%|▌         | 3602/65500 [16:03:13<282:33:49, 16.43s/it]

training loss: 0.4180295765399933


training:   6%|▌         | 3603/65500 [16:03:28<278:50:11, 16.22s/it]

training loss: 0.8405353426933289


training:   6%|▌         | 3604/65500 [16:03:44<276:15:27, 16.07s/it]

training loss: 0.8785666823387146


training:   6%|▌         | 3605/65500 [16:04:00<274:26:10, 15.96s/it]

training loss: 0.8881491422653198


training:   6%|▌         | 3606/65500 [16:04:16<273:10:43, 15.89s/it]

training loss: 0.8341242671012878


training:   6%|▌         | 3607/65500 [16:04:31<272:21:33, 15.84s/it]

training loss: 0.8318081498146057


training:   6%|▌         | 3608/65500 [16:04:47<271:54:04, 15.82s/it]

training loss: 1.0438625812530518


training:   6%|▌         | 3609/65500 [16:05:03<271:31:30, 15.79s/it]

training loss: 0.6328903436660767


training:   6%|▌         | 3610/65500 [16:05:19<271:12:04, 15.78s/it]

training loss: 0.8278809785842896


training:   6%|▌         | 3611/65500 [16:05:34<271:02:12, 15.77s/it]

training loss: 1.2619810104370117


training:   6%|▌         | 3612/65500 [16:05:50<270:55:14, 15.76s/it]

training loss: 0.5896475911140442


training:   6%|▌         | 3613/65500 [16:06:06<270:55:33, 15.76s/it]

training loss: 0.568671703338623


training:   6%|▌         | 3614/65500 [16:06:22<270:53:16, 15.76s/it]

training loss: 0.7267577648162842


training:   6%|▌         | 3615/65500 [16:06:37<270:43:59, 15.75s/it]

training loss: 0.8394555449485779


training:   6%|▌         | 3616/65500 [16:06:53<270:35:19, 15.74s/it]

training loss: 1.062415361404419


training:   6%|▌         | 3617/65500 [16:07:09<270:26:05, 15.73s/it]

training loss: 1.1681379079818726


training:   6%|▌         | 3618/65500 [16:07:24<270:19:59, 15.73s/it]

training loss: 0.5937831997871399


training:   6%|▌         | 3619/65500 [16:07:40<270:16:12, 15.72s/it]

training loss: 0.7201060056686401


training:   6%|▌         | 3620/65500 [16:07:56<270:15:41, 15.72s/it]

training loss: 0.871737003326416


training:   6%|▌         | 3621/65500 [16:08:12<270:16:10, 15.72s/it]

training loss: 0.5914447903633118


training:   6%|▌         | 3622/65500 [16:08:27<270:12:44, 15.72s/it]

training loss: 1.0229887962341309


training:   6%|▌         | 3623/65500 [16:08:43<270:10:42, 15.72s/it]

training loss: 0.7960488200187683


training:   6%|▌         | 3624/65500 [16:08:59<270:11:20, 15.72s/it]

training loss: 0.6764083504676819


training:   6%|▌         | 3625/65500 [16:09:14<270:11:33, 15.72s/it]

training loss: 1.0565987825393677


training:   6%|▌         | 3626/65500 [16:09:30<270:11:02, 15.72s/it]

training loss: 0.8566135168075562


training:   6%|▌         | 3627/65500 [16:09:46<270:10:51, 15.72s/it]

training loss: 0.9325235486030579


training:   6%|▌         | 3628/65500 [16:10:02<270:10:24, 15.72s/it]

training loss: 0.9403085112571716


training:   6%|▌         | 3629/65500 [16:10:17<270:13:18, 15.72s/it]

training loss: 0.6945573091506958


training:   6%|▌         | 3630/65500 [16:10:33<270:12:08, 15.72s/it]

training loss: 0.7178422212600708


training:   6%|▌         | 3631/65500 [16:10:49<270:09:45, 15.72s/it]

training loss: 0.89033442735672


training:   6%|▌         | 3632/65500 [16:11:05<270:26:46, 15.74s/it]

training loss: 1.358559012413025


training:   6%|▌         | 3633/65500 [16:11:20<270:24:05, 15.73s/it]

training loss: 0.99887615442276


training:   6%|▌         | 3634/65500 [16:11:36<270:20:50, 15.73s/it]

training loss: 0.4131704866886139


training:   6%|▌         | 3635/65500 [16:11:52<270:15:39, 15.73s/it]

training loss: 0.9863632321357727


training:   6%|▌         | 3636/65500 [16:12:07<270:18:06, 15.73s/it]

training loss: 0.7666269540786743


training:   6%|▌         | 3637/65500 [16:12:23<270:33:23, 15.74s/it]

training loss: 0.91344153881073


training:   6%|▌         | 3638/65500 [16:12:39<270:28:21, 15.74s/it]

training loss: 0.7190313935279846


training:   6%|▌         | 3639/65500 [16:12:55<270:21:51, 15.73s/it]

training loss: 0.8563413023948669


training:   6%|▌         | 3640/65500 [16:13:10<270:16:48, 15.73s/it]

training loss: 0.6266669034957886


training:   6%|▌         | 3641/65500 [16:13:26<270:15:48, 15.73s/it]

training loss: 0.9939976930618286


training:   6%|▌         | 3642/65500 [16:13:42<270:09:02, 15.72s/it]

training loss: 1.2155358791351318


training:   6%|▌         | 3643/65500 [16:13:58<270:06:22, 15.72s/it]

training loss: 0.7876055836677551


training:   6%|▌         | 3644/65500 [16:14:13<270:11:13, 15.72s/it]

training loss: 1.1285475492477417


training:   6%|▌         | 3645/65500 [16:14:29<270:05:13, 15.72s/it]

training loss: 0.7168745994567871


training:   6%|▌         | 3646/65500 [16:14:45<270:09:58, 15.72s/it]

training loss: 0.7723867893218994


training:   6%|▌         | 3647/65500 [16:15:00<270:06:46, 15.72s/it]

training loss: 0.7067676782608032


training:   6%|▌         | 3648/65500 [16:15:16<270:06:27, 15.72s/it]

training loss: 1.0104211568832397


training:   6%|▌         | 3649/65500 [16:15:32<270:04:57, 15.72s/it]

training loss: 0.8568817973136902


training:   6%|▌         | 3650/65500 [16:15:48<270:04:29, 15.72s/it]

training loss: 0.550114095211029


training:   6%|▌         | 3651/65500 [16:16:03<270:01:23, 15.72s/it]

training loss: 0.7541795969009399


training:   6%|▌         | 3652/65500 [16:16:19<270:02:17, 15.72s/it]

training loss: 1.121851921081543


training:   6%|▌         | 3653/65500 [16:16:35<270:03:27, 15.72s/it]

training loss: 0.8644155859947205


training:   6%|▌         | 3654/65500 [16:16:50<269:59:40, 15.72s/it]

training loss: 0.6799297332763672


training:   6%|▌         | 3655/65500 [16:17:06<270:16:34, 15.73s/it]

training loss: 0.556905210018158


training:   6%|▌         | 3656/65500 [16:17:22<270:11:57, 15.73s/it]

training loss: 1.0660085678100586


training:   6%|▌         | 3657/65500 [16:17:38<270:12:24, 15.73s/it]

training loss: 1.018265724182129


training:   6%|▌         | 3658/65500 [16:17:53<270:10:15, 15.73s/it]

training loss: 0.9301810264587402


training:   6%|▌         | 3659/65500 [16:18:09<270:09:54, 15.73s/it]

training loss: 0.8094913363456726


training:   6%|▌         | 3660/65500 [16:18:25<270:25:03, 15.74s/it]

training loss: 0.5576279163360596


training:   6%|▌         | 3661/65500 [16:18:41<270:20:00, 15.74s/it]

training loss: 0.6551200747489929


training:   6%|▌         | 3662/65500 [16:18:56<270:15:33, 15.73s/it]

training loss: 1.0263885259628296


training:   6%|▌         | 3663/65500 [16:19:12<270:17:05, 15.74s/it]

training loss: 0.6048262715339661


training:   6%|▌         | 3664/65500 [16:19:28<270:14:16, 15.73s/it]

training loss: 0.5202643275260925


training:   6%|▌         | 3665/65500 [16:19:44<270:09:48, 15.73s/it]

training loss: 1.107484221458435


training:   6%|▌         | 3666/65500 [16:19:59<270:03:57, 15.72s/it]

training loss: 0.9007747173309326


training:   6%|▌         | 3667/65500 [16:20:15<270:05:53, 15.73s/it]

training loss: 1.3384957313537598


training:   6%|▌         | 3668/65500 [16:20:31<270:08:27, 15.73s/it]

training loss: 0.8347914814949036


training:   6%|▌         | 3669/65500 [16:20:46<270:04:58, 15.73s/it]

training loss: 0.8331255316734314


training:   6%|▌         | 3670/65500 [16:21:02<270:00:44, 15.72s/it]

training loss: 0.3698718547821045


training:   6%|▌         | 3671/65500 [16:21:18<269:56:37, 15.72s/it]

training loss: 0.543536901473999


training:   6%|▌         | 3672/65500 [16:21:34<269:58:42, 15.72s/it]

training loss: 0.8327693343162537


training:   6%|▌         | 3673/65500 [16:21:49<270:08:40, 15.73s/it]

training loss: 0.888569176197052


training:   6%|▌         | 3674/65500 [16:22:05<270:18:50, 15.74s/it]

training loss: 0.8236920833587646


training:   6%|▌         | 3675/65500 [16:22:21<270:20:42, 15.74s/it]

training loss: 0.9123395681381226


training:   6%|▌         | 3676/65500 [16:22:37<270:34:38, 15.76s/it]

training loss: 0.6117052435874939


training:   6%|▌         | 3677/65500 [16:22:52<270:35:07, 15.76s/it]

training loss: 0.8269393444061279


training:   6%|▌         | 3678/65500 [16:23:08<270:48:45, 15.77s/it]

training loss: 0.4689207077026367


training:   6%|▌         | 3679/65500 [16:23:24<270:55:05, 15.78s/it]

training loss: 0.7254785299301147


training:   6%|▌         | 3680/65500 [16:23:40<270:55:05, 15.78s/it]

training loss: 0.7584041953086853


training:   6%|▌         | 3681/65500 [16:23:56<270:48:33, 15.77s/it]

training loss: 0.8610316514968872


training:   6%|▌         | 3682/65500 [16:24:11<270:43:56, 15.77s/it]

training loss: 0.538858950138092


training:   6%|▌         | 3683/65500 [16:24:27<270:48:41, 15.77s/it]

training loss: 1.0222525596618652


training:   6%|▌         | 3684/65500 [16:24:43<271:17:53, 15.80s/it]

training loss: 0.718658447265625


training:   6%|▌         | 3685/65500 [16:24:59<271:11:43, 15.79s/it]

training loss: 0.6498657464981079


training:   6%|▌         | 3686/65500 [16:25:14<271:04:27, 15.79s/it]

training loss: 0.7640019059181213


training:   6%|▌         | 3687/65500 [16:25:30<271:01:13, 15.78s/it]

training loss: 0.9007963538169861


training:   6%|▌         | 3688/65500 [16:25:46<270:59:06, 15.78s/it]

training loss: 0.8877983093261719


training:   6%|▌         | 3689/65500 [16:26:02<270:53:31, 15.78s/it]

training loss: 0.9126337766647339


training:   6%|▌         | 3690/65500 [16:26:18<270:55:21, 15.78s/it]

training loss: 0.7881131172180176


training:   6%|▌         | 3691/65500 [16:26:33<270:55:06, 15.78s/it]

training loss: 0.6785754561424255


training:   6%|▌         | 3692/65500 [16:26:49<271:01:39, 15.79s/it]

training loss: 0.6254616379737854


training:   6%|▌         | 3693/65500 [16:27:05<271:16:39, 15.80s/it]

training loss: 1.1171503067016602


training:   6%|▌         | 3694/65500 [16:27:21<271:10:56, 15.80s/it]

training loss: 0.8215144872665405


training:   6%|▌         | 3695/65500 [16:27:37<271:14:06, 15.80s/it]

training loss: 0.6929407715797424


training:   6%|▌         | 3696/65500 [16:27:52<271:18:02, 15.80s/it]

training loss: 0.9358943104743958


training:   6%|▌         | 3697/65500 [16:28:08<271:19:25, 15.80s/it]

training loss: 0.7534732222557068


training:   6%|▌         | 3698/65500 [16:28:24<271:17:30, 15.80s/it]

training loss: 0.7337693572044373


training:   6%|▌         | 3699/65500 [16:28:40<271:20:03, 15.81s/it]

training loss: 0.6802431344985962


training:   6%|▌         | 3700/65500 [16:28:56<271:14:17, 15.80s/it]

training loss: 0.896869957447052
training loss: 0.6953997611999512


training:   6%|▌         | 3701/65500 [16:29:13<278:38:02, 16.23s/it]

validation loss: 1.562364101409912


training:   6%|▌         | 3702/65500 [16:29:29<276:36:42, 16.11s/it]

training loss: 0.6075871586799622


training:   6%|▌         | 3703/65500 [16:29:44<275:00:41, 16.02s/it]

training loss: 0.9027771353721619


training:   6%|▌         | 3704/65500 [16:30:00<273:57:41, 15.96s/it]

training loss: 0.8611959218978882


training:   6%|▌         | 3705/65500 [16:30:16<273:14:56, 15.92s/it]

training loss: 0.4884880781173706


training:   6%|▌         | 3706/65500 [16:30:32<272:41:27, 15.89s/it]

training loss: 0.7644191980361938


training:   6%|▌         | 3707/65500 [16:30:48<271:49:42, 15.84s/it]

training loss: 1.0232294797897339


training:   6%|▌         | 3708/65500 [16:31:03<271:26:15, 15.81s/it]

training loss: 0.967642068862915


training:   6%|▌         | 3709/65500 [16:31:19<271:24:33, 15.81s/it]

training loss: 0.8258182406425476


training:   6%|▌         | 3710/65500 [16:31:35<271:26:56, 15.82s/it]

training loss: 0.8031851053237915


training:   6%|▌         | 3711/65500 [16:31:51<271:11:49, 15.80s/it]

training loss: 0.8476427793502808


training:   6%|▌         | 3712/65500 [16:32:07<270:58:57, 15.79s/it]

training loss: 0.6479044556617737


training:   6%|▌         | 3713/65500 [16:32:22<271:00:24, 15.79s/it]

training loss: 1.03593111038208


training:   6%|▌         | 3714/65500 [16:32:38<270:56:21, 15.79s/it]

training loss: 0.672179639339447


training:   6%|▌         | 3715/65500 [16:32:54<270:43:38, 15.77s/it]

training loss: 0.9775345921516418


training:   6%|▌         | 3716/65500 [16:33:10<270:46:15, 15.78s/it]

training loss: 1.1185355186462402


training:   6%|▌         | 3717/65500 [16:33:25<270:46:56, 15.78s/it]

training loss: 0.9717042446136475


training:   6%|▌         | 3718/65500 [16:33:41<270:45:06, 15.78s/it]

training loss: 0.7512852549552917


training:   6%|▌         | 3719/65500 [16:33:57<270:33:36, 15.77s/it]

training loss: 0.7621757984161377


training:   6%|▌         | 3720/65500 [16:34:13<270:45:52, 15.78s/it]

training loss: 0.842372715473175


training:   6%|▌         | 3721/65500 [16:34:29<270:58:18, 15.79s/it]

training loss: 0.7913913130760193


training:   6%|▌         | 3722/65500 [16:34:44<271:06:43, 15.80s/it]

training loss: 0.6892443895339966


training:   6%|▌         | 3723/65500 [16:35:00<271:10:18, 15.80s/it]

training loss: 0.9884147644042969


training:   6%|▌         | 3724/65500 [16:35:16<271:16:00, 15.81s/it]

training loss: 1.1402292251586914


training:   6%|▌         | 3725/65500 [16:35:32<271:14:09, 15.81s/it]

training loss: 0.6255208253860474


training:   6%|▌         | 3726/65500 [16:35:48<271:13:49, 15.81s/it]

training loss: 0.6796736121177673


training:   6%|▌         | 3727/65500 [16:36:03<271:12:44, 15.81s/it]

training loss: 1.1033594608306885


training:   6%|▌         | 3728/65500 [16:36:19<271:10:58, 15.80s/it]

training loss: 1.073918342590332


training:   6%|▌         | 3729/65500 [16:36:35<271:16:41, 15.81s/it]

training loss: 0.8573065996170044


training:   6%|▌         | 3730/65500 [16:36:51<271:16:48, 15.81s/it]

training loss: 0.9667391777038574


training:   6%|▌         | 3731/65500 [16:37:07<271:17:19, 15.81s/it]

training loss: 1.0799630880355835


training:   6%|▌         | 3732/65500 [16:37:23<271:18:39, 15.81s/it]

training loss: 0.7982180714607239


training:   6%|▌         | 3733/65500 [16:37:38<271:19:15, 15.81s/it]

training loss: 0.9323277473449707


training:   6%|▌         | 3734/65500 [16:37:54<271:20:36, 15.82s/it]

training loss: 0.8841874003410339


training:   6%|▌         | 3735/65500 [16:38:10<271:21:47, 15.82s/it]

training loss: 1.1071772575378418


training:   6%|▌         | 3736/65500 [16:38:26<271:23:42, 15.82s/it]

training loss: 0.737076997756958


training:   6%|▌         | 3737/65500 [16:38:42<271:25:50, 15.82s/it]

training loss: 0.8208101987838745


training:   6%|▌         | 3738/65500 [16:38:57<271:20:00, 15.82s/it]

training loss: 0.5077992677688599


training:   6%|▌         | 3739/65500 [16:39:13<271:15:09, 15.81s/it]

training loss: 1.205517053604126


training:   6%|▌         | 3740/65500 [16:39:29<271:15:46, 15.81s/it]

training loss: 1.1193159818649292


training:   6%|▌         | 3741/65500 [16:39:45<271:15:50, 15.81s/it]

training loss: 0.5231854319572449


training:   6%|▌         | 3742/65500 [16:40:01<271:16:31, 15.81s/it]

training loss: 1.0153098106384277


training:   6%|▌         | 3743/65500 [16:40:16<271:16:34, 15.81s/it]

training loss: 0.912703275680542


training:   6%|▌         | 3744/65500 [16:40:32<271:21:55, 15.82s/it]

training loss: 0.8488302230834961


training:   6%|▌         | 3745/65500 [16:40:48<271:17:41, 15.82s/it]

training loss: 0.8046335577964783


training:   6%|▌         | 3746/65500 [16:41:04<271:21:05, 15.82s/it]

training loss: 1.0114551782608032


training:   6%|▌         | 3747/65500 [16:41:20<271:19:02, 15.82s/it]

training loss: 0.7930841445922852


training:   6%|▌         | 3748/65500 [16:41:36<271:18:14, 15.82s/it]

training loss: 0.6763141751289368


training:   6%|▌         | 3749/65500 [16:41:51<271:16:18, 15.81s/it]

training loss: 0.9946413636207581


training:   6%|▌         | 3750/65500 [16:42:07<271:15:31, 15.81s/it]

training loss: 0.6044721603393555


training:   6%|▌         | 3751/65500 [16:42:23<271:18:31, 15.82s/it]

training loss: 1.1803697347640991


training:   6%|▌         | 3752/65500 [16:42:39<271:19:05, 15.82s/it]

training loss: 0.6989720463752747


training:   6%|▌         | 3753/65500 [16:42:55<271:16:16, 15.82s/it]

training loss: 0.6645431518554688


training:   6%|▌         | 3754/65500 [16:43:10<271:14:43, 15.81s/it]

training loss: 0.5047771334648132


training:   6%|▌         | 3755/65500 [16:43:26<271:13:35, 15.81s/it]

training loss: 0.7583399415016174


training:   6%|▌         | 3756/65500 [16:43:42<271:11:38, 15.81s/it]

training loss: 0.9210687279701233


training:   6%|▌         | 3757/65500 [16:43:58<271:11:43, 15.81s/it]

training loss: 1.1090104579925537


training:   6%|▌         | 3758/65500 [16:44:14<271:13:54, 15.81s/it]

training loss: 0.6367228031158447


training:   6%|▌         | 3759/65500 [16:44:30<271:11:03, 15.81s/it]

training loss: 0.8257966637611389


training:   6%|▌         | 3760/65500 [16:44:45<271:21:04, 15.82s/it]

training loss: 1.0713382959365845


training:   6%|▌         | 3761/65500 [16:45:01<271:14:41, 15.82s/it]

training loss: 0.773629903793335


training:   6%|▌         | 3762/65500 [16:45:17<271:13:38, 15.82s/it]

training loss: 0.8465542793273926


training:   6%|▌         | 3763/65500 [16:45:33<271:09:48, 15.81s/it]

training loss: 0.7144246101379395


training:   6%|▌         | 3764/65500 [16:45:49<271:01:40, 15.80s/it]

training loss: 0.9132214188575745


training:   6%|▌         | 3765/65500 [16:46:04<271:02:40, 15.81s/it]

training loss: 0.9325646162033081


training:   6%|▌         | 3766/65500 [16:46:20<271:04:59, 15.81s/it]

training loss: 1.0047184228897095


training:   6%|▌         | 3767/65500 [16:46:36<271:05:35, 15.81s/it]

training loss: 0.938417911529541


training:   6%|▌         | 3768/65500 [16:46:52<271:05:59, 15.81s/it]

training loss: 0.7134915590286255


training:   6%|▌         | 3769/65500 [16:47:08<271:04:53, 15.81s/it]

training loss: 0.9097979664802551


training:   6%|▌         | 3770/65500 [16:47:23<271:07:46, 15.81s/it]

training loss: 0.7260621786117554


training:   6%|▌         | 3771/65500 [16:47:39<271:02:10, 15.81s/it]

training loss: 0.9090271592140198


training:   6%|▌         | 3772/65500 [16:47:55<271:02:13, 15.81s/it]

training loss: 0.5734422206878662


training:   6%|▌         | 3773/65500 [16:48:11<271:02:24, 15.81s/it]

training loss: 0.6636545658111572


training:   6%|▌         | 3774/65500 [16:48:27<271:01:19, 15.81s/it]

training loss: 1.0122178792953491


training:   6%|▌         | 3775/65500 [16:48:42<271:03:48, 15.81s/it]

training loss: 0.7745561599731445


training:   6%|▌         | 3776/65500 [16:48:58<270:55:19, 15.80s/it]

training loss: 1.2791748046875


training:   6%|▌         | 3777/65500 [16:49:14<271:22:18, 15.83s/it]

training loss: 0.9520666599273682


training:   6%|▌         | 3778/65500 [16:49:30<271:14:43, 15.82s/it]

training loss: 0.8457276821136475


training:   6%|▌         | 3779/65500 [16:49:46<271:09:23, 15.82s/it]

training loss: 1.1310148239135742


training:   6%|▌         | 3780/65500 [16:50:02<270:59:00, 15.81s/it]

training loss: 1.1142868995666504


training:   6%|▌         | 3781/65500 [16:50:17<271:00:01, 15.81s/it]

training loss: 0.8144264221191406


training:   6%|▌         | 3782/65500 [16:50:33<270:53:56, 15.80s/it]

training loss: 0.5933762192726135


training:   6%|▌         | 3783/65500 [16:50:49<270:44:10, 15.79s/it]

training loss: 0.9578690528869629


training:   6%|▌         | 3784/65500 [16:51:05<270:42:56, 15.79s/it]

training loss: 0.7180909514427185


training:   6%|▌         | 3785/65500 [16:51:21<270:42:11, 15.79s/it]

training loss: 0.6276701092720032


training:   6%|▌         | 3786/65500 [16:51:36<270:38:48, 15.79s/it]

training loss: 0.9250536561012268


training:   6%|▌         | 3787/65500 [16:51:52<270:30:21, 15.78s/it]

training loss: 0.8444838523864746


training:   6%|▌         | 3788/65500 [16:52:08<270:31:48, 15.78s/it]

training loss: 0.8749572038650513


training:   6%|▌         | 3789/65500 [16:52:24<270:33:45, 15.78s/it]

training loss: 0.9068213105201721


training:   6%|▌         | 3790/65500 [16:52:39<270:30:29, 15.78s/it]

training loss: 0.8738003969192505


training:   6%|▌         | 3791/65500 [16:52:55<270:33:27, 15.78s/it]

training loss: 0.6578093767166138


training:   6%|▌         | 3792/65500 [16:53:11<270:39:03, 15.79s/it]

training loss: 0.6420948505401611


training:   6%|▌         | 3793/65500 [16:53:27<270:42:49, 15.79s/it]

training loss: 0.900301992893219


training:   6%|▌         | 3794/65500 [16:53:43<270:47:48, 15.80s/it]

training loss: 1.134168028831482


training:   6%|▌         | 3795/65500 [16:53:58<270:53:33, 15.80s/it]

training loss: 0.8640655279159546


training:   6%|▌         | 3796/65500 [16:54:14<271:02:06, 15.81s/it]

training loss: 0.7130511403083801


training:   6%|▌         | 3797/65500 [16:54:30<271:03:13, 15.81s/it]

training loss: 0.7985305190086365


training:   6%|▌         | 3798/65500 [16:54:46<271:04:05, 15.82s/it]

training loss: 0.9647620916366577


training:   6%|▌         | 3799/65500 [16:55:02<271:05:03, 15.82s/it]

training loss: 0.3734022080898285


training:   6%|▌         | 3800/65500 [16:55:18<271:01:37, 15.81s/it]

training loss: 1.2536346912384033
training loss: 0.8160896897315979


training:   6%|▌         | 3801/65500 [16:55:35<278:18:44, 16.24s/it]

validation loss: 1.223464012145996


training:   6%|▌         | 3802/65500 [16:55:51<276:19:29, 16.12s/it]

training loss: 0.9372493028640747


training:   6%|▌         | 3803/65500 [16:56:06<274:41:46, 16.03s/it]

training loss: 0.7834674119949341


training:   6%|▌         | 3804/65500 [16:56:22<273:32:07, 15.96s/it]

training loss: 0.8492836356163025


training:   6%|▌         | 3805/65500 [16:56:38<272:46:37, 15.92s/it]

training loss: 0.698999285697937


training:   6%|▌         | 3806/65500 [16:56:54<272:07:28, 15.88s/it]

training loss: 0.47639596462249756


training:   6%|▌         | 3807/65500 [16:57:10<271:34:10, 15.85s/it]

training loss: 0.7593950629234314


training:   6%|▌         | 3808/65500 [16:57:25<271:15:42, 15.83s/it]

training loss: 0.7972347140312195


training:   6%|▌         | 3809/65500 [16:57:41<271:00:03, 15.81s/it]

training loss: 0.48182693123817444


training:   6%|▌         | 3810/65500 [16:57:57<270:45:31, 15.80s/it]

training loss: 0.6273823976516724


training:   6%|▌         | 3811/65500 [16:58:13<270:41:07, 15.80s/it]

training loss: 0.8241269588470459


training:   6%|▌         | 3812/65500 [16:58:28<270:37:41, 15.79s/it]

training loss: 0.9294442534446716


training:   6%|▌         | 3813/65500 [16:58:44<270:39:17, 15.80s/it]

training loss: 0.8808108568191528


training:   6%|▌         | 3814/65500 [16:59:00<270:39:00, 15.80s/it]

training loss: 1.1713478565216064


training:   6%|▌         | 3815/65500 [16:59:16<270:41:07, 15.80s/it]

training loss: 0.5921387076377869


training:   6%|▌         | 3816/65500 [16:59:32<270:36:19, 15.79s/it]

training loss: 0.6481720209121704


training:   6%|▌         | 3817/65500 [16:59:47<270:35:08, 15.79s/it]

training loss: 1.0854185819625854


training:   6%|▌         | 3818/65500 [17:00:03<270:35:56, 15.79s/it]

training loss: 0.5502305626869202


training:   6%|▌         | 3819/65500 [17:00:19<270:37:59, 15.80s/it]

training loss: 0.7963661551475525


training:   6%|▌         | 3820/65500 [17:00:35<270:34:52, 15.79s/it]

training loss: 0.36996927857398987


training:   6%|▌         | 3821/65500 [17:00:51<270:30:01, 15.79s/it]

training loss: 0.9900343418121338


training:   6%|▌         | 3822/65500 [17:01:06<270:30:49, 15.79s/it]

training loss: 1.0270159244537354


training:   6%|▌         | 3823/65500 [17:01:22<270:37:40, 15.80s/it]

training loss: 0.9012065529823303


training:   6%|▌         | 3824/65500 [17:01:38<270:39:20, 15.80s/it]

training loss: 0.966282308101654


training:   6%|▌         | 3825/65500 [17:01:54<270:34:20, 15.79s/it]

training loss: 0.5632435083389282


training:   6%|▌         | 3826/65500 [17:02:10<270:26:26, 15.79s/it]

training loss: 0.5260931849479675


training:   6%|▌         | 3827/65500 [17:02:25<270:24:01, 15.78s/it]

training loss: 1.2019834518432617


training:   6%|▌         | 3828/65500 [17:02:41<270:27:30, 15.79s/it]

training loss: 0.9057639241218567


training:   6%|▌         | 3829/65500 [17:02:57<270:20:17, 15.78s/it]

training loss: 1.1248373985290527


training:   6%|▌         | 3830/65500 [17:03:13<270:22:03, 15.78s/it]

training loss: 0.6926736235618591


training:   6%|▌         | 3831/65500 [17:03:29<270:25:01, 15.79s/it]

training loss: 0.7841629981994629


training:   6%|▌         | 3832/65500 [17:03:44<270:28:50, 15.79s/it]

training loss: 0.8140206933021545


training:   6%|▌         | 3833/65500 [17:04:00<270:25:09, 15.79s/it]

training loss: 0.7703775763511658


training:   6%|▌         | 3834/65500 [17:04:16<270:26:01, 15.79s/it]

training loss: 0.5887289643287659


training:   6%|▌         | 3835/65500 [17:04:32<270:22:53, 15.78s/it]

training loss: 1.0301390886306763


training:   6%|▌         | 3836/65500 [17:04:47<270:18:57, 15.78s/it]

training loss: 0.8121175169944763


training:   6%|▌         | 3837/65500 [17:05:03<270:13:47, 15.78s/it]

training loss: 0.9801531434059143


training:   6%|▌         | 3838/65500 [17:05:19<270:15:05, 15.78s/it]

training loss: 0.9625124931335449


training:   6%|▌         | 3839/65500 [17:05:35<270:16:49, 15.78s/it]

training loss: 0.6987742185592651


training:   6%|▌         | 3840/65500 [17:05:51<270:08:48, 15.77s/it]

training loss: 0.8846815228462219


training:   6%|▌         | 3841/65500 [17:06:06<270:11:40, 15.78s/it]

training loss: 1.1153773069381714


training:   6%|▌         | 3842/65500 [17:06:22<270:11:17, 15.78s/it]

training loss: 0.5258525609970093


training:   6%|▌         | 3843/65500 [17:06:38<270:12:03, 15.78s/it]

training loss: 0.7959302663803101


training:   6%|▌         | 3844/65500 [17:06:54<270:14:13, 15.78s/it]

training loss: 0.8527465462684631


training:   6%|▌         | 3845/65500 [17:07:09<270:16:39, 15.78s/it]

training loss: 1.135926365852356


training:   6%|▌         | 3846/65500 [17:07:25<270:22:55, 15.79s/it]

training loss: 0.7471453547477722


training:   6%|▌         | 3847/65500 [17:07:41<270:22:07, 15.79s/it]

training loss: 1.2561330795288086


training:   6%|▌         | 3848/65500 [17:07:57<270:27:05, 15.79s/it]

training loss: 0.8793976902961731


training:   6%|▌         | 3849/65500 [17:08:13<270:30:55, 15.80s/it]

training loss: 0.7781542539596558


training:   6%|▌         | 3850/65500 [17:08:28<270:36:12, 15.80s/it]

training loss: 0.8527712225914001


training:   6%|▌         | 3851/65500 [17:08:44<270:34:55, 15.80s/it]

training loss: 0.9182694554328918


training:   6%|▌         | 3852/65500 [17:09:00<270:38:23, 15.80s/it]

training loss: 1.1011238098144531


training:   6%|▌         | 3853/65500 [17:09:16<270:36:30, 15.80s/it]

training loss: 0.7863093614578247


training:   6%|▌         | 3854/65500 [17:09:32<270:40:05, 15.81s/it]

training loss: 0.8516165614128113


training:   6%|▌         | 3855/65500 [17:09:47<270:36:46, 15.80s/it]

training loss: 0.6859219074249268


training:   6%|▌         | 3856/65500 [17:10:03<270:38:30, 15.81s/it]

training loss: 0.8716070652008057


training:   6%|▌         | 3857/65500 [17:10:19<270:42:11, 15.81s/it]

training loss: 0.9405202269554138


training:   6%|▌         | 3858/65500 [17:10:35<270:40:45, 15.81s/it]

training loss: 0.6511362195014954


training:   6%|▌         | 3859/65500 [17:10:51<270:34:17, 15.80s/it]

training loss: 0.9997241497039795


training:   6%|▌         | 3860/65500 [17:11:06<270:33:27, 15.80s/it]

training loss: 0.6183844208717346


training:   6%|▌         | 3861/65500 [17:11:22<270:34:09, 15.80s/it]

training loss: 0.9838584661483765


training:   6%|▌         | 3862/65500 [17:11:38<270:43:44, 15.81s/it]

training loss: 1.117946743965149


training:   6%|▌         | 3863/65500 [17:11:54<270:36:22, 15.81s/it]

training loss: 0.8088026642799377


training:   6%|▌         | 3864/65500 [17:12:10<270:33:23, 15.80s/it]

training loss: 1.0377440452575684


training:   6%|▌         | 3865/65500 [17:12:26<270:33:24, 15.80s/it]

training loss: 0.8126216530799866


training:   6%|▌         | 3866/65500 [17:12:41<270:33:56, 15.80s/it]

training loss: 1.1325058937072754


training:   6%|▌         | 3867/65500 [17:12:57<270:34:37, 15.80s/it]

training loss: 0.8563961982727051


training:   6%|▌         | 3868/65500 [17:13:13<270:33:28, 15.80s/it]

training loss: 0.5676917433738708


training:   6%|▌         | 3869/65500 [17:13:29<270:34:58, 15.81s/it]

training loss: 0.866534411907196


training:   6%|▌         | 3870/65500 [17:13:45<270:34:30, 15.81s/it]

training loss: 0.9358336329460144


training:   6%|▌         | 3871/65500 [17:14:00<270:29:10, 15.80s/it]

training loss: 0.7547841668128967


training:   6%|▌         | 3872/65500 [17:14:16<270:30:29, 15.80s/it]

training loss: 0.6265854239463806


training:   6%|▌         | 3873/65500 [17:14:32<270:25:41, 15.80s/it]

training loss: 0.9809051752090454


training:   6%|▌         | 3874/65500 [17:14:48<270:25:46, 15.80s/it]

training loss: 0.9406728148460388


training:   6%|▌         | 3875/65500 [17:15:03<270:22:36, 15.79s/it]

training loss: 0.7256951332092285


training:   6%|▌         | 3876/65500 [17:15:19<270:19:21, 15.79s/it]

training loss: 0.7335368394851685


training:   6%|▌         | 3877/65500 [17:15:35<270:18:46, 15.79s/it]

training loss: 0.8647409677505493


training:   6%|▌         | 3878/65500 [17:15:51<270:08:18, 15.78s/it]

training loss: 0.6167969703674316


training:   6%|▌         | 3879/65500 [17:16:07<270:07:54, 15.78s/it]

training loss: 0.6647805571556091


training:   6%|▌         | 3880/65500 [17:16:22<270:11:48, 15.79s/it]

training loss: 1.105228066444397


training:   6%|▌         | 3881/65500 [17:16:38<270:08:03, 15.78s/it]

training loss: 0.9862213134765625


training:   6%|▌         | 3882/65500 [17:16:54<270:05:15, 15.78s/it]

training loss: 0.74139404296875


training:   6%|▌         | 3883/65500 [17:17:10<270:04:42, 15.78s/it]

training loss: 0.662122368812561


training:   6%|▌         | 3884/65500 [17:17:26<270:09:17, 15.78s/it]

training loss: 0.5052502155303955


training:   6%|▌         | 3885/65500 [17:17:41<270:04:58, 15.78s/it]

training loss: 0.8284998536109924


training:   6%|▌         | 3886/65500 [17:17:57<270:12:16, 15.79s/it]

training loss: 0.6554448008537292


training:   6%|▌         | 3887/65500 [17:18:13<270:10:09, 15.79s/it]

training loss: 0.9653997421264648


training:   6%|▌         | 3888/65500 [17:18:29<270:13:56, 15.79s/it]

training loss: 0.9092870354652405


training:   6%|▌         | 3889/65500 [17:18:44<270:12:15, 15.79s/it]

training loss: 0.9157990217208862


training:   6%|▌         | 3890/65500 [17:19:00<270:09:21, 15.79s/it]

training loss: 1.022822618484497


training:   6%|▌         | 3891/65500 [17:19:16<270:06:30, 15.78s/it]

training loss: 0.8066111207008362


training:   6%|▌         | 3892/65500 [17:19:32<270:11:30, 15.79s/it]

training loss: 0.7935430407524109


training:   6%|▌         | 3893/65500 [17:19:48<270:07:32, 15.78s/it]

training loss: 0.9235015511512756


training:   6%|▌         | 3894/65500 [17:20:03<270:00:57, 15.78s/it]

training loss: 0.682442843914032


training:   6%|▌         | 3895/65500 [17:20:19<269:54:48, 15.77s/it]

training loss: 1.0344340801239014


training:   6%|▌         | 3896/65500 [17:20:35<269:58:57, 15.78s/it]

training loss: 0.8105528950691223


training:   6%|▌         | 3897/65500 [17:20:51<270:01:23, 15.78s/it]

training loss: 0.8517770767211914


training:   6%|▌         | 3898/65500 [17:21:06<269:55:35, 15.77s/it]

training loss: 0.5508720278739929


training:   6%|▌         | 3899/65500 [17:21:22<270:02:48, 15.78s/it]

training loss: 0.8775972723960876


training:   6%|▌         | 3900/65500 [17:21:38<270:03:26, 15.78s/it]

training loss: 1.041169285774231
training loss: 0.6383038759231567


training:   6%|▌         | 3901/65500 [17:21:55<277:24:10, 16.21s/it]

validation loss: 1.1326555013656616


training:   6%|▌         | 3902/65500 [17:22:11<275:35:48, 16.11s/it]

training loss: 0.8627433180809021


training:   6%|▌         | 3903/65500 [17:22:27<274:25:39, 16.04s/it]

training loss: 0.8562690019607544


training:   6%|▌         | 3904/65500 [17:22:43<273:28:53, 15.98s/it]

training loss: 0.6840147972106934


training:   6%|▌         | 3905/65500 [17:22:59<272:49:46, 15.95s/it]

training loss: 0.7382173538208008


training:   6%|▌         | 3906/65500 [17:23:15<272:16:58, 15.91s/it]

training loss: 0.6618158221244812


training:   6%|▌         | 3907/65500 [17:23:30<272:05:40, 15.90s/it]

training loss: 1.0617494583129883


training:   6%|▌         | 3908/65500 [17:23:46<271:51:42, 15.89s/it]

training loss: 0.7395899295806885


training:   6%|▌         | 3909/65500 [17:24:02<271:45:17, 15.88s/it]

training loss: 0.698959469795227


training:   6%|▌         | 3910/65500 [17:24:18<271:42:41, 15.88s/it]

training loss: 0.9495899081230164


training:   6%|▌         | 3911/65500 [17:24:34<271:35:54, 15.88s/it]

training loss: 0.6875609159469604


training:   6%|▌         | 3912/65500 [17:24:50<271:31:57, 15.87s/it]

training loss: 0.6183432340621948


training:   6%|▌         | 3913/65500 [17:25:06<271:22:30, 15.86s/it]

training loss: 0.7662070989608765


training:   6%|▌         | 3914/65500 [17:25:21<271:04:18, 15.85s/it]

training loss: 0.9772276878356934


training:   6%|▌         | 3915/65500 [17:25:37<270:56:12, 15.84s/it]

training loss: 0.6745445728302002


training:   6%|▌         | 3916/65500 [17:25:53<270:45:20, 15.83s/it]

training loss: 0.43656080961227417


training:   6%|▌         | 3917/65500 [17:26:09<270:32:34, 15.82s/it]

training loss: 0.8583235144615173


training:   6%|▌         | 3918/65500 [17:26:25<270:30:38, 15.81s/it]

training loss: 0.9521047472953796


training:   6%|▌         | 3919/65500 [17:26:40<270:19:48, 15.80s/it]

training loss: 0.5469697713851929


training:   6%|▌         | 3920/65500 [17:26:56<270:15:55, 15.80s/it]

training loss: 0.5311810374259949


training:   6%|▌         | 3921/65500 [17:27:12<270:18:08, 15.80s/it]

training loss: 0.792694628238678


training:   6%|▌         | 3922/65500 [17:27:28<270:18:29, 15.80s/it]

training loss: 0.9364175796508789


training:   6%|▌         | 3923/65500 [17:27:44<270:12:19, 15.80s/it]

training loss: 0.8128138780593872


training:   6%|▌         | 3924/65500 [17:27:59<270:06:11, 15.79s/it]

training loss: 0.6403542160987854


training:   6%|▌         | 3925/65500 [17:28:15<270:08:26, 15.79s/it]

training loss: 1.0963561534881592


training:   6%|▌         | 3926/65500 [17:28:31<270:09:19, 15.79s/it]

training loss: 0.7477943897247314


training:   6%|▌         | 3927/65500 [17:28:47<270:08:00, 15.79s/it]

training loss: 1.0076438188552856


training:   6%|▌         | 3928/65500 [17:29:03<270:04:55, 15.79s/it]

training loss: 0.909656286239624


training:   6%|▌         | 3929/65500 [17:29:18<270:04:17, 15.79s/it]

training loss: 0.9682365655899048


training:   6%|▌         | 3930/65500 [17:29:34<270:07:45, 15.79s/it]

training loss: 0.5832205414772034


training:   6%|▌         | 3931/65500 [17:29:50<270:06:57, 15.79s/it]

training loss: 0.7337300777435303


training:   6%|▌         | 3932/65500 [17:30:06<270:02:03, 15.79s/it]

training loss: 1.0007927417755127


training:   6%|▌         | 3933/65500 [17:30:22<270:02:14, 15.79s/it]

training loss: 0.954471230506897


training:   6%|▌         | 3934/65500 [17:30:37<270:04:08, 15.79s/it]

training loss: 0.860266923904419


training:   6%|▌         | 3935/65500 [17:30:53<270:04:36, 15.79s/it]

training loss: 0.5127151608467102


training:   6%|▌         | 3936/65500 [17:31:09<270:02:13, 15.79s/it]

training loss: 1.0618107318878174


training:   6%|▌         | 3937/65500 [17:31:25<269:54:45, 15.78s/it]

training loss: 0.864581286907196


training:   6%|▌         | 3938/65500 [17:31:40<270:00:32, 15.79s/it]

training loss: 0.7328181862831116


training:   6%|▌         | 3939/65500 [17:31:56<269:59:20, 15.79s/it]

training loss: 0.6797258257865906


training:   6%|▌         | 3940/65500 [17:32:12<270:03:02, 15.79s/it]

training loss: 0.7063034772872925


training:   6%|▌         | 3941/65500 [17:32:28<270:06:32, 15.80s/it]

training loss: 0.528505265712738


training:   6%|▌         | 3942/65500 [17:32:44<270:02:37, 15.79s/it]

training loss: 1.2645225524902344


training:   6%|▌         | 3943/65500 [17:32:59<270:00:27, 15.79s/it]

training loss: 0.9713647961616516


training:   6%|▌         | 3944/65500 [17:33:15<270:02:02, 15.79s/it]

training loss: 0.9085481762886047


training:   6%|▌         | 3945/65500 [17:33:31<269:58:03, 15.79s/it]

training loss: 0.6982875466346741


training:   6%|▌         | 3946/65500 [17:33:47<269:56:36, 15.79s/it]

training loss: 0.7662912011146545


training:   6%|▌         | 3947/65500 [17:34:03<269:56:35, 15.79s/it]

training loss: 0.9250636100769043


training:   6%|▌         | 3948/65500 [17:34:18<270:03:07, 15.79s/it]

training loss: 0.6796050071716309


training:   6%|▌         | 3949/65500 [17:34:34<270:02:40, 15.79s/it]

training loss: 0.5330240726470947


training:   6%|▌         | 3950/65500 [17:34:50<270:02:57, 15.79s/it]

training loss: 1.140386700630188


training:   6%|▌         | 3951/65500 [17:35:06<269:58:16, 15.79s/it]

training loss: 0.4118806719779968


training:   6%|▌         | 3952/65500 [17:35:22<269:58:56, 15.79s/it]

training loss: 0.7528322339057922


training:   6%|▌         | 3953/65500 [17:35:37<269:56:56, 15.79s/it]

training loss: 1.0119514465332031
