<a href="https://colab.research.google.com/github/xSakix/AI_colab_notebooks/blob/master/reformer_pytorch_cuda.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install the notebook's dependencies into the Colab runtime.
# NOTE(review): versions are unpinned, so a fresh runtime may pull newer
# releases than the ones this notebook was written against — consider pinning.
!pip install torch
!pip install reformer_pytorch
!pip install transformers



In [2]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime → "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Mon Feb 10 07:59:08 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.48.02    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   46C    P0    29W / 250W |      0MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [3]:
# Mount Google Drive at /content/drive; the later cells read checkpoints and
# training data from, and write checkpoints to, paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# Locate the most recent checkpoint on Drive and recover the iteration number
# encoded in its file name, so training can resume from there.
import os
import re

MODEL_SAVE_DIR = '/content/drive/My Drive/model_saves'


def checkpoint_epoch(filename):
  """Return the iteration number embedded in a checkpoint file name.

  A name is only trusted when it contains exactly one run of digits
  (e.g. 'epoch-7100.pt' -> 7100); any other name yields 0.
  """
  numbers = re.findall(r'\d+', filename)
  return int(numbers[0]) if len(numbers) == 1 else 0


def latest_checkpoint(filenames):
  """Pick the checkpoint with the highest iteration number.

  Args:
    filenames: iterable of checkpoint file names.

  Returns:
    A `(filename, epoch)` tuple for the newest checkpoint, or `(None, 0)`
    when `filenames` is empty.
  """
  candidates = list(filenames)
  if not candidates:
    return None, 0
  # Compare numerically: a plain string sort ranks 'epoch-900.pt' above
  # 'epoch-7100.pt' and would resume from a stale checkpoint.
  newest = max(candidates, key=lambda name: (checkpoint_epoch(name), name))
  return newest, checkpoint_epoch(newest)


last_model_file = None
epochs_run = 0
files = []
if os.path.isdir(MODEL_SAVE_DIR):
  files = [f for f in os.listdir(MODEL_SAVE_DIR) if f.startswith('epoch')]
else:
  # Without this guard os.listdir raises on a run where the directory does not exist yet.
  print('checkpoint directory not found:', MODEL_SAVE_DIR)

newest_file, newest_epoch = latest_checkpoint(files)
if newest_file is not None:
  last_model_file = os.path.join(MODEL_SAVE_DIR, newest_file)
  epochs_run = newest_epoch
  print(last_model_file)
  print('number of epochs run:', epochs_run)


/content/drive/My Drive/model_saves/epoch-7100.pt
number of epochs run: 7100


In [0]:
from reformer_pytorch import ReformerLM

import random
import tqdm
import gzip
import numpy as np
import torch
import torch.optim as optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
import os
from transformers import BertTokenizer, AdamW, get_linear_schedule_with_warmup

# training hyperparameters

NUM_BATCHES = 100_000            # upper bound of the training loop's iteration counter
BATCH_SIZE = 8                   # sequences per DataLoader batch
GRADIENT_ACCUMULATE_EVERY = 4    # backward passes per optimizer step
LEARNING_RATE = 0.0003           # learning rate handed to the optimizer
VALIDATE_EVERY = 100             # iterations between checkpoint + validation passes
GENERATE_EVERY = 500             # iterations between text-generation samples
GENERATE_LENGTH = 512            # tokens produced per generation sample
SEQ_LEN = 4096                   # tokens per training sequence (model max_seq_len)

# helpers

def cycle(loader):
    """Yield the items of `loader` endlessly, re-iterating it whenever it runs out."""
    while True:
        yield from loader

def get_top_p(logits, top_p=0.9):
    """Apply nucleus (top-p) filtering to `logits` in place.

    Along the last dimension the entries are ranked by logit. The shortest
    ranked prefix whose cumulative softmax probability exceeds `top_p` is
    kept and every other logit is set to -inf. The top-ranked entry is
    always kept.

    Args:
        logits: float tensor whose last dimension is filtered; any number of
            leading (batch) dimensions is accepted and each row is filtered
            independently.
        top_p: cumulative-probability threshold.

    Returns:
        The same `logits` tensor, modified in place.
    """
    sorted_logits, sorted_indices = torch.sort(logits, dim=-1, descending=True)
    cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

    # Flag every ranked position past the threshold, then shift the mask right
    # by one so the entry that crosses the threshold is itself retained.
    sorted_indices_to_remove = cumulative_probs > top_p
    sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
    sorted_indices_to_remove[..., 0] = False

    # Scatter the mask from sorted order back to the original order row by
    # row. Plain fancy indexing with the sorted indices is only valid for 1-D
    # input and hits the wrong axis once a batch dimension is present.
    indices_to_remove = torch.zeros_like(sorted_indices_to_remove).scatter(
        -1, sorted_indices, sorted_indices_to_remove)
    logits[indices_to_remove] = float('-inf')
    return logits

def sample_next_token(logits, top_p=0.9, temperature = 1.0):
    """Sample one token id from the prediction at the final sequence position.

    Only the first batch element's last position (`logits[0, -1, :]`) is
    used. Those logits are divided by `temperature`, passed through
    `get_top_p`, turned into probabilities with softmax and sampled once.

    Returns:
        The 1-element tensor produced by `torch.multinomial`.
    """
    final_step = logits[0, -1, :]
    tempered = final_step / temperature
    nucleus_logits = get_top_p(tempered, top_p=top_p)
    return torch.multinomial(F.softmax(nucleus_logits, dim=-1), 1)

def decode_token(token):
    """Return the one-character string whose code point is `token`."""
    return chr(token)

def decode_tokens(tokens):
    """Decode every token in `tokens` with `decode_token` and concatenate the results."""
    return ''.join(decode_token(token) for token in tokens)

# build the Reformer language model

reformer_config = dict(
    dim = 512,
    depth = 6,
    max_seq_len = SEQ_LEN,
    num_tokens = 256,
    heads = 8,
    bucket_size = 64,
    n_hashes = 8,
    ff_chunks = 10,
    lsh_dropout = 0.1,
    weight_tie = True,
    causal = True,
    use_full_attn = False # set this to true for comparison with full attention
)
model = ReformerLM(**reformer_config)

# Alternative configuration kept for reference: identical to the one above
# except n_hashes = 4.

# resume from the newest checkpoint when the earlier cell found one
if last_model_file is not None:
    model.load_state_dict(torch.load(last_model_file ))

# move the parameters to the GPU
model.cuda()


# prepare byte-level text data: every byte of the decompressed file is one token

with gzip.open('/content/drive/My Drive/model_data/merged.gz') as file:
    raw_bytes = file.read()

# Convert the bytes in a single vectorised step rather than materialising a
# Python list with one int object per byte. The cast to int64 keeps the dtype
# the list-based conversion produced on 64-bit Linux and yields a writable
# copy that torch.from_numpy can wrap.
X = np.frombuffer(raw_bytes, dtype=np.uint8).astype(np.int64)
del raw_bytes  # the decompressed buffer is no longer needed once copied

# first 80% of the stream is the training split, the final 20% is validation
si = int(len(X)-len(X)*0.2)
trX, vaX = np.split(X, [si])
data_train, data_val = torch.from_numpy(trX), torch.from_numpy(vaX)

class TextSamplerDataset(Dataset):
    """Dataset that serves randomly positioned windows of a token tensor.

    Each item is an `(inputs, targets)` pair of `seq_len` tokens cut from
    `data` along its first dimension, with `targets` being `inputs` shifted
    one position ahead. The requested index is ignored: a fresh random start
    is drawn on every access. Both tensors are cast to long and moved to the
    GPU before being returned.
    """

    def __init__(self, data, seq_len):
        super().__init__()
        self.seq_len = seq_len
        self.data = data

    def __len__(self):
        # number of whole seq_len-sized windows that fit into the data
        return self.data.size(0) // self.seq_len

    def __getitem__(self, index):
        upper_bound = self.data.size(0) - self.seq_len - 1
        start = int(torch.randint(0, upper_bound, (1,)))
        # one extra token so inputs and targets can be offset by one position
        window = self.data[start: start + self.seq_len + 1].long()
        inputs, targets = window[:-1], window[1:]
        return inputs.cuda(), targets.cuda()

# wrap each split in a dataset and an endlessly cycling batch iterator
train_dataset, val_dataset = (
    TextSamplerDataset(split, SEQ_LEN) for split in (data_train, data_val)
)
train_loader, val_loader = (
    cycle(DataLoader(dataset, batch_size = BATCH_SIZE))
    for dataset in (train_dataset, val_dataset)
)

# report the length of each dataset (train first, then validation)
print(len(train_dataset), len(val_dataset), sep='\n')

# optimizer: AdamW with AMSGrad enabled, restored from Drive when a saved state exists
# NOTE: binding `optim` here shadows the `torch.optim` module alias imported
# earlier in this cell; the constructor is therefore reached via `torch.optim`.

optim = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE, amsgrad=True)

optim_state_path = '/content/drive/My Drive/model_saves/optim.pt'
if os.path.exists(optim_state_path):
  optim.load_state_dict(torch.load(optim_state_path))

#scheduler

# Linear decay of the learning rate with no warm-up.
# scheduler.step() is called once per iteration of the training loop, and that
# loop counts up to NUM_BATCHES, so NUM_BATCHES is the schedule length.
# The previous length, len(train_dataset) // GRADIENT_ACCUMULATE_EVERY * NUM_BATCHES,
# was orders of magnitude larger than the number of steps ever taken, which
# left the learning rate effectively constant for the whole run.
scheduler = get_linear_schedule_with_warmup(
            optim,
            num_warmup_steps=0,
            num_training_steps=NUM_BATCHES
        )

# Restore the saved scheduler state when resuming.
# NOTE(review): a scheduler.pt written under the old schedule length should
# only carry the step counter, so the learning rate is expected to move onto
# the corrected schedule at the next scheduler.step() — confirm on resume.
if os.path.exists('/content/drive/My Drive/model_saves/scheduler.pt'):
  scheduler.load_state_dict(torch.load('/content/drive/My Drive/model_saves/scheduler.pt'))

# training

def get_batch_loss(model, data):
    """Return the mean cross-entropy of `model` on one `(inputs, targets)` batch.

    The model output has dimensions 1 and 2 swapped before it is handed to
    `F.cross_entropy` together with the targets.
    """
    inputs, targets = data
    logits = model(inputs)
    class_first = logits.transpose(1, 2)
    return F.cross_entropy(class_first, targets, reduction='mean')

# Main training loop. Starts at `epochs_run` (the iteration number recovered
# from the newest checkpoint's file name) and counts up to NUM_BATCHES.
for i in tqdm.tqdm(range(epochs_run, NUM_BATCHES), mininterval=10., desc='training'):
    model.train()

    # Accumulate gradients over several batches before one optimizer step.
    # NOTE(review): the loss is not divided by GRADIENT_ACCUMULATE_EVERY, so the
    # accumulated gradient is a sum over the batches rather than a mean —
    # confirm this is intended.
    for __ in range(GRADIENT_ACCUMULATE_EVERY):
        loss = get_batch_loss(model, next(train_loader))
        loss.backward()

    # Only the loss of the last accumulated batch is reported.
    print(f'training loss: {loss.item()}')
    # Clip the total gradient norm to 0.5 before applying the update.
    torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
    optim.step()
    optim.zero_grad()
    scheduler.step()

    # Every VALIDATE_EVERY iterations: checkpoint model, optimizer and scheduler
    # to Drive, then report the loss on a single validation batch.
    # NOTE(review): the checkpoint is named after `i` but written after iteration
    # `i` has already run, so a resumed run repeats that iteration — confirm
    # whether this off-by-one matters.
    if i % VALIDATE_EVERY == 0:
        torch.save(model.state_dict(), os.path.join('/content/drive/My Drive/model_saves', 'epoch-{}.pt'.format(i)))
        torch.save(optim.state_dict(),'/content/drive/My Drive/model_saves/optim.pt')
        torch.save(scheduler.state_dict(),'/content/drive/My Drive/model_saves/scheduler.pt')
        model.eval()
        with torch.no_grad():
            loss = get_batch_loss(model, next(val_loader))
            print(f'validation loss: {loss.item()}')

    # Every GENERATE_EVERY iterations: print a validation window as the prompt,
    # then sample GENERATE_LENGTH tokens from the model and print them.
    if i % GENERATE_EVERY == 0:
        model.eval()
        with torch.no_grad():
            # The dataset ignores the index chosen here and returns a random
            # window; only the input half of the pair is used as the prompt.
            inp, _ = random.choice(val_dataset)
            output_str = ''
            prime = decode_tokens(inp)

            # print(f'%s \n\n %s', (prime, '*' * 100))
            print(prime)
            print('*'*100)

            for _ in tqdm.tqdm(range(GENERATE_LENGTH), desc='generating'):
                # Add a leading batch dimension for the forward pass.
                logits = model(inp[None, :])
                next_token = sample_next_token(logits)
                output_str += decode_token(next_token)
                # Slide the context: drop the oldest token and append the new
                # one, so the input length stays constant.
                inp = torch.cat((inp[1:], next_token), dim=0)

            print(output_str)

training:   0%|          | 0/92900 [00:00<?, ?it/s]

28049
7012
training loss: 1.8468937873840332


training:   0%|          | 1/92900 [00:17<438:42:28, 17.00s/it]

validation loss: 1.802687644958496


training:   0%|          | 2/92900 [00:32<428:04:23, 16.59s/it]

training loss: 1.8055025339126587


training:   0%|          | 3/92900 [00:48<420:33:07, 16.30s/it]

training loss: 1.751235842704773


training:   0%|          | 4/92900 [01:03<415:20:21, 16.10s/it]

training loss: 1.8553156852722168


training:   0%|          | 5/92900 [01:19<411:42:28, 15.96s/it]

training loss: 1.7726306915283203


training:   0%|          | 6/92900 [01:35<409:07:03, 15.85s/it]

training loss: 1.767756700515747


training:   0%|          | 7/92900 [01:50<407:14:53, 15.78s/it]

training loss: 1.8002458810806274


training:   0%|          | 8/92900 [02:06<405:59:29, 15.73s/it]

training loss: 1.7710837125778198


training:   0%|          | 9/92900 [02:21<405:13:42, 15.70s/it]

training loss: 1.7752034664154053


training:   0%|          | 10/92900 [02:37<404:32:04, 15.68s/it]

training loss: 1.7413361072540283


training:   0%|          | 11/92900 [02:53<404:00:29, 15.66s/it]

training loss: 1.6739823818206787


training:   0%|          | 12/92900 [03:08<403:51:20, 15.65s/it]

training loss: 1.8496626615524292


training:   0%|          | 13/92900 [03:24<403:42:50, 15.65s/it]

training loss: 1.764183759689331


training:   0%|          | 14/92900 [03:40<403:31:55, 15.64s/it]

training loss: 1.793987512588501


training:   0%|          | 15/92900 [03:55<403:19:22, 15.63s/it]

training loss: 1.7386090755462646


training:   0%|          | 16/92900 [04:11<403:14:03, 15.63s/it]

training loss: 1.8399078845977783


training:   0%|          | 17/92900 [04:26<403:11:30, 15.63s/it]

training loss: 1.7124240398406982


training:   0%|          | 18/92900 [04:42<403:08:24, 15.63s/it]

training loss: 1.7235465049743652


training:   0%|          | 19/92900 [04:58<403:01:28, 15.62s/it]

training loss: 1.7142373323440552


training:   0%|          | 20/92900 [05:13<403:00:31, 15.62s/it]

training loss: 1.5543521642684937


training:   0%|          | 21/92900 [05:29<402:58:23, 15.62s/it]

training loss: 1.6148852109909058


training:   0%|          | 22/92900 [05:45<402:59:39, 15.62s/it]

training loss: 1.7928129434585571


training:   0%|          | 23/92900 [06:00<403:03:08, 15.62s/it]

training loss: 1.7569942474365234


training:   0%|          | 24/92900 [06:16<403:04:06, 15.62s/it]

training loss: 1.7622822523117065


training:   0%|          | 25/92900 [06:31<403:02:00, 15.62s/it]

training loss: 1.7789883613586426


training:   0%|          | 26/92900 [06:47<402:59:48, 15.62s/it]

training loss: 1.8065305948257446


training:   0%|          | 27/92900 [07:03<402:57:29, 15.62s/it]

training loss: 1.690037488937378


training:   0%|          | 28/92900 [07:18<403:02:07, 15.62s/it]

training loss: 1.6770029067993164


training:   0%|          | 29/92900 [07:34<402:59:28, 15.62s/it]

training loss: 1.754726767539978


training:   0%|          | 30/92900 [07:50<403:02:28, 15.62s/it]

training loss: 1.7605323791503906


training:   0%|          | 31/92900 [08:05<403:02:27, 15.62s/it]

training loss: 1.7741674184799194


training:   0%|          | 32/92900 [08:21<403:05:17, 15.63s/it]

training loss: 1.7115556001663208


training:   0%|          | 33/92900 [08:36<403:06:05, 15.63s/it]

training loss: 1.7798689603805542


training:   0%|          | 34/92900 [08:52<402:57:47, 15.62s/it]

training loss: 1.613707184791565


training:   0%|          | 35/92900 [09:08<403:02:16, 15.62s/it]

training loss: 1.8114155530929565


training:   0%|          | 36/92900 [09:23<402:58:55, 15.62s/it]

training loss: 1.7391893863677979


training:   0%|          | 37/92900 [09:39<402:58:28, 15.62s/it]

training loss: 1.7710647583007812


training:   0%|          | 38/92900 [09:55<402:52:48, 15.62s/it]

training loss: 1.718461275100708


training:   0%|          | 39/92900 [10:10<402:49:59, 15.62s/it]

training loss: 1.7486028671264648


training:   0%|          | 40/92900 [10:26<402:50:30, 15.62s/it]

training loss: 1.7596808671951294


training:   0%|          | 41/92900 [10:41<402:52:56, 15.62s/it]

training loss: 1.719893217086792


training:   0%|          | 42/92900 [10:57<402:55:48, 15.62s/it]

training loss: 1.7617465257644653


training:   0%|          | 43/92900 [11:13<402:52:43, 15.62s/it]

training loss: 1.71931791305542


training:   0%|          | 44/92900 [11:28<402:52:41, 15.62s/it]

training loss: 1.7546000480651855


training:   0%|          | 45/92900 [11:44<402:50:51, 15.62s/it]

training loss: 1.6816877126693726


training:   0%|          | 46/92900 [11:59<402:45:33, 15.62s/it]

training loss: 1.657331943511963


training:   0%|          | 47/92900 [12:15<402:51:59, 15.62s/it]

training loss: 1.7543748617172241


training:   0%|          | 48/92900 [12:31<402:55:16, 15.62s/it]

training loss: 1.7666118144989014


training:   0%|          | 49/92900 [12:46<402:55:23, 15.62s/it]

training loss: 1.700875997543335


training:   0%|          | 50/92900 [13:02<402:49:30, 15.62s/it]

training loss: 1.692675232887268


training:   0%|          | 51/92900 [13:18<402:52:57, 15.62s/it]

training loss: 1.7107725143432617


training:   0%|          | 52/92900 [13:33<402:52:18, 15.62s/it]

training loss: 1.639580488204956


training:   0%|          | 53/92900 [13:49<403:04:09, 15.63s/it]

training loss: 1.6286187171936035


training:   0%|          | 54/92900 [14:04<403:02:20, 15.63s/it]

training loss: 1.6578125953674316


training:   0%|          | 55/92900 [14:20<402:58:44, 15.63s/it]

training loss: 1.6152397394180298


training:   0%|          | 56/92900 [14:36<402:53:09, 15.62s/it]

training loss: 1.7309880256652832


training:   0%|          | 57/92900 [14:51<402:53:43, 15.62s/it]

training loss: 1.7769978046417236


training:   0%|          | 58/92900 [15:07<402:58:43, 15.63s/it]

training loss: 1.7191896438598633


training:   0%|          | 59/92900 [15:23<402:59:07, 15.63s/it]

training loss: 1.6719005107879639


training:   0%|          | 60/92900 [15:38<402:59:03, 15.63s/it]

training loss: 1.697094440460205


training:   0%|          | 61/92900 [15:54<402:54:45, 15.62s/it]

training loss: 1.7261371612548828


training:   0%|          | 62/92900 [16:09<402:56:44, 15.63s/it]

training loss: 1.6404268741607666


training:   0%|          | 63/92900 [16:25<402:59:37, 15.63s/it]

training loss: 1.7327909469604492


training:   0%|          | 64/92900 [16:41<402:57:52, 15.63s/it]

training loss: 1.7482047080993652


training:   0%|          | 65/92900 [16:56<402:56:55, 15.63s/it]

training loss: 1.6502010822296143


training:   0%|          | 66/92900 [17:12<402:57:07, 15.63s/it]

training loss: 1.784961223602295


training:   0%|          | 67/92900 [17:28<402:56:19, 15.63s/it]

training loss: 1.6974607706069946


training:   0%|          | 68/92900 [17:43<402:58:30, 15.63s/it]

training loss: 1.8200716972351074


training:   0%|          | 69/92900 [17:59<402:57:25, 15.63s/it]

training loss: 1.7958632707595825


training:   0%|          | 70/92900 [18:14<402:51:36, 15.62s/it]

training loss: 1.6213876008987427


training:   0%|          | 71/92900 [18:30<402:45:50, 15.62s/it]

training loss: 1.7427558898925781


training:   0%|          | 72/92900 [18:46<402:47:14, 15.62s/it]

training loss: 1.8052289485931396


training:   0%|          | 73/92900 [19:01<402:47:30, 15.62s/it]

training loss: 1.7234703302383423


training:   0%|          | 74/92900 [19:17<402:50:10, 15.62s/it]

training loss: 1.6924800872802734


training:   0%|          | 75/92900 [19:33<402:50:38, 15.62s/it]

training loss: 1.7312639951705933


training:   0%|          | 76/92900 [19:48<402:46:25, 15.62s/it]

training loss: 1.6867307424545288


training:   0%|          | 77/92900 [20:04<402:44:23, 15.62s/it]

training loss: 1.7971712350845337


training:   0%|          | 78/92900 [20:19<402:51:34, 15.62s/it]

training loss: 1.769513726234436


training:   0%|          | 79/92900 [20:35<402:46:39, 15.62s/it]

training loss: 1.7346553802490234


training:   0%|          | 80/92900 [20:51<402:49:56, 15.62s/it]

training loss: 1.6858298778533936


training:   0%|          | 81/92900 [21:06<402:55:29, 15.63s/it]

training loss: 1.742235779762268


training:   0%|          | 82/92900 [21:22<402:51:07, 15.62s/it]

training loss: 1.7203102111816406


training:   0%|          | 83/92900 [21:38<402:49:00, 15.62s/it]

training loss: 1.7702040672302246


training:   0%|          | 84/92900 [21:53<402:53:06, 15.63s/it]

training loss: 1.5810576677322388


training:   0%|          | 85/92900 [22:09<402:55:59, 15.63s/it]

training loss: 1.722733736038208


training:   0%|          | 86/92900 [22:24<402:51:44, 15.63s/it]

training loss: 1.7070415019989014


training:   0%|          | 87/92900 [22:40<402:42:38, 15.62s/it]

training loss: 1.723016381263733


training:   0%|          | 88/92900 [22:56<402:41:29, 15.62s/it]

training loss: 1.7787882089614868


training:   0%|          | 89/92900 [23:11<402:41:18, 15.62s/it]

training loss: 1.7308590412139893


training:   0%|          | 90/92900 [23:27<402:40:18, 15.62s/it]

training loss: 1.739487648010254


training:   0%|          | 91/92900 [23:43<402:49:02, 15.63s/it]

training loss: 1.7335829734802246


training:   0%|          | 92/92900 [23:58<402:49:28, 15.63s/it]

training loss: 1.7032020092010498


training:   0%|          | 93/92900 [24:14<402:51:28, 15.63s/it]

training loss: 1.7292089462280273


training:   0%|          | 94/92900 [24:29<402:45:15, 15.62s/it]

training loss: 1.6555389165878296


training:   0%|          | 95/92900 [24:45<402:49:26, 15.63s/it]

training loss: 1.777425765991211


training:   0%|          | 96/92900 [25:01<402:47:44, 15.63s/it]

training loss: 1.7631758451461792


training:   0%|          | 97/92900 [25:16<402:53:35, 15.63s/it]

training loss: 1.7266767024993896


training:   0%|          | 98/92900 [25:32<402:54:31, 15.63s/it]

training loss: 1.7664827108383179


training:   0%|          | 99/92900 [25:48<402:49:01, 15.63s/it]

training loss: 1.6743112802505493


training:   0%|          | 100/92900 [26:03<402:47:23, 15.63s/it]

training loss: 1.7487273216247559
training loss: 1.603800892829895


training:   0%|          | 101/92900 [26:20<413:16:44, 16.03s/it]

validation loss: 1.7840944528579712


training:   0%|          | 102/92900 [26:36<410:14:24, 15.91s/it]

training loss: 1.6680952310562134


training:   0%|          | 103/92900 [26:51<407:57:20, 15.83s/it]

training loss: 1.6256617307662964


training:   0%|          | 104/92900 [27:07<406:24:21, 15.77s/it]

training loss: 1.64841890335083


training:   0%|          | 105/92900 [27:23<405:18:35, 15.72s/it]

training loss: 1.7301509380340576


training:   0%|          | 106/92900 [27:38<404:29:40, 15.69s/it]

training loss: 1.7108664512634277


training:   0%|          | 107/92900 [27:54<403:52:22, 15.67s/it]

training loss: 1.7543845176696777


training:   0%|          | 108/92900 [28:10<403:35:24, 15.66s/it]

training loss: 1.6428864002227783


training:   0%|          | 109/92900 [28:25<403:12:43, 15.64s/it]

training loss: 1.7053438425064087


training:   0%|          | 110/92900 [28:41<402:55:45, 15.63s/it]

training loss: 1.7233476638793945


training:   0%|          | 111/92900 [28:56<402:46:25, 15.63s/it]

training loss: 1.6014286279678345


training:   0%|          | 112/92900 [29:12<402:41:22, 15.62s/it]

training loss: 1.5885158777236938


training:   0%|          | 113/92900 [29:28<402:40:39, 15.62s/it]

training loss: 1.6926604509353638


training:   0%|          | 114/92900 [29:43<402:31:41, 15.62s/it]

training loss: 1.7179429531097412


training:   0%|          | 115/92900 [29:59<402:23:16, 15.61s/it]

training loss: 1.625577688217163


training:   0%|          | 116/92900 [30:14<402:19:08, 15.61s/it]

training loss: 1.7150001525878906


training:   0%|          | 117/92900 [30:30<402:11:59, 15.61s/it]

training loss: 1.8238451480865479


training:   0%|          | 118/92900 [30:46<402:07:33, 15.60s/it]

training loss: 1.718449592590332


training:   0%|          | 119/92900 [31:01<402:07:05, 15.60s/it]

training loss: 1.7226197719573975


training:   0%|          | 120/92900 [31:17<402:12:22, 15.61s/it]

training loss: 1.7083890438079834


training:   0%|          | 121/92900 [31:32<402:16:31, 15.61s/it]

training loss: 1.7878295183181763


training:   0%|          | 122/92900 [31:48<402:08:55, 15.60s/it]

training loss: 1.6498057842254639


training:   0%|          | 123/92900 [32:04<402:03:14, 15.60s/it]

training loss: 1.6031759977340698


training:   0%|          | 124/92900 [32:19<402:04:00, 15.60s/it]

training loss: 1.6466944217681885


training:   0%|          | 125/92900 [32:35<401:55:35, 15.60s/it]

training loss: 1.7788684368133545


training:   0%|          | 126/92900 [32:50<401:50:01, 15.59s/it]

training loss: 1.737708568572998


training:   0%|          | 127/92900 [33:06<401:54:13, 15.60s/it]

training loss: 1.6259958744049072


training:   0%|          | 128/92900 [33:22<401:52:52, 15.59s/it]

training loss: 1.779010534286499


training:   0%|          | 129/92900 [33:37<401:54:45, 15.60s/it]

training loss: 1.7187690734863281


training:   0%|          | 130/92900 [33:53<401:49:19, 15.59s/it]

training loss: 1.7775607109069824


training:   0%|          | 131/92900 [34:08<401:53:17, 15.60s/it]

training loss: 1.7743432521820068


training:   0%|          | 132/92900 [34:24<401:55:21, 15.60s/it]

training loss: 1.6935049295425415


training:   0%|          | 133/92900 [34:40<401:53:49, 15.60s/it]

training loss: 1.7014796733856201


training:   0%|          | 134/92900 [34:55<401:48:40, 15.59s/it]

training loss: 1.7377511262893677


training:   0%|          | 135/92900 [35:11<401:49:19, 15.59s/it]

training loss: 1.7246487140655518


training:   0%|          | 136/92900 [35:26<401:47:42, 15.59s/it]

training loss: 1.6342600584030151


training:   0%|          | 137/92900 [35:42<401:47:51, 15.59s/it]

training loss: 1.7888445854187012


training:   0%|          | 138/92900 [35:58<401:46:09, 15.59s/it]

training loss: 1.660156011581421


training:   0%|          | 139/92900 [36:13<401:47:34, 15.59s/it]

training loss: 1.7133816480636597


training:   0%|          | 140/92900 [36:29<401:48:21, 15.59s/it]

training loss: 1.5979547500610352


training:   0%|          | 141/92900 [36:44<401:44:27, 15.59s/it]

training loss: 1.7767125368118286


training:   0%|          | 142/92900 [37:00<401:41:26, 15.59s/it]

training loss: 1.7991399765014648


training:   0%|          | 143/92900 [37:15<401:40:35, 15.59s/it]

training loss: 1.6792482137680054


training:   0%|          | 144/92900 [37:31<401:39:10, 15.59s/it]

training loss: 1.48764967918396


training:   0%|          | 145/92900 [37:47<401:43:34, 15.59s/it]

training loss: 1.5739883184432983


training:   0%|          | 146/92900 [38:02<401:38:22, 15.59s/it]

training loss: 1.7233340740203857


training:   0%|          | 147/92900 [38:18<401:39:40, 15.59s/it]

training loss: 1.6680983304977417


training:   0%|          | 148/92900 [38:33<401:34:37, 15.59s/it]

training loss: 1.820534586906433


training:   0%|          | 149/92900 [38:49<401:35:49, 15.59s/it]

training loss: 1.7470431327819824


training:   0%|          | 150/92900 [39:05<401:40:55, 15.59s/it]

training loss: 1.8211586475372314


training:   0%|          | 151/92900 [39:20<401:46:58, 15.59s/it]

training loss: 1.6244843006134033


training:   0%|          | 152/92900 [39:36<401:46:56, 15.60s/it]

training loss: 1.6524004936218262


training:   0%|          | 153/92900 [39:51<401:43:01, 15.59s/it]

training loss: 1.6245273351669312


training:   0%|          | 154/92900 [40:07<401:42:03, 15.59s/it]

training loss: 1.7440545558929443


training:   0%|          | 155/92900 [40:23<401:40:31, 15.59s/it]

training loss: 1.720760464668274


training:   0%|          | 156/92900 [40:38<401:39:33, 15.59s/it]

training loss: 1.7257108688354492


training:   0%|          | 157/92900 [40:54<401:38:35, 15.59s/it]

training loss: 1.6958708763122559


training:   0%|          | 158/92900 [41:09<401:41:07, 15.59s/it]

training loss: 1.7109802961349487


training:   0%|          | 159/92900 [41:25<401:47:06, 15.60s/it]

training loss: 1.743286371231079


training:   0%|          | 160/92900 [41:41<402:29:44, 15.62s/it]

training loss: 1.7588274478912354


training:   0%|          | 161/92900 [41:56<402:15:16, 15.61s/it]

training loss: 1.7568809986114502


training:   0%|          | 162/92900 [42:12<402:04:05, 15.61s/it]

training loss: 1.611032485961914


training:   0%|          | 163/92900 [42:27<401:59:53, 15.61s/it]

training loss: 1.6568667888641357


training:   0%|          | 164/92900 [42:43<401:55:40, 15.60s/it]

training loss: 1.74855375289917


training:   0%|          | 165/92900 [42:59<401:51:54, 15.60s/it]

training loss: 1.8400070667266846


training:   0%|          | 166/92900 [43:14<401:48:07, 15.60s/it]

training loss: 1.6072875261306763


training:   0%|          | 167/92900 [43:30<401:47:48, 15.60s/it]

training loss: 1.7504973411560059


training:   0%|          | 168/92900 [43:45<401:49:58, 15.60s/it]

training loss: 1.7256799936294556


training:   0%|          | 169/92900 [44:01<401:49:18, 15.60s/it]

training loss: 1.716851830482483


training:   0%|          | 170/92900 [44:17<401:47:19, 15.60s/it]

training loss: 1.4596641063690186


training:   0%|          | 171/92900 [44:32<401:43:54, 15.60s/it]

training loss: 1.6674644947052002


training:   0%|          | 172/92900 [44:48<401:43:20, 15.60s/it]

training loss: 1.672451376914978


training:   0%|          | 173/92900 [45:03<401:37:15, 15.59s/it]

training loss: 1.6848829984664917


training:   0%|          | 174/92900 [45:19<401:42:35, 15.60s/it]

training loss: 1.7314732074737549


training:   0%|          | 175/92900 [45:35<401:38:19, 15.59s/it]

training loss: 1.7266919612884521


training:   0%|          | 176/92900 [45:50<401:36:43, 15.59s/it]

training loss: 1.7785851955413818


training:   0%|          | 177/92900 [46:06<401:37:07, 15.59s/it]

training loss: 1.7150641679763794


training:   0%|          | 178/92900 [46:21<401:41:40, 15.60s/it]

training loss: 1.6598446369171143


training:   0%|          | 179/92900 [46:37<401:41:21, 15.60s/it]

training loss: 1.731834053993225


training:   0%|          | 180/92900 [46:53<401:33:40, 15.59s/it]

training loss: 1.667119026184082


training:   0%|          | 181/92900 [47:08<401:36:33, 15.59s/it]

training loss: 1.7477694749832153


training:   0%|          | 182/92900 [47:24<401:34:52, 15.59s/it]

training loss: 1.7212984561920166


training:   0%|          | 183/92900 [47:39<401:34:51, 15.59s/it]

training loss: 1.5898939371109009


training:   0%|          | 184/92900 [47:55<401:35:42, 15.59s/it]

training loss: 1.6695704460144043


training:   0%|          | 185/92900 [48:11<401:31:46, 15.59s/it]

training loss: 1.7677688598632812


training:   0%|          | 186/92900 [48:26<401:31:49, 15.59s/it]

training loss: 1.6809993982315063


training:   0%|          | 187/92900 [48:42<401:32:02, 15.59s/it]

training loss: 1.647216796875


training:   0%|          | 188/92900 [48:57<401:35:10, 15.59s/it]

training loss: 1.7933801412582397


training:   0%|          | 189/92900 [49:13<401:34:26, 15.59s/it]

training loss: 1.7750508785247803


training:   0%|          | 190/92900 [49:28<401:34:45, 15.59s/it]

training loss: 1.6292122602462769


training:   0%|          | 191/92900 [49:44<401:31:13, 15.59s/it]

training loss: 1.7137798070907593


training:   0%|          | 192/92900 [50:00<401:35:03, 15.59s/it]

training loss: 1.7480533123016357


training:   0%|          | 193/92900 [50:15<401:35:02, 15.59s/it]

training loss: 1.7925947904586792


training:   0%|          | 194/92900 [50:31<401:29:42, 15.59s/it]

training loss: 1.7410308122634888


training:   0%|          | 195/92900 [50:46<401:33:26, 15.59s/it]

training loss: 1.6605701446533203


training:   0%|          | 196/92900 [51:02<401:31:30, 15.59s/it]

training loss: 1.790672779083252


training:   0%|          | 197/92900 [51:18<401:37:48, 15.60s/it]

training loss: 1.6313841342926025


training:   0%|          | 198/92900 [51:33<401:34:24, 15.59s/it]

training loss: 1.7809441089630127


training:   0%|          | 199/92900 [51:49<401:34:28, 15.59s/it]

training loss: 1.7501516342163086


training:   0%|          | 200/92900 [52:04<401:34:37, 15.60s/it]

training loss: 1.7198959589004517
training loss: 1.7533049583435059


training:   0%|          | 201/92900 [52:21<412:04:18, 16.00s/it]

validation loss: 1.6933138370513916


training:   0%|          | 202/92900 [52:37<408:58:29, 15.88s/it]

training loss: 1.6541767120361328


training:   0%|          | 203/92900 [52:53<406:38:14, 15.79s/it]

training loss: 1.5437932014465332


training:   0%|          | 204/92900 [53:08<405:09:26, 15.73s/it]

training loss: 1.741005778312683


training:   0%|          | 205/92900 [53:24<404:02:45, 15.69s/it]

training loss: 1.6471315622329712


training:   0%|          | 206/92900 [53:39<403:17:46, 15.66s/it]

training loss: 1.6805921792984009


training:   0%|          | 207/92900 [53:55<402:45:18, 15.64s/it]

training loss: 1.534804105758667


training:   0%|          | 208/92900 [54:11<402:18:21, 15.62s/it]

training loss: 1.6482746601104736


training:   0%|          | 209/92900 [54:26<402:00:03, 15.61s/it]

training loss: 1.7396061420440674


training:   0%|          | 210/92900 [54:42<401:46:10, 15.60s/it]

training loss: 1.6748270988464355


training:   0%|          | 211/92900 [54:57<401:38:20, 15.60s/it]

training loss: 1.5131464004516602


training:   0%|          | 212/92900 [55:13<401:42:42, 15.60s/it]

training loss: 1.675018072128296


training:   0%|          | 213/92900 [55:28<401:38:00, 15.60s/it]

training loss: 1.7303202152252197


training:   0%|          | 214/92900 [55:44<401:34:59, 15.60s/it]

training loss: 1.774868369102478


training:   0%|          | 215/92900 [56:00<401:28:01, 15.59s/it]

training loss: 1.6259546279907227


training:   0%|          | 216/92900 [56:15<401:23:38, 15.59s/it]

training loss: 1.6259218454360962


training:   0%|          | 217/92900 [56:31<401:21:59, 15.59s/it]

training loss: 1.6416271924972534


training:   0%|          | 218/92900 [56:46<401:20:59, 15.59s/it]

training loss: 1.7815343141555786


training:   0%|          | 219/92900 [57:02<401:19:28, 15.59s/it]

training loss: 1.6041487455368042


training:   0%|          | 220/92900 [57:18<401:24:36, 15.59s/it]

training loss: 1.7253810167312622


training:   0%|          | 221/92900 [57:33<401:20:07, 15.59s/it]

training loss: 1.7610682249069214


training:   0%|          | 222/92900 [57:49<401:21:31, 15.59s/it]

training loss: 1.703945517539978


training:   0%|          | 223/92900 [58:04<401:22:20, 15.59s/it]

training loss: 1.6681855916976929


training:   0%|          | 224/92900 [58:20<401:26:09, 15.59s/it]

training loss: 1.719533085823059


training:   0%|          | 225/92900 [58:36<401:22:39, 15.59s/it]

training loss: 1.7741096019744873


training:   0%|          | 226/92900 [58:51<401:19:33, 15.59s/it]

training loss: 1.602205753326416


training:   0%|          | 227/92900 [59:07<401:19:37, 15.59s/it]

training loss: 1.7035202980041504


training:   0%|          | 228/92900 [59:22<401:21:25, 15.59s/it]

training loss: 1.573218822479248


training:   0%|          | 229/92900 [59:38<401:22:52, 15.59s/it]

training loss: 1.6491031646728516


training:   0%|          | 230/92900 [59:54<401:19:18, 15.59s/it]

training loss: 1.5924370288848877


training:   0%|          | 231/92900 [1:00:09<401:20:31, 15.59s/it]

training loss: 1.6871922016143799


training:   0%|          | 232/92900 [1:00:25<401:26:44, 15.60s/it]

training loss: 1.6999517679214478


training:   0%|          | 233/92900 [1:00:40<401:24:57, 15.59s/it]

training loss: 1.7076560258865356


training:   0%|          | 234/92900 [1:00:56<401:25:26, 15.60s/it]

training loss: 1.6739609241485596


training:   0%|          | 235/92900 [1:01:11<401:23:40, 15.59s/it]

training loss: 1.7154220342636108


training:   0%|          | 236/92900 [1:01:27<401:25:29, 15.60s/it]

training loss: 1.5496656894683838


training:   0%|          | 237/92900 [1:01:43<401:30:09, 15.60s/it]

training loss: 1.6374419927597046


training:   0%|          | 238/92900 [1:01:58<401:24:52, 15.60s/it]

training loss: 1.6667143106460571


training:   0%|          | 239/92900 [1:02:14<401:23:51, 15.59s/it]

training loss: 1.508140206336975


training:   0%|          | 240/92900 [1:02:29<401:20:13, 15.59s/it]

training loss: 1.511535406112671


training:   0%|          | 241/92900 [1:02:45<401:24:32, 15.60s/it]

training loss: 1.6308245658874512


training:   0%|          | 242/92900 [1:03:01<401:22:35, 15.59s/it]

training loss: 1.76625394821167


training:   0%|          | 243/92900 [1:03:16<401:17:45, 15.59s/it]

training loss: 1.6288505792617798


training:   0%|          | 244/92900 [1:03:32<401:17:45, 15.59s/it]

training loss: 1.6797093152999878


training:   0%|          | 245/92900 [1:03:47<401:17:57, 15.59s/it]

training loss: 1.6402732133865356


training:   0%|          | 246/92900 [1:04:03<401:16:30, 15.59s/it]

training loss: 1.659310221672058


training:   0%|          | 247/92900 [1:04:19<401:16:03, 15.59s/it]

training loss: 1.6771941184997559


training:   0%|          | 248/92900 [1:04:34<401:12:41, 15.59s/it]

training loss: 1.692868947982788


training:   0%|          | 249/92900 [1:04:50<401:11:15, 15.59s/it]

training loss: 1.6335233449935913


training:   0%|          | 250/92900 [1:05:05<401:14:36, 15.59s/it]

training loss: 1.6485496759414673


training:   0%|          | 251/92900 [1:05:21<401:21:10, 15.60s/it]

training loss: 1.765161156654358


training:   0%|          | 252/92900 [1:05:37<401:21:26, 15.60s/it]

training loss: 1.668360710144043


training:   0%|          | 253/92900 [1:05:52<401:19:56, 15.59s/it]

training loss: 1.6879668235778809


training:   0%|          | 254/92900 [1:06:08<401:20:44, 15.60s/it]

training loss: 1.7082421779632568


training:   0%|          | 255/92900 [1:06:23<401:22:17, 15.60s/it]

training loss: 1.6063826084136963


training:   0%|          | 256/92900 [1:06:39<401:22:34, 15.60s/it]

training loss: 1.635172724723816


training:   0%|          | 257/92900 [1:06:55<401:19:16, 15.59s/it]

training loss: 1.7064404487609863


training:   0%|          | 258/92900 [1:07:10<401:18:39, 15.59s/it]

training loss: 1.6652125120162964


training:   0%|          | 259/92900 [1:07:26<401:18:33, 15.59s/it]

training loss: 1.6359450817108154


training:   0%|          | 260/92900 [1:07:41<401:15:09, 15.59s/it]

training loss: 1.5636518001556396


training:   0%|          | 261/92900 [1:07:57<401:16:14, 15.59s/it]

training loss: 1.6890202760696411


training:   0%|          | 262/92900 [1:08:13<401:15:28, 15.59s/it]

training loss: 1.6867055892944336


training:   0%|          | 263/92900 [1:08:28<401:16:58, 15.59s/it]

training loss: 1.6521514654159546


training:   0%|          | 264/92900 [1:08:44<401:17:15, 15.59s/it]

training loss: 1.6691261529922485


training:   0%|          | 265/92900 [1:08:59<401:15:24, 15.59s/it]

training loss: 1.792036771774292


training:   0%|          | 266/92900 [1:09:15<401:11:35, 15.59s/it]

training loss: 1.6942468881607056


training:   0%|          | 267/92900 [1:09:30<401:10:25, 15.59s/it]

training loss: 1.6462756395339966


training:   0%|          | 268/92900 [1:09:46<401:08:42, 15.59s/it]

training loss: 1.6666115522384644


training:   0%|          | 269/92900 [1:10:02<401:04:42, 15.59s/it]

training loss: 1.64992094039917


training:   0%|          | 270/92900 [1:10:17<401:13:23, 15.59s/it]

training loss: 1.6359235048294067


training:   0%|          | 271/92900 [1:10:33<401:10:42, 15.59s/it]

training loss: 1.6953319311141968


training:   0%|          | 272/92900 [1:10:48<401:10:05, 15.59s/it]

training loss: 1.731873869895935


training:   0%|          | 273/92900 [1:11:04<401:06:23, 15.59s/it]

training loss: 1.6718895435333252


training:   0%|          | 274/92900 [1:11:20<401:08:30, 15.59s/it]

training loss: 1.6249055862426758


training:   0%|          | 275/92900 [1:11:35<401:04:24, 15.59s/it]

training loss: 1.6284517049789429


training:   0%|          | 276/92900 [1:11:51<401:06:54, 15.59s/it]

training loss: 1.737566351890564


training:   0%|          | 277/92900 [1:12:06<401:07:49, 15.59s/it]

training loss: 1.756954312324524


training:   0%|          | 278/92900 [1:12:22<401:08:00, 15.59s/it]

training loss: 1.6764779090881348


training:   0%|          | 279/92900 [1:12:38<401:07:31, 15.59s/it]

training loss: 1.6590970754623413


training:   0%|          | 280/92900 [1:12:53<401:04:25, 15.59s/it]

training loss: 1.7022552490234375


training:   0%|          | 281/92900 [1:13:09<401:08:44, 15.59s/it]

training loss: 1.7489755153656006


training:   0%|          | 282/92900 [1:13:24<401:08:00, 15.59s/it]

training loss: 1.6963313817977905


training:   0%|          | 283/92900 [1:13:40<401:10:42, 15.59s/it]

training loss: 1.696363925933838


training:   0%|          | 284/92900 [1:13:56<401:09:58, 15.59s/it]

training loss: 1.6714119911193848


training:   0%|          | 285/92900 [1:14:11<401:08:07, 15.59s/it]

training loss: 1.571071982383728


training:   0%|          | 286/92900 [1:14:27<401:13:00, 15.60s/it]

training loss: 1.703980565071106


training:   0%|          | 287/92900 [1:14:42<401:09:20, 15.59s/it]

training loss: 1.6582703590393066


training:   0%|          | 288/92900 [1:14:58<401:05:19, 15.59s/it]

training loss: 1.713454008102417


training:   0%|          | 289/92900 [1:15:13<401:03:23, 15.59s/it]

training loss: 1.67417573928833


training:   0%|          | 290/92900 [1:15:29<401:04:11, 15.59s/it]

training loss: 1.6904464960098267


training:   0%|          | 291/92900 [1:15:45<401:08:49, 15.59s/it]

training loss: 1.572678804397583


training:   0%|          | 292/92900 [1:16:00<401:07:40, 15.59s/it]

training loss: 1.7026222944259644


training:   0%|          | 293/92900 [1:16:16<401:13:04, 15.60s/it]

training loss: 1.5065083503723145


training:   0%|          | 294/92900 [1:16:31<401:12:48, 15.60s/it]

training loss: 1.6704518795013428


training:   0%|          | 295/92900 [1:16:47<401:15:16, 15.60s/it]

training loss: 1.6733977794647217


training:   0%|          | 296/92900 [1:17:03<401:12:07, 15.60s/it]

training loss: 1.704585313796997


training:   0%|          | 297/92900 [1:17:18<401:12:35, 15.60s/it]

training loss: 1.6662354469299316


training:   0%|          | 298/92900 [1:17:34<401:05:34, 15.59s/it]

training loss: 1.7248988151550293


training:   0%|          | 299/92900 [1:17:49<401:01:03, 15.59s/it]

training loss: 1.8071397542953491


training:   0%|          | 300/92900 [1:18:05<401:07:46, 15.59s/it]

training loss: 1.6579527854919434
training loss: 1.7294652462005615


training:   0%|          | 301/92900 [1:18:22<411:47:47, 16.01s/it]

validation loss: 1.6623690128326416


training:   0%|          | 302/92900 [1:18:38<408:37:51, 15.89s/it]

training loss: 1.6471573114395142


training:   0%|          | 303/92900 [1:18:53<406:20:12, 15.80s/it]

training loss: 1.745339035987854


training:   0%|          | 304/92900 [1:19:09<404:55:21, 15.74s/it]

training loss: 1.6981161832809448


training:   0%|          | 305/92900 [1:19:24<403:48:19, 15.70s/it]

training loss: 1.7021894454956055


training:   0%|          | 306/92900 [1:19:40<402:54:20, 15.66s/it]

training loss: 1.540069341659546


training:   0%|          | 307/92900 [1:19:56<402:22:59, 15.64s/it]

training loss: 1.5755393505096436


training:   0%|          | 308/92900 [1:20:11<401:59:12, 15.63s/it]

training loss: 1.7104971408843994


training:   0%|          | 309/92900 [1:20:27<401:46:44, 15.62s/it]

training loss: 1.6468011140823364


training:   0%|          | 310/92900 [1:20:42<401:33:14, 15.61s/it]

training loss: 1.6566212177276611


training:   0%|          | 311/92900 [1:20:58<401:29:26, 15.61s/it]

training loss: 1.6341941356658936


training:   0%|          | 312/92900 [1:21:14<401:20:53, 15.61s/it]

training loss: 1.7138497829437256


training:   0%|          | 313/92900 [1:21:29<401:14:25, 15.60s/it]

training loss: 1.7333029508590698


training:   0%|          | 314/92900 [1:21:45<401:03:19, 15.59s/it]

training loss: 1.565820336341858


training:   0%|          | 315/92900 [1:22:00<401:03:08, 15.59s/it]

training loss: 1.58725905418396


training:   0%|          | 316/92900 [1:22:16<401:01:50, 15.59s/it]

training loss: 1.594820499420166


training:   0%|          | 317/92900 [1:22:32<401:02:35, 15.59s/it]

training loss: 1.7280006408691406


training:   0%|          | 318/92900 [1:22:47<400:57:30, 15.59s/it]

training loss: 1.6341222524642944


training:   0%|          | 319/92900 [1:23:03<400:55:15, 15.59s/it]

training loss: 1.6961475610733032


training:   0%|          | 320/92900 [1:23:18<400:58:04, 15.59s/it]

training loss: 1.7246479988098145


training:   0%|          | 321/92900 [1:23:34<400:51:33, 15.59s/it]

training loss: 1.5636012554168701


training:   0%|          | 322/92900 [1:23:49<400:50:42, 15.59s/it]

training loss: 1.76519775390625


training:   0%|          | 323/92900 [1:24:05<400:58:04, 15.59s/it]

training loss: 1.7173564434051514


training:   0%|          | 324/92900 [1:24:21<401:06:02, 15.60s/it]

training loss: 1.4034061431884766


training:   0%|          | 325/92900 [1:24:36<400:56:18, 15.59s/it]

training loss: 1.6058745384216309


training:   0%|          | 326/92900 [1:24:52<400:47:34, 15.59s/it]

training loss: 1.7633252143859863


training:   0%|          | 327/92900 [1:25:07<400:52:37, 15.59s/it]

training loss: 1.6919176578521729


training:   0%|          | 328/92900 [1:25:23<400:49:56, 15.59s/it]

training loss: 1.6860381364822388


training:   0%|          | 329/92900 [1:25:39<400:44:35, 15.58s/it]

training loss: 1.7163100242614746


training:   0%|          | 330/92900 [1:25:54<400:48:56, 15.59s/it]

training loss: 1.6492938995361328


training:   0%|          | 331/92900 [1:26:10<400:53:58, 15.59s/it]

training loss: 1.6793674230575562


training:   0%|          | 332/92900 [1:26:25<400:55:30, 15.59s/it]

training loss: 1.6288745403289795


training:   0%|          | 333/92900 [1:26:41<400:48:13, 15.59s/it]

training loss: 1.6479747295379639


training:   0%|          | 334/92900 [1:26:57<400:51:41, 15.59s/it]

training loss: 1.7612814903259277


training:   0%|          | 335/92900 [1:27:12<400:53:31, 15.59s/it]

training loss: 1.6330314874649048


training:   0%|          | 336/92900 [1:27:28<400:56:05, 15.59s/it]

training loss: 1.7341363430023193


training:   0%|          | 337/92900 [1:27:43<400:53:00, 15.59s/it]

training loss: 1.6338715553283691


training:   0%|          | 338/92900 [1:27:59<400:47:24, 15.59s/it]

training loss: 1.5155854225158691


training:   0%|          | 339/92900 [1:28:14<400:46:43, 15.59s/it]

training loss: 1.6393136978149414


training:   0%|          | 340/92900 [1:28:30<400:46:31, 15.59s/it]

training loss: 1.6377508640289307


training:   0%|          | 341/92900 [1:28:46<400:40:11, 15.58s/it]

training loss: 1.621907353401184


training:   0%|          | 342/92900 [1:29:01<400:47:11, 15.59s/it]

training loss: 1.6761175394058228


training:   0%|          | 343/92900 [1:29:17<400:55:03, 15.59s/it]

training loss: 1.5765719413757324


training:   0%|          | 344/92900 [1:29:32<400:52:44, 15.59s/it]

training loss: 1.512298822402954


training:   0%|          | 345/92900 [1:29:48<400:45:14, 15.59s/it]

training loss: 1.7166023254394531


training:   0%|          | 346/92900 [1:30:04<400:46:37, 15.59s/it]

training loss: 1.5802605152130127


training:   0%|          | 347/92900 [1:30:19<400:48:34, 15.59s/it]

training loss: 1.68441903591156


training:   0%|          | 348/92900 [1:30:35<400:44:40, 15.59s/it]

training loss: 1.7556872367858887


training:   0%|          | 349/92900 [1:30:50<400:41:07, 15.59s/it]

training loss: 1.6684602499008179


training:   0%|          | 350/92900 [1:31:06<400:48:10, 15.59s/it]

training loss: 1.6431925296783447


training:   0%|          | 351/92900 [1:31:22<400:56:15, 15.60s/it]

training loss: 1.6244641542434692


training:   0%|          | 352/92900 [1:31:37<400:52:51, 15.59s/it]

training loss: 1.644844651222229


training:   0%|          | 353/92900 [1:31:53<400:51:45, 15.59s/it]

training loss: 1.6904879808425903


training:   0%|          | 354/92900 [1:32:08<401:04:22, 15.60s/it]

training loss: 1.6807714700698853


training:   0%|          | 355/92900 [1:32:24<401:00:20, 15.60s/it]

training loss: 1.7596665620803833


training:   0%|          | 356/92900 [1:32:40<400:49:17, 15.59s/it]

training loss: 1.6314208507537842


training:   0%|          | 357/92900 [1:32:55<400:47:42, 15.59s/it]

training loss: 1.678815245628357


training:   0%|          | 358/92900 [1:33:11<400:45:41, 15.59s/it]

training loss: 1.645313024520874


training:   0%|          | 359/92900 [1:33:26<400:48:12, 15.59s/it]

training loss: 1.4770517349243164


training:   0%|          | 360/92900 [1:33:42<400:44:06, 15.59s/it]

training loss: 1.656915545463562


training:   0%|          | 361/92900 [1:33:58<400:51:43, 15.59s/it]

training loss: 1.7221660614013672


training:   0%|          | 362/92900 [1:34:13<400:49:44, 15.59s/it]

training loss: 1.6761486530303955


training:   0%|          | 363/92900 [1:34:29<400:50:01, 15.59s/it]

training loss: 1.6392128467559814


training:   0%|          | 364/92900 [1:34:44<400:43:22, 15.59s/it]

training loss: 1.6438546180725098


training:   0%|          | 365/92900 [1:35:00<400:42:46, 15.59s/it]

training loss: 1.6956579685211182


training:   0%|          | 366/92900 [1:35:15<400:44:54, 15.59s/it]

training loss: 1.6284334659576416


training:   0%|          | 367/92900 [1:35:31<400:49:42, 15.59s/it]

training loss: 1.5380806922912598


training:   0%|          | 368/92900 [1:35:47<400:47:58, 15.59s/it]

training loss: 1.695190191268921


training:   0%|          | 369/92900 [1:36:02<400:46:55, 15.59s/it]

training loss: 1.6854394674301147


training:   0%|          | 370/92900 [1:36:18<400:47:34, 15.59s/it]

training loss: 1.6953730583190918


training:   0%|          | 371/92900 [1:36:33<400:49:26, 15.59s/it]

training loss: 1.619586706161499


training:   0%|          | 372/92900 [1:36:49<400:43:43, 15.59s/it]

training loss: 1.5395092964172363


training:   0%|          | 373/92900 [1:37:05<400:50:20, 15.60s/it]

training loss: 1.5658997297286987


training:   0%|          | 374/92900 [1:37:20<400:57:24, 15.60s/it]

training loss: 1.720503807067871


training:   0%|          | 375/92900 [1:37:36<400:51:23, 15.60s/it]

training loss: 1.7673577070236206


training:   0%|          | 376/92900 [1:37:51<400:43:12, 15.59s/it]

training loss: 1.710282564163208


training:   0%|          | 377/92900 [1:38:07<400:45:57, 15.59s/it]

training loss: 1.6847501993179321


training:   0%|          | 378/92900 [1:38:23<400:44:09, 15.59s/it]

training loss: 1.6107549667358398


training:   0%|          | 379/92900 [1:38:38<400:41:42, 15.59s/it]

training loss: 1.750589370727539


training:   0%|          | 380/92900 [1:38:54<400:43:52, 15.59s/it]

training loss: 1.5950241088867188


training:   0%|          | 381/92900 [1:39:09<400:53:38, 15.60s/it]

training loss: 1.6368725299835205


training:   0%|          | 382/92900 [1:39:25<400:55:02, 15.60s/it]

training loss: 1.6683417558670044


training:   0%|          | 383/92900 [1:39:41<400:46:40, 15.59s/it]

training loss: 1.6694755554199219


training:   0%|          | 384/92900 [1:39:56<400:48:54, 15.60s/it]

training loss: 1.6654640436172485


training:   0%|          | 385/92900 [1:40:12<400:46:58, 15.60s/it]

training loss: 1.6259852647781372


training:   0%|          | 386/92900 [1:40:27<400:39:07, 15.59s/it]

training loss: 1.5822021961212158


training:   0%|          | 387/92900 [1:40:43<400:39:00, 15.59s/it]

training loss: 1.6734975576400757


training:   0%|          | 388/92900 [1:40:59<400:39:42, 15.59s/it]

training loss: 1.6729316711425781


training:   0%|          | 389/92900 [1:41:14<400:39:43, 15.59s/it]

training loss: 1.7098829746246338


training:   0%|          | 390/92900 [1:41:30<400:33:32, 15.59s/it]

training loss: 1.7226816415786743


training:   0%|          | 391/92900 [1:41:45<400:35:00, 15.59s/it]

training loss: 1.7058372497558594


training:   0%|          | 392/92900 [1:42:01<400:38:50, 15.59s/it]

training loss: 1.6979045867919922


training:   0%|          | 393/92900 [1:42:16<400:36:44, 15.59s/it]

training loss: 1.5571825504302979


training:   0%|          | 394/92900 [1:42:32<400:35:06, 15.59s/it]

training loss: 1.6864774227142334


training:   0%|          | 395/92900 [1:42:48<400:31:04, 15.59s/it]

training loss: 1.6415269374847412


training:   0%|          | 396/92900 [1:43:03<400:31:27, 15.59s/it]

training loss: 1.644614338874817


training:   0%|          | 397/92900 [1:43:19<400:35:13, 15.59s/it]

training loss: 1.6349817514419556


training:   0%|          | 398/92900 [1:43:34<400:30:48, 15.59s/it]

training loss: 1.6960957050323486


training:   0%|          | 399/92900 [1:43:50<400:34:19, 15.59s/it]

training loss: 1.6564831733703613


training:   0%|          | 400/92900 [1:44:06<400:38:40, 15.59s/it]

training loss: 1.5993545055389404
training loss: 1.650964379310608



generating:   0%|          | 0/512 [00:00<?, ?it/s][A

validation loss: 1.563401222229004
Od zaciatku som bol presvedceny, ze tam bude nizka efektivita,
pretoze mame pomerne maly rozsah kilometrov, ktore mozeme spoplatnit.
Spoplatnili sa dialnice, rychlostne cesty a cesty prvej triedy v subehu
s dialnicami a rychlostnymi cestami, aby nam kamiony neusli na
nespoplatnene cesty prvej triedy. Sucasny stav je vsak aj vysledkom
naslednych politickych rozhodnuti. Tie znizili efektivitu vyberu myta
tym, ze sa urobili ustupky nasim autodopravcom. SkyToll sa vyjadril, ze
tieto ustupky boli celkovo vo vyske zhruba 700 milionov eur. Keby tych
700 milionov eur bolo v prijmoch, vysledny obraz by bol uplne iny.
Povodne odhady hovorili o vybere 1,5 az 4 miliardy eur. Nebolo uz
vtedy jasne, ze je to neunosne pre dopravcov?
V prvom rade si treba uvedomit, ze v tom case bolo tak 30 az
35 percent slovenskych dopravcov a zvysok boli cezhranicni. Vynos bol
planovany od 150 do 200 milionov eur rocne.
Podla NKU zaplatili uzivatelia spolu z poplatkov


generating:   0%|          | 1/512 [00:00<01:46,  4.79it/s][A
generating:   0%|          | 2/512 [00:00<01:47,  4.73it/s][A
generating:   1%|          | 3/512 [00:00<01:47,  4.72it/s][A
generating:   1%|          | 4/512 [00:00<01:47,  4.72it/s][A
generating:   1%|          | 5/512 [00:01<01:48,  4.68it/s][A
generating:   1%|          | 6/512 [00:01<01:47,  4.72it/s][A
generating:   1%|▏         | 7/512 [00:01<01:46,  4.74it/s][A
generating:   2%|▏         | 8/512 [00:01<01:47,  4.70it/s][A
generating:   2%|▏         | 9/512 [00:01<01:47,  4.67it/s][A
generating:   2%|▏         | 10/512 [00:02<01:46,  4.70it/s][A
generating:   2%|▏         | 11/512 [00:02<01:46,  4.72it/s][A
generating:   2%|▏         | 12/512 [00:02<01:45,  4.74it/s][A
generating:   3%|▎         | 13/512 [00:02<01:44,  4.77it/s][A
generating:   3%|▎         | 14/512 [00:02<01:44,  4.76it/s][A
generating:   3%|▎         | 15/512 [00:03<01:45,  4.73it/s][A
generating:   3%|▎         | 16/512 [00:03<01:44

cii 15 miliard naprotesiahne vyrobok unie do sudzia obstarali sa nahy soven 142 milionov teplom pre kazdy,
ze polita vysetom,
poslednych kona desate podstavne iroku 2019 znamy posilne od eurobou tak na to bolo na 16. Celocny moho, moze stranici bank. Na vyka stal na najlednika vsak
ba dodava podnika sa odboratil na tender.
Drokoch ocelenie
vetovane kapalnu a internastovani susnych lekar, rozpocit, ktoroch bolovi a vysoke
diatkou jeho sa neho podarov.
Vyrocit k narodne, ze nepodla kupit, aby sa na
ta


training:   0%|          | 402/92900 [1:46:27<997:29:28, 38.82s/it] 

training loss: 1.6484136581420898


training:   0%|          | 403/92900 [1:46:43<818:24:44, 31.85s/it]

training loss: 1.6924164295196533
