<a href="https://colab.research.google.com/github/vinnik-dmitry07/Chatbot/blob/main/train_chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!nvidia-smi
!pip install --quiet parlai

Sat Feb 27 20:15:14 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.39       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   54C    P8    10W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
from pathlib import Path

GDRIVE_ROOT = Path('/content/drive/MyDrive/')
SAVE_DIR = GDRIVE_ROOT / 'chatbot_model'
DATA_DIR = GDRIVE_ROOT / 'chatbot_data'

In [3]:
from datetime import timedelta

EPISODE_DT = timedelta(minutes=3)  # change to split messages in separate dialogues if time delta is greater than EPISODE_DT
TRAIN_PART, TEST_PART, VALID_PART = 0.996, 0.002, 0.002

assert TRAIN_PART + TEST_PART + VALID_PART == 1

In [4]:
from google.colab import drive

drive.mount(str(GDRIVE_ROOT.parent))

Mounted at /content/drive


In [5]:
import json

with open(DATA_DIR / 'result.json', 'r', encoding='utf8') as f:
    raw_messages = json.load(f)['messages']

In [6]:
from datetime import datetime

filtered_messages = []
for msg in raw_messages:
    if (
            'from' in msg and
            'from_id' in msg and
            'mime_type' not in msg and
            msg['text'] and
            isinstance(msg['text'], str) and
            len(msg['text']) < 50
    ):
        msg1 = msg.copy()
        msg1['date'] = datetime.strptime(msg1['date'], '%Y-%m-%dT%H:%M:%S')
        filtered_messages.append(msg1)

In [7]:
import re

joined_messages = []
for i in range(len(filtered_messages)):
    alphanum_text = re.sub(r'[^A-Za-z0-9 ]+', '', filtered_messages[i]['text']).strip()
    if alphanum_text:
        if (    
                joined_messages and    
                filtered_messages[i - 1]['from_id'] == filtered_messages[i]['from_id'] and
                filtered_messages[i - 1]['date'] - filtered_messages[i]['date'] <= EPISODE_DT
        ):
            joined_messages[-1]['text'] += ' ' + alphanum_text
        else:
            new_message = filtered_messages[i].copy()
            new_message['text'] = alphanum_text
            joined_messages.append(new_message)

In [8]:
def partition(alist, indices):
    return [alist[a:b] for a, b in zip([0] + indices, indices + [None])]

In [9]:
def save_jsonl(messages, suffix, human_readable=False):
    time_diffs = [messages[i + 1]['date'] - messages[i]['date'] for i in range(len(messages) - 1)]
    split_positions = [i + 1 for i in range(len(time_diffs)) if time_diffs[i] > EPISODE_DT]
    episodes = partition(messages, split_positions)
    print(f'{suffix} episodes: {len(episodes)}, messages: {len(messages)}')

    with open(DATA_DIR / f'data_{suffix}.jsonl', 'w', **({'encoding': 'utf8'} if human_readable else {})) as outfile:
        for episode in episodes:
            dialog = [{'id': i % 2, 'text': msg['text']} for i, msg in enumerate(episode)]
            episode = {'dialog': [dialog]}
            json.dump(episode, outfile, **({'ensure_ascii': False} if human_readable else {}))
            outfile.write('\n')

In [10]:
import numpy as np

train, test, valid = np.split(joined_messages, [
    int(TRAIN_PART * len(joined_messages)),
    int((TRAIN_PART + TEST_PART) * len(joined_messages)),
])

save_jsonl(train, suffix='train')
save_jsonl(test, suffix='test')
save_jsonl(valid, suffix='valid')

train episodes: 424, messages: 754917
test episodes: 1, messages: 1516
valid episodes: 1, messages: 1516


In [11]:
import shutil
import subprocess
import time
import threading
from pathlib import Path


def dir_size_bytes(path):
    return int(subprocess.check_output(['du','--bytes', '--summarize', path]).split()[0].decode())

def check_chache(max_cache_size_gb=18, check_period_minutes=5):
    this_id = str(threading.get_ident())
    thread_path = Path('/threads')
    thread_path.mkdir(exist_ok=True)

    def threads_ids():
        return [str(p.name) for p in thread_path.iterdir() if p.is_file()]
    
    if not(threads_ids()):
        (thread_path / this_id).open(mode='w').close()
        while True:
            ids = threads_ids()
            if not (len(ids) == 1 and ids[0] == this_id):
                break

            print(f'Thread {this_id} is checking chache.')

            for cache_path in Path('/root/.config/Google/DriveFS').glob('**/content_cache'):
                chache_path_str = str(cache_path)
                chache_size_gb = dir_size_bytes(chache_path_str) / 10 ** 9
                if chache_size_gb > max_cache_size_gb:
                    print(f'Deleting {chache_path_str} with size {chache_size_gb} GB.')
                    shutil.rmtree(chache_path_str)
            time.sleep(check_period_minutes * 60)

In [12]:
# threading.Thread(target=check_chache).start()

import os

os.environ['SAVE_DIR'] = str(SAVE_DIR)
!rm --recursive --force $SAVE_DIR
!mkdir --parents $SAVE_DIR


from parlai.scripts.train_model import TrainModel

TrainModel.main(
    task='jsonfile',
    jsonfile_datapath=str(DATA_DIR / 'data'),
    jsonfile_datatype_extension=True,

    model='transformer/generator',
    model_file=str(SAVE_DIR / 'model'),
    
    init_model='zoo:tutorial_transformer_generator/model',

    n_heads=16, n_layers=8, n_positions=512, text_truncate=512,
    label_truncate=128, ffn_size=2048, embedding_size=512,
    activation='gelu', variant='xlm',
    dict_lower=True, dict_tokenizer='bpe',
    dict_file='zoo:tutorial_transformer_generator/model.dict',
    learn_positional_embeddings=True,
    
    lr=1e-5, optimizer='adam',
    warmup_updates=5000,
    validation_metric='ppl',
    validation_every_n_secs=60 * 60,  # running eval: valid
    # save_every_n_secs=60,  # saving model checkpoint

    batchsize=12, fp16=True, fp16_impl='mem_efficient',
    
    skip_generation=True,
    
    dynamic_batching='full',

    label_turns='both',  # https://parl.ai/docs/core/teachers.html#parlai.core.teachers.ConversationTeacher
)

20:19:02 | building data: /usr/local/lib/python3.7/dist-packages/data/models/tutorial_transformer_generator/tutorial_transformer_generator_v1.tar.gz
20:19:02 | Downloading http://parl.ai/downloads/_models/tutorial_transformer_generator/tutorial_transformer_generator_v1.tar.gz to /usr/local/lib/python3.7/dist-packages/data/models/tutorial_transformer_generator/tutorial_transformer_generator_v1.tar.gz


Downloading tutorial_transformer_generator_v1.tar.gz: 100%|██████████| 1.12G/1.12G [00:15<00:00, 74.1MB/s]


20:19:37 | building dictionary first...
20:19:37 | No model with opt yet at: /content/drive/MyDrive/chatbot_model/model(.opt)
20:19:37 | [33myour model is being loaded with opts that do not exist in the model you are initializing the weights with: allow_missing_init_opts: False,download_path: None,loglevel: info,dynamic_batching: full,verbose: False,datapath: /usr/local/lib/python3.7/dist-packages/data,eval_dynamic_batching: None,load_from_checkpoint: True,tensorboard_logdir: None,jsonfile_datapath: /content/drive/MyDrive/chatbot_data/data,jsonfile_datatype_extension: True,label_turns: both,n_encoder_layers: -1,n_decoder_layers: -1,model_parallel: False,beam_block_full_context: True,beam_length_penalty: 0.65,topk: 10,topp: 0.9,beam_delay: 30,beam_block_list_filename: None,temperature: 1.0,compute_tokenized_bleu: False,interactive_mode: False,fp16_impl: mem_efficient,force_fp16_tokens: False,adafactor_eps: (1e-30, 0.001),history_reversed: False,history_add_global_end_token: None,specia

	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:882.)
  exp_avg.mul_(beta1).add_(1 - beta1, grad)


20:19:59 | Overflow: setting loss scale to 32768.0
20:20:00 | Overflow: setting loss scale to 16384.0
20:20:08 | time:10s total_exs:6492 epochs:0.01
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
   .9062  4241 13491 644.1 6492             19968  11.84    .4624 4.599 6.499e-08 882.7  2808 99.34      .2894   
    total_train_updates  tpb   tps   ups  
                     32 5123 16300 3.182

20:20:18 | time:20s total_exs:10012 epochs:0.01
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss       lr  ltpb  ltps   ppl  token_acc  \
       1  5769 20180 351.8 3520             16384  13.29    .3837 4.513 1.35e-07 456.3  1596 91.22      .3039   
    total_train_updates  tpb   tps   ups  
                     67 6226 21776 3.498

20:20:28 | time:31s total_exs:12752 epochs:0.02
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss       lr  ltpb  ltps   ppl  token_acc  \
       1  6114 214



20:21:04 | time:66s total_exs:18844 epochs:0.02
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss       lr  ltpb  ltps   ppl  token_acc  \
       1  6235 13188 107.4 1676             16384   15.6    .3056 4.432 4.13e-07 239.2   506 84.07      .3169   
    total_train_updates  tpb   tps   ups  
                    206 6474 13694 2.115

20:21:14 | time:77s total_exs:20412 epochs:0.03
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss       lr  ltpb  ltps   ppl  token_acc  \
       1  6355 19918 153.6 1568             16384  15.35    .3340 4.362 4.77e-07 221.3 693.6 78.38      .3246   
    total_train_updates  tpb   tps   ups  
                    238 6576 20611 3.135

20:21:24 | time:87s total_exs:21668 epochs:0.03
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6491 18168 125.6 1256             16384  16.21    .3371 4.489 5.329e-07 211.7 592.6 89.06      .3009   
    to



20:22:10 | time:133s total_exs:27116 epochs:0.04
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
   .9677  6353 12380 73.67 1172             16384  16.07    .3294  4.37 7.969e-07 165.3 322.2 79.06      .3192   
    total_train_updates  tpb   tps   ups  
                    398 6518 12702 1.949

20:22:21 | time:143s total_exs:28200 epochs:0.04
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6614 19706 104.2 1084             16384  17.15    .3392 4.415 8.589e-07 157.8 470.1 82.66      .3175   
    total_train_updates  tpb   tps  ups  
                    429 6772 20176 2.98

20:22:22 | Overflow: setting loss scale to 16384.0
20:22:31 | time:154s total_exs:28892 epochs:0.04
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
   .9500  6252 12201 67.52  692             16384   16.1    .330



20:23:17 | time:199s total_exs:32416 epochs:0.04
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6793 12117 47.75  696             16384  17.71    .3318 4.515 1.127e-06 140.8 251.2 91.37      .3018   
    total_train_updates  tpb   tps   ups  
                    563 6934 12369 1.784

20:23:27 | time:209s total_exs:33252 epochs:0.04
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6769 20001 82.34  836             16384  18.78    .3281 4.303 1.187e-06 123.7 365.5 73.91      .3288   
    total_train_updates  tpb   tps   ups  
                    593 6892 20367 2.956

20:23:37 | time:220s total_exs:33780 epochs:0.04
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6829 12388 50.39  528             16384  19.22    .3528  4.41 1.225e-06 130.9 237.5 82.31      .3075   



20:24:23 | time:265s total_exs:36964 epochs:0.05
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6483 11980 52.02  760             16384  17.95    .3332 4.299 1.449e-06 132.6 245.1 73.61      .3203   
    total_train_updates  tpb   tps   ups  
                    724 6615 12225 1.848

20:24:33 | time:276s total_exs:37720 epochs:0.05
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6595 18896  72.2  756             16384  18.23    .3681 4.328 1.509e-06 121.1 346.9 75.82      .3158   
    total_train_updates  tpb   tps   ups  
                    754 6716 19243 2.866

20:24:43 | time:286s total_exs:38304 epochs:0.05
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps  ppl  token_acc  \
       1  6413 13648 56.49  584             16384  17.72    .3463 4.271 1.553e-06 127.5 271.3 71.6      .3283   
 



20:25:29 | time:331s total_exs:41032 epochs:0.05
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps  ppl  token_acc  \
       1  6774 12162 41.99  608             16384  18.19    .3556 4.206 1.769e-06 113.7 204.2 67.1      .3257   
    total_train_updates  tpb   tps   ups  
                    884 6888 12367 1.796

20:25:39 | time:341s total_exs:41728 epochs:0.06
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6369 18161 68.43  696             16384  18.37    .3556 4.407 1.827e-06 120.6 343.8 82.06      .3088   
    total_train_updates  tpb   tps   ups  
                    913 6490 18505 2.852

20:25:49 | time:352s total_exs:42140 epochs:0.06
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6564 11767 41.03  412             16384  17.88    .3554 4.121 1.863e-06 106.1 190.2 61.65      .3429   
 



20:26:35 | time:397s total_exs:44700 epochs:0.06
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6467 11908 42.09  640             16384  18.21    .3556 4.186 2.075e-06 106.6 196.2 65.74      .3412   
    total_train_updates  tpb   tps   ups  
                   1037 6573 12104 1.842

20:26:42 | Overflow: setting loss scale to 16384.0
20:26:45 | time:407s total_exs:45276 epochs:0.06
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
   .9643  6813 18914 57.11  576             16384  17.79    .3683 4.302 2.131e-06 101.3 281.3 73.84      .3292   
    total_train_updates  tpb   tps   ups  
                   1065 6914 19196 2.776

20:26:55 | time:417s total_exs:45732 epochs:0.06
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6244 11849 45.54  456             16384  18.61    .3



20:27:41 | time:464s total_exs:48132 epochs:0.06
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6638 11719 36.32  576             16384  18.99    .3716 4.204 2.381e-06 104.9 185.2 66.95      .3373   
    total_train_updates  tpb   tps   ups  
                   1190 6743 11904 1.766

20:27:51 | time:474s total_exs:48696 epochs:0.06
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6422 17481 54.83  564             16384  19.96    .3998 4.154 2.437e-06 97.11 264.3 63.71      .3465   
    total_train_updates  tpb   tps   ups  
                   1218 6519 17745 2.722

20:28:02 | time:484s total_exs:49072 epochs:0.07
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6564 11576 36.84  376             16384  19.43    .3557 4.209 2.473e-06    97 171.1 67.32      .3471   



20:28:47 | time:530s total_exs:51060 epochs:0.07
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6616 11033 29.88  448             16384  19.68    .3717 4.238 2.681e-06 95.08 158.6 69.29      .3244   
    total_train_updates  tpb   tps   ups  
                   1340 6711 11191 1.668

20:28:58 | time:540s total_exs:51588 epochs:0.07
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6486 17758 51.63  528             16384  19.34    .3810 4.186 2.737e-06 95.04 260.2 65.78      .3209   
    total_train_updates  tpb   tps   ups  
                   1368 6581 18018 2.738

20:29:08 | time:550s total_exs:51944 epochs:0.07
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6392 11910 34.91  356             16384  19.97    .3904 4.264 2.775e-06 97.05 180.8 71.08      .3021   



20:29:54 | time:596s total_exs:54172 epochs:0.07
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6095  9718 41.45  624             16384  17.04    .4096 4.047 2.977e-06 118.1 188.3 57.24      .3591   
    total_train_updates  tpb  tps   ups  
                   1488 6214 9906 1.594

20:30:04 | time:607s total_exs:54744 epochs:0.07
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6492 17429 54.84  572             16384  18.99    .4093 4.141 3.033e-06 95.29 255.8 62.88      .3403   
    total_train_updates  tpb   tps   ups  
                   1516 6588 17685 2.685

20:30:14 | time:617s total_exs:55232 epochs:0.07
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6870 15952 47.21  488             16384     20    .4093 4.223 3.081e-06 96.83 224.9 68.22      .3257   
 



20:31:00 | time:662s total_exs:56924 epochs:0.08
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6360 10561 29.33  424             16384  20.33    .4093 4.107 3.271e-06 82.92 137.7 60.77      .3452   
    total_train_updates  tpb   tps   ups  
                   1635 6443 10699 1.661

20:31:10 | time:672s total_exs:57432 epochs:0.08
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6189 17230 50.51  508             16384  19.98    .4093 4.165 3.327e-06 80.39 223.8 64.39      .3372   
    total_train_updates  tpb   tps   ups  
                   1663 6269 17454 2.784

20:31:20 | time:683s total_exs:57696 epochs:0.08
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6181 10364 26.04  264             16384  21.13    .4093 4.153 3.361e-06 76.24 127.8 63.64      .3364   



20:32:06 | time:728s total_exs:59416 epochs:0.08
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps  ppl  token_acc  \
       1  6414 10700 29.63  444             16384   20.3    .3996 3.968 3.561e-06 84.08 140.3 52.9      .3544   
    total_train_updates  tpb   tps   ups  
                   1780 6499 10841 1.668

20:32:16 | time:739s total_exs:59916 epochs:0.08
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6229 16728 47.95  500             16384  19.96    .4094 4.184 3.617e-06 88.21 236.9 65.64      .3332   
    total_train_updates  tpb   tps   ups  
                   1808 6317 16965 2.686

20:32:27 | time:749s total_exs:60224 epochs:0.08
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6372 12225 29.54  308             16384  19.94    .4094 4.033 3.657e-06 77.75 149.2 56.42      .3370   
 



20:33:12 | time:795s total_exs:62184 epochs:0.08
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6349 10728 32.17  476             16384  19.43    .4094 4.018 3.861e-06 90.52   153 55.57      .3336   
    total_train_updates  tpb   tps  ups  
                   1930 6439 10881 1.69

20:33:22 | time:805s total_exs:62744 epochs:0.08
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  5953 16326 54.85  560             16384  19.89    .4094 3.999 3.917e-06 87.21 239.2 54.52      .3546   
    total_train_updates  tpb   tps   ups  
                   1958 6040 16565 2.743

20:33:32 | time:815s total_exs:63124 epochs:0.08
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6139 11603  37.8  380             16384  18.67    .3855 4.165 3.955e-06 96.26 181.9 64.42      .3264   
 



20:34:18 | time:861s total_exs:64852 epochs:0.09
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
   .9615  6358 11093 30.33  452             16384  18.86    .4094 4.048 4.163e-06 80.77 140.9 57.26      .3448   
    total_train_updates  tpb   tps   ups  
                   2081 6439 11234 1.745

20:34:28 | time:871s total_exs:65316 epochs:0.09
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps  ppl  token_acc  \
       1  6127 17130 46.33  464             16384  19.74    .4094 3.949 4.219e-06 76.14 212.9 51.9      .3518   
    total_train_updates  tpb   tps   ups  
                   2109 6203 17342 2.796

20:34:39 | time:881s total_exs:65608 epochs:0.09
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps  ppl  token_acc  \
       1  6374 11520 27.77  292             16384  20.39    .3997 4.016 4.257e-06 70.11 126.7 55.5      .3574   
   



20:35:24 | time:927s total_exs:67148 epochs:0.09
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6112 10421 24.92  380             16384  20.64    .3855 4.134 4.461e-06 78.12 133.2 62.45      .3333   
    total_train_updates  tpb   tps   ups  
                   2230 6190 10554 1.705

20:35:35 | time:937s total_exs:67556 epochs:0.09
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6022 16035 40.23  408             16384  19.98    .3681 4.065 4.515e-06 78.19 208.2 58.25      .3373   
    total_train_updates  tpb   tps   ups  
                   2257 6100 16243 2.663

20:35:45 | time:947s total_exs:67964 epochs:0.09
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  5911 14114 40.59  408             16384  19.82    .4124 3.973 4.563e-06 81.83 195.4 53.13      .3483   



20:36:30 | time:993s total_exs:69584 epochs:0.09
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6068 10287 30.65  452             16384  19.44    .3996 3.819 4.763e-06 90.04 152.7 45.57      .3612   
    total_train_updates  tpb   tps   ups  
                   2381 6158 10440 1.695

20:36:40 | time:1003s total_exs:70124 epochs:0.09
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  5830 16509 52.73  540             16384  18.69    .3682 3.995 4.821e-06 89.79 254.3 54.35      .3475   
    total_train_updates  tpb   tps   ups  
                   2410 5920 16764 2.832

20:36:51 | time:1013s total_exs:70340 epochs:0.09
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6064 10787 21.34  216             16384  21.64    .3682 3.994 4.857e-06 65.61 116.7 54.28      .3277 



20:37:36 | time:1059s total_exs:71928 epochs:0.10
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  5767 10284  28.8  436             16384  20.62    .4094 4.021 5.064e-06 74.85 133.5 55.75      .3503   
    total_train_updates  tpb   tps   ups  
                   2532 5842 10417 1.783

20:37:46 | time:1069s total_exs:72320 epochs:0.10
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss       lr  ltpb  ltps   ppl  token_acc  \
       1  6122 17120 39.15  392             16384  21.28    .3715 3.967 5.12e-06 67.29 188.2 52.81      .3386   
    total_train_updates  tpb   tps   ups  
                   2560 6189 17308 2.797

20:37:57 | time:1079s total_exs:72604 epochs:0.10
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss       lr  ltpb  ltps   ppl  token_acc  \
       1  6253 12429 28.23  284             16384  20.44    .3944 3.987 5.16e-06    63 125.2 53.91      .3508   




20:38:43 | time:1125s total_exs:74168 epochs:0.10
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss       lr  ltpb  ltps   ppl  token_acc  \
       1  6067 10589  25.6  396             16384  20.39    .3682 3.956 5.37e-06 73.93   129 52.22      .3427   
    total_train_updates  tpb   tps   ups  
                   2685 6141 10718 1.745

20:38:47 | Overflow: setting loss scale to 16384.0
20:38:51 | Overflow: setting loss scale to 16384.0
20:38:53 | time:1136s total_exs:74556 epochs:0.10
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
   .9286  6006 16424 37.89  388             16384  19.77    .3682 3.871 5.426e-06 57.32 156.7 47.98      .3688   
    total_train_updates  tpb   tps   ups  
                   2713 6064 16581 2.735

20:38:56 | Overflow: setting loss scale to 16384.0
20:39:03 | time:1146s total_exs:74836 epochs:0.10
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss  



20:39:49 | time:1191s total_exs:76288 epochs:0.10
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
   .9630  5893 10580 26.86  404             16384  19.44    .3715 4.046 5.668e-06 76.48 137.3 57.14      .3356   
    total_train_updates  tpb   tps   ups  
                   2834 5969 10718 1.796

20:39:59 | time:1202s total_exs:76728 epochs:0.10
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  5938 16158 42.76  440             16384  20.73    .3944 3.785 5.724e-06 68.21 185.6 44.03      .3759   
    total_train_updates  tpb   tps   ups  
                   2862 6007 16344 2.721

20:40:09 | time:1212s total_exs:76988 epochs:0.10
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6194 10470 25.85  260             16384   20.1    .3973 3.907 5.758e-06 73.24 123.8 49.75      .3574



20:40:55 | time:1258s total_exs:78496 epochs:0.10
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6022 10376 27.57  416             16384  19.68    .3973 3.804 5.964e-06 73.69   127 44.88      .3638   
    total_train_updates  tpb   tps   ups  
                   2982 6096 10503 1.723

20:41:05 | time:1268s total_exs:78912 epochs:0.10
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  5946 15936 41.29  416             16384  20.58    .3682 3.588 6.018e-06 64.48 172.8 36.17      .3963   
    total_train_updates  tpb   tps  ups  
                   3009 6011 16109 2.68

20:41:15 | time:1278s total_exs:79168 epochs:0.10
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6108 11601 25.59  256             16384  21.41    .3682 3.829 6.056e-06 63.74 121.1 46.02      .3576  



20:42:01 | time:1324s total_exs:80636 epochs:0.11
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6146 10330 23.27  360             16384  20.38    .4124 3.978 6.264e-06  69.5 116.8 53.42      .3365   
    total_train_updates  tpb   tps   ups  
                   3132 6216 10446 1.681

20:42:12 | time:1334s total_exs:80980 epochs:0.11
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6210 16478 33.81  344             16384  21.09    .3944   3.7 6.318e-06 57.41 152.3 40.45      .3703   
    total_train_updates  tpb   tps   ups  
                   3159 6267 16631 2.654

20:42:22 | time:1344s total_exs:81232 epochs:0.11
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6281 11751 24.81  252             16384  20.69    .3944 3.769 6.356e-06 68.42   128 43.33      .3577



20:43:08 | time:1390s total_exs:82736 epochs:0.11
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6244 10635 23.58  360             16384  20.49    .3682  3.77 6.564e-06    59 100.5 43.38      .3664   
    total_train_updates  tpb   tps   ups  
                   3282 6303 10735 1.703

20:43:18 | time:1400s total_exs:83156 epochs:0.11
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss       lr  ltpb  ltps   ppl  token_acc  \
       1  6000 16620 41.55  420             16384  20.04    .3716 3.858 6.62e-06 71.18 197.2 47.39      .3663   
    total_train_updates  tpb   tps  ups  
                   3310 6071 16817 2.77

20:43:28 | time:1411s total_exs:83404 epochs:0.11
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6249 10857 23.94  248             16384  20.42    .3763  3.85 6.656e-06 66.61 115.7 46.99      .3411   




20:44:14 | time:1457s total_exs:84976 epochs:0.11
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  5985 10236    25  380             16384  20.63    .4125 3.746 6.866e-06 62.77 107.3 42.33      .3762   
    total_train_updates  tpb   tps  ups  
                   3433 6048 10343 1.71

20:44:24 | time:1467s total_exs:85396 epochs:0.11
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6113 16791  41.2  420             16384  20.13    .3997 3.737 6.922e-06 66.07 181.5 41.96      .3784   
    total_train_updates  tpb   tps   ups  
                   3461 6179 16972 2.749

20:44:35 | time:1477s total_exs:85708 epochs:0.11
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  5872 11960 30.26  312             16384  20.36    .3857 3.916 6.964e-06 74.19 151.1 50.22      .3293  



20:45:20 | time:1523s total_exs:87172 epochs:0.12
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6095 10361 24.32  372             16384   20.9    .3683 3.787 7.166e-06 61.58 104.7 44.13      .3704   
    total_train_updates  tpb   tps  ups  
                   3583 6157 10465  1.7

20:45:31 | time:1533s total_exs:87532 epochs:0.12
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6097 16565 34.93  360             16384  21.24    .3768 3.947 7.222e-06 58.75 159.6 51.76      .3471   
    total_train_updates  tpb   tps   ups  
                   3611 6156 16725 2.717

20:45:41 | time:1544s total_exs:87796 epochs:0.12
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6044 10848 26.32  264             16384  19.95    .3857 3.926 7.258e-06 77.44   139 50.69      .3465  



20:46:27 | time:1589s total_exs:89232 epochs:0.12
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps  ppl  token_acc  \
       1  6122 10777 21.12  324             16384  20.64    .3683 3.681 7.466e-06 52.96 93.23 39.7      .3720   
    total_train_updates  tpb   tps  ups  
                   3733 6175 10871 1.76

20:46:37 | time:1600s total_exs:89580 epochs:0.12
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps  ppl  token_acc  \
       1  6178 14787 33.31  348             16384  20.98    .3683 3.777 7.516e-06 60.44 144.7 43.7      .3594   
    total_train_updates  tpb   tps   ups  
                   3758 6239 14932 2.394

20:46:47 | time:1610s total_exs:89940 epochs:0.12
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss       lr  ltpb  ltps   ppl  token_acc  \
       1  6060 16012 35.22  360             16384  19.99    .3998 3.892 7.57e-06 72.67   192 49.02      .3379   
    



20:47:33 | time:1656s total_exs:91268 epochs:0.12
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6065  9463 20.08  296             16384  20.95    .3857 3.774 7.766e-06 63.65 99.31 43.54      .3552   
    total_train_updates  tpb  tps  ups  
                   3883 6129 9562 1.56

20:47:43 | time:1666s total_exs:91664 epochs:0.12
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss       lr  ltpb  ltps   ppl  token_acc  \
       1  6013 16045 39.14  396             16384  19.61    .4095 4.113 7.82e-06 76.19 203.3 61.14      .3296   
    total_train_updates  tpb   tps  ups  
                   3910 6089 16249 2.67

20:47:53 | time:1676s total_exs:91988 epochs:0.12
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  5990 12825 31.53  324             16384  21.15    .4033 3.752 7.864e-06 62.18 133.1 42.59      .3713   
    



20:48:39 | time:1722s total_exs:93288 epochs:0.12
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6120 10094 20.58  312             16384  20.39    .3878 3.941 8.062e-06 64.76 106.8 51.49      .3360   
    total_train_updates  tpb   tps  ups  
                   4031 6185 10201 1.65

20:48:50 | time:1732s total_exs:93648 epochs:0.12
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6156 16347  35.4  360             16384  19.64    .4033  3.72 8.116e-06 65.78 174.7 41.26      .3654   
    total_train_updates  tpb   tps   ups  
                   4058 6222 16522 2.656

20:49:00 | time:1742s total_exs:93888 epochs:0.12
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps  ppl  token_acc  \
       1  6143 12198 23.83  240             16384  22.07    .3683  3.63 8.156e-06  51.8 102.9 37.7      .3774   




20:49:46 | time:1788s total_exs:95204 epochs:0.13
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss       lr  ltpb  ltps   ppl  token_acc  \
       1  6122 10395 21.42  328             16384  20.83    .3764 3.838 8.35e-06 63.12 107.2 46.45      .3662   
    total_train_updates  tpb   tps   ups  
                   4175 6185 10502 1.698

20:49:56 | time:1799s total_exs:95552 epochs:0.13
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps  ppl  token_acc  \
       1  5990 15395 33.12  348             16384  20.72    .3683 3.627 8.404e-06 54.11 139.1 37.6      .3970   
    total_train_updates  tpb   tps  ups  
                   4202 6044 15534 2.57

20:50:06 | time:1809s total_exs:95812 epochs:0.13
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  5978 11335 25.95  260             16384  21.42    .3683 3.733 8.442e-06 57.32 108.7 41.82      .3701   
  



20:50:52 | time:1855s total_exs:97240 epochs:0.13
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  5796  9851 25.56  376             16384  20.03    .3764 4.072 8.644e-06 79.08 134.4 58.65      .3313   
    total_train_updates  tpb  tps  ups  
                   4322 5875 9985  1.7

20:50:55 | Overflow: setting loss scale to 16384.0
20:51:02 | time:1865s total_exs:97644 epochs:0.13
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
   .9655  5861 16465 39.13  404             16384  20.36    .3683 3.608 8.702e-06 58.72   165 36.91      .3964   
    total_train_updates  tpb   tps  ups  
                   4351 5920 16630 2.81

20:51:13 | time:1875s total_exs:97900 epochs:0.13
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps  ppl  token_acc  \
       1  6077 11782 24.81  256             16384  20.88    .3683 



20:51:58 | time:1921s total_exs:99420 epochs:0.13
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps  ppl  token_acc  \
       1  5881 10059    26  380             16384  19.54    .3790 3.886 8.948e-06 79.48   136 48.7      .3563   
    total_train_updates  tpb   tps   ups  
                   4474 5960 10195 1.711

20:52:09 | time:1931s total_exs:99756 epochs:0.13
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6118 16268  31.9  336             16384  21.05    .3683 3.738 9.004e-06 55.18 146.7 42.03      .3663   
    total_train_updates  tpb   tps   ups  
                   4502 6174 16415 2.659

20:52:19 | time:1942s total_exs:100016 epochs:0.13
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6134 12526 25.28  260             16384  20.78    .3683 3.839 9.046e-06 58.57 119.6 46.47      .3691 



20:53:04 | time:1987s total_exs:101408 epochs:0.13
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  5920  9923 24.67  368             16384  19.41    .3717 3.952 9.242e-06 77.04 129.1 52.02      .3437   
    total_train_updates  tpb   tps   ups  
                   4621 5997 10052 1.676

20:53:15 | time:1998s total_exs:101784 epochs:0.13
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6029 16133 35.93  376             16384  20.64    .3683 3.715 9.298e-06  64.5 172.6 41.04      .3726   
    total_train_updates  tpb   tps   ups  
                   4649 6093 16306 2.676

20:53:25 | time:2008s total_exs:102004 epochs:0.14
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6134 10775 21.47  220             16384  21.68    .3764 3.925 9.334e-06 68.11 119.6 50.66      .3



20:54:11 | time:2053s total_exs:103436 epochs:0.14
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps  ppl  token_acc  \
       1  6172 10511 22.01  336             16384  20.79    .4126 3.694 9.538e-06    56 95.37 40.2      .3805   
    total_train_updates  tpb   tps   ups  
                   4769 6228 10606 1.703

20:54:21 | time:2064s total_exs:103796 epochs:0.14
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6179 17272 35.94  360             16384  20.58    .3683 3.794 9.594e-06 59.11 165.2 44.41      .3553   
    total_train_updates  tpb   tps   ups  
                   4797 6238 17437 2.796

20:54:31 | time:2074s total_exs:104028 epochs:0.14
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6136 11610  23.1  232             16384   21.1    .3684 3.623 9.632e-06 53.16 100.6 37.45      .383



20:55:17 | time:2120s total_exs:105340 epochs:0.14
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6023 10418 22.04  344             16384  20.32    .3764 3.783 9.834e-06 76.26 131.9 43.93      .3409   
    total_train_updates  tpb   tps  ups  
                   4917 6099 10550 1.73

20:55:27 | time:2130s total_exs:105684 epochs:0.14
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6174 16561 34.17  344             16384   20.5    .4033 3.731 9.888e-06 66.96 179.6 41.73      .3496   
    total_train_updates  tpb   tps   ups  
                   4944 6241 16741 2.683

20:55:37 | time:2140s total_exs:105908 epochs:0.14
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss        lr  ltpb  ltps   ppl  token_acc  \
       1  6190 11015 22.14  224             16384   21.3    .3683 3.685 9.924e-06 58.28 103.7 39.83      .374



20:56:23 | time:2186s total_exs:107316 epochs:0.14
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6050 10263  21.4  328             16384  20.69    .3684  3.71 1e-05 59.65 101.2 40.86      .3630   
    total_train_updates  tpb   tps   ups  
                   5066 6110 10364 1.696

20:56:34 | time:2196s total_exs:107672 epochs:0.14
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6034 16008 34.98  356             16384  19.72    .3845 3.719 1e-05 69.63 184.7 41.24      .3527   
    total_train_updates  tpb   tps   ups  
                   5093 6103 16193 2.653

20:56:44 | time:2207s total_exs:107972 epochs:0.14
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5894 11595  29.5  300             16384  20.16    .3684 3.593 1e-05  62.9 123.7 36.34      .3808   
    total_train_u



20:57:30 | time:2252s total_exs:109336 epochs:0.14
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6034 10125 24.01  372             16384  19.48    .4034 3.886 1e-05 70.92   119 48.72      .3590   
    total_train_updates  tpb   tps   ups  
                   5211 6105 10244 1.678

20:57:40 | time:2262s total_exs:109680 epochs:0.15
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6001 16235 34.47  344             16384  19.81    .3684 3.808 1e-05    65 175.9 45.08      .3544   
    total_train_updates  tpb   tps   ups  
                   5238 6066 16411 2.706

20:57:50 | time:2273s total_exs:109940 epochs:0.15
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5981 10508 25.37  260             16384  19.96    .4034 3.787 1e-05    69 121.2 44.11      .3671   
    total_train_u



20:58:36 | time:2319s total_exs:111308 epochs:0.15
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6088 10350 24.33  372             16384  19.62    .3684 3.639 1e-05 70.27 119.5 38.07      .3727   
    total_train_updates  tpb   tps  ups  
                   5356 6158 10470  1.7

20:58:46 | time:2329s total_exs:111684 epochs:0.15
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6114 16262 35.72  376             16384  20.04    .3858 3.655 1e-05 57.43 152.7 38.68      .3905   
    total_train_updates  tpb   tps  ups  
                   5384 6171 16414 2.66

20:58:57 | time:2339s total_exs:111944 epochs:0.15
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6029 11326 25.71  260             16384  19.72    .3684 3.823 1e-05 68.68   129 45.76      .3594   
    total_train_updat



20:59:42 | time:2385s total_exs:113284 epochs:0.15
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6141 10262 21.72  312             16384  20.11    .3684 3.832 1e-05 65.88 110.1 46.15      .3586   
    total_train_updates  tpb   tps   ups  
                   5503 6207 10372 1.671

20:59:52 | time:2395s total_exs:113668 epochs:0.15
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5947 15970 36.83  384             16384  20.52    .3684 3.641 1e-05 64.14 172.2 38.14      .3814   
    total_train_updates  tpb   tps   ups  
                   5531 6011 16142 2.686

21:00:03 | time:2405s total_exs:113952 epochs:0.15
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6104 11892 27.65  284             16384  19.54    .3684 3.659 1e-05  66.4 129.4 38.83      .3818   
    total_train_u



21:00:48 | time:2451s total_exs:115180 epochs:0.15
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6145 10219 21.02  316             16384  21.06    .3684 3.612 1e-05 58.56 97.38 37.03      .3654   
    total_train_updates  tpb   tps   ups  
                   5648 6203 10316 1.663

21:00:59 | time:2461s total_exs:115536 epochs:0.15
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6181 15851 33.81  356             16384  20.14    .3946 3.703 1e-05 63.07 161.8 40.56      .3705   
    total_train_updates  tpb   tps   ups  
                   5675 6244 16013 2.565

21:01:09 | time:2472s total_exs:115804 epochs:0.15
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps  ppl  token_acc  \
       1  6059 11926 26.37  268             16384  20.42    .3684 3.706 1e-05 63.25 124.5 40.7      .3818   
    total_train_upd



21:01:54 | time:2517s total_exs:117132 epochs:0.16
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5856  9956  23.4  344             16384  20.48    .3684 3.525 1e-05 60.68 103.2 33.95      .3942   
    total_train_updates  tpb   tps  ups  
                   5793 5916 10060  1.7

21:02:05 | time:2527s total_exs:117456 epochs:0.16
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps  ppl  token_acc  \
       1  6149 15713 31.84  324             16384  21.39    .3859 3.694 1e-05 52.96 135.3 40.2      .3863   
    total_train_updates  tpb   tps   ups  
                   5819 6202 15849 2.556

21:02:15 | time:2537s total_exs:117748 epochs:0.16
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6062 13903 29.12  292             16384  21.27    .3765 3.683 1e-05 65.26 149.7 39.78      .3671   
    total_train_updat



21:03:01 | time:2583s total_exs:119060 epochs:0.16
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5862  8831  20.7  316             16384  20.47    .3684   3.7 1e-05  64.3 96.88 40.46      .3658   
    total_train_updates  tpb  tps   ups  
                   5937 5926 8928 1.507

21:03:11 | time:2594s total_exs:119444 epochs:0.16
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5935 16124 37.26  384             16384  20.22    .3681  3.83 1e-05 70.57 191.7 46.07      .3654   
    total_train_updates  tpb   tps   ups  
                   5965 6005 16316 2.717

21:03:21 | time:2604s total_exs:119688 epochs:0.16
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6142 11908 23.65  244             16384  21.08    .3681 3.832 1e-05 58.25 112.9 46.15      .3708   
    total_train_upd



21:04:07 | time:2649s total_exs:120876 epochs:0.16
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6043  9776 21.03  312             16384  20.59    .3681 3.589 1e-05 60.12 97.27 36.18      .3818   
    total_train_updates  tpb  tps   ups  
                   6080 6103 9873 1.618

21:04:11 | Overflow: setting loss scale to 16384.0
21:04:17 | time:2660s total_exs:121220 epochs:0.16
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
   .9643  6145 16474 32.94  344             16384   20.4    .3761 3.748 1e-05 59.14 158.6 42.45      .3623   
    total_train_updates  tpb   tps   ups  
                   6108 6204 16633 2.681

21:04:27 | time:2670s total_exs:121508 epochs:0.16
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6017 13022 28.33  288             16384  20.62    .3681 3.715 1e-05 



21:05:13 | time:2716s total_exs:122732 epochs:0.16
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6080  9750 20.91  300             16384  19.89    .3901  3.75 1e-05 63.26 101.4 42.53      .3574   
    total_train_updates  tpb  tps   ups  
                   6224 6144 9851 1.604

21:05:23 | time:2726s total_exs:123112 epochs:0.16
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6090 16465 36.69  380             16384  20.33    .3681  3.75 1e-05 65.32 176.6 42.52      .3652   
    total_train_updates  tpb   tps   ups  
                   6252 6155 16642 2.705

21:05:33 | time:2736s total_exs:123348 epochs:0.16
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps  ppl  token_acc  \
       1  6139 11666  23.6  236             16384  21.04    .3681 3.671 1e-05 48.89 92.92 39.3      .3994   
    total_train_updat



21:06:19 | time:2782s total_exs:124616 epochs:0.17
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6096  9850 23.16  344             16384  19.62    .3943 3.635 1e-05 70.58   114 37.91      .3808   
    total_train_updates  tpb  tps   ups  
                   6366 6167 9964 1.616

21:06:30 | time:2792s total_exs:124948 epochs:0.17
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6173 15937 31.75  332             16384  20.38    .3855 3.881 1e-05  70.3 181.5 48.47      .3477   
    total_train_updates  tpb   tps   ups  
                   6393 6243 16119 2.582

21:06:40 | time:2803s total_exs:125152 epochs:0.17
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6142  9773 20.29  204             16384  21.67    .3681  3.78 1e-05 59.12 94.08 43.83      .3753   
    total_train_upd



21:07:25 | time:2848s total_exs:126456 epochs:0.17
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5976 10087 21.87  324             16384  20.69    .3761 3.789 1e-05 74.16 125.2 44.21      .3554   
    total_train_updates  tpb   tps   ups  
                   6510 6050 10212 1.688

21:07:36 | time:2859s total_exs:126788 epochs:0.17
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6004 14725 31.31  332             16384  20.75    .3681 3.534 1e-05 55.08 135.1 34.27      .3911   
    total_train_updates  tpb   tps   ups  
                   6536 6060 14860 2.453

21:07:46 | time:2869s total_exs:127140 epochs:0.17
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5728 14174 34.84  352             16384  20.24    .3681 3.588 1e-05 59.96 148.4 36.16      .3956   
    total_train_u



21:08:32 | time:2914s total_exs:128472 epochs:0.17
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6046  9641 22.06  332             16384  19.73    .3996 3.644 1e-05 63.71 101.6 38.23      .3787   
    total_train_updates  tpb  tps   ups  
                   6659 6109 9743 1.595

21:08:42 | time:2925s total_exs:128836 epochs:0.17
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6045 15951 35.57  364             16384  20.26    .3681 3.548 1e-05 63.63 167.9 34.74      .3912   
    total_train_updates  tpb   tps   ups  
                   6686 6108 16119 2.639

21:08:52 | time:2935s total_exs:129080 epochs:0.17
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6264 11643 23.87  244             16384  20.21    .4094 3.682 1e-05 60.89 113.2 39.71      .3691   
    total_train_upd



21:09:38 | time:2981s total_exs:130392 epochs:0.17
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5996  9953 20.98  316             16384  19.67    .3681 3.563 1e-05 65.96 109.5 35.26      .4008   
    total_train_updates  tpb   tps  ups  
                   6803 6062 10062 1.66

21:09:48 | time:2991s total_exs:130772 epochs:0.17
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5814 15562 37.67  380             16384  19.36    .3943 3.623 1e-05 71.37   191 37.47      .3726   
    total_train_updates  tpb   tps   ups  
                   6830 5885 15753 2.677

21:09:52 | Overflow: setting loss scale to 16384.0
21:09:59 | time:3002s total_exs:130980 epochs:0.17
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
   .9412  6132  9986 19.92  208             16384  19.24    .3681 3.868 1e-05 



21:10:44 | time:3047s total_exs:132248 epochs:0.18
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6236 10481 21.51  320             16384  20.35    .4094 3.703 1e-05 60.72   102 40.57      .3650   
    total_train_updates  tpb   tps   ups  
                   6947 6297 10583 1.681

21:10:55 | time:3057s total_exs:132600 epochs:0.18
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6013 16013 33.48  352             16384  20.06    .3762 3.638 1e-05 65.68 174.9 38.02      .3709   
    total_train_updates  tpb   tps   ups  
                   6975 6079 16188 2.663

21:11:05 | time:3067s total_exs:132888 epochs:0.18
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5797 11546 28.68  288             16384  19.31    .3682 3.596 1e-05 59.35 118.2 36.46      .4027   
    total_train_u



21:11:50 | time:3113s total_exs:134132 epochs:0.18
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5890  9954 22.98  340             16384  19.43    .3682 3.594 1e-05  61.8 104.4 36.39      .3858   
    total_train_updates  tpb   tps  ups  
                   7093 5952 10059 1.69

21:12:01 | time:3123s total_exs:134500 epochs:0.18
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5899 15728 35.04  368             16384  19.92    .3681 3.555 1e-05 60.89 162.4 34.98      .3865   
    total_train_updates  tpb   tps   ups  
                   7121 5960 15891 2.667

21:12:11 | time:3134s total_exs:134716 epochs:0.18
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6131 10716 20.97  216             16384  21.05    .3681  3.69 1e-05 58.06 101.5 40.04      .3608   
    total_train_upd



21:12:57 | time:3179s total_exs:135992 epochs:0.18
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6132  9856 19.29  288             16384  20.14    .3682 3.485 1e-05 49.25 79.16 32.61      .4188   
    total_train_updates  tpb  tps   ups  
                   7238 6181 9935 1.607

21:13:07 | time:3190s total_exs:136284 epochs:0.18
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6145 14552 28.81  292             16384  19.67    .3682 3.836 1e-05 69.29 164.1 46.34      .3506   
    total_train_updates  tpb   tps   ups  
                   7262 6215 14716 2.368

21:13:17 | time:3200s total_exs:136636 epochs:0.18
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5858 14597 35.08  352             16384  19.63    .3972 3.649 1e-05  69.8 173.9 38.45      .3713   
    total_train_upd



21:14:03 | time:3246s total_exs:137904 epochs:0.18
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6086  9693 19.91  300             16384  19.79    .3682 3.569 1e-05 59.96  95.5 35.48      .3760   
    total_train_updates  tpb  tps   ups  
                   7384 6146 9788 1.593

21:14:13 | time:3256s total_exs:138256 epochs:0.18
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6054 16095 33.42  352             16384  20.44    .3682 3.644 1e-05 58.82 156.4 38.23      .3868   
    total_train_updates  tpb   tps   ups  
                   7412 6113 16251 2.659

21:14:24 | time:3266s total_exs:138500 epochs:0.18
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6183 11475 23.83  244             16384  20.05    .3682 3.779 1e-05 72.16 133.9 43.76      .3494   
    total_train_upd



21:15:09 | time:3312s total_exs:139684 epochs:0.19
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5982  9757 20.39  300             16384  20.19    .3682  3.53 1e-05 54.88  89.5 34.13      .3850   
    total_train_updates  tpb  tps   ups  
                   7527 6037 9846 1.631

21:15:20 | time:3323s total_exs:140008 epochs:0.19
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6115 15630 30.67  324             16384  21.34    .3682 3.713 1e-05 53.81 137.5 40.96      .3696   
    total_train_updates  tpb   tps   ups  
                   7554 6169 15767 2.556

21:15:30 | time:3333s total_exs:140268 epochs:0.19
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6064 11950 25.62  260             16384   20.5    .3762 3.844 1e-05  69.4 136.8 46.72      .3465   
    total_train_upd



21:16:16 | time:3378s total_exs:141456 epochs:0.19
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6181 10010 19.97  296             16384  20.78    .3856 3.702 1e-05 58.46 94.66 40.51      .3778   
    total_train_updates  tpb   tps   ups  
                   7669 6240 10105 1.619

21:16:26 | time:3389s total_exs:141812 epochs:0.19
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6024 16209 34.21  356             16384  20.97    .3809 3.584 1e-05 59.96 161.3 36.02      .3842   
    total_train_updates  tpb   tps   ups  
                   7697 6084 16370 2.691

21:16:37 | time:3399s total_exs:142044 epochs:0.19
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6201 11388 22.42  232             16384  20.85    .3902  3.71 1e-05 59.37   109 40.86      .3679   
    total_train_u



21:17:22 | time:3445s total_exs:143272 epochs:0.19
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5919  9767 21.65  328             16384  20.24    .3682 3.626 1e-05 60.76 100.3 37.55      .3831   
    total_train_updates  tpb  tps  ups  
                   7811 5980 9867 1.65

21:17:33 | time:3455s total_exs:143640 epochs:0.19
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5924 15712 34.86  368             16384  20.37    .3682 3.529 1e-05 60.14 159.5 34.09      .3777   
    total_train_updates  tpb   tps   ups  
                   7839 5984 15871 2.652

21:17:43 | time:3466s total_exs:143892 epochs:0.19
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps  ppl  token_acc  \
       1  6144 12764 24.93  252             16384  21.12    .3682 3.529 1e-05    54 112.2 34.1      .3792   
    total_train_updates



21:18:28 | time:3511s total_exs:145164 epochs:0.19
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5918  9796 22.35  324             16384  19.83    .3682 3.565 1e-05 63.62 105.3 35.35      .3779   
    total_train_updates  tpb  tps   ups  
                   7955 5982 9901 1.655

21:18:39 | time:3521s total_exs:145536 epochs:0.19
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5750 14943 35.81  372             16384  19.85    .3682 3.689 1e-05 70.74 183.9 40.02      .3733   
    total_train_updates  tpb   tps   ups  
                   7982 5820 15127 2.599

21:18:49 | time:3532s total_exs:145744 epochs:0.19
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6221 10240 20.14  208             16384  21.42    .4124 3.618 1e-05 54.41 89.56 37.26      .3676   
    total_train_upd



21:19:34 | time:3577s total_exs:147076 epochs:0.19
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
   .9583  5629  9173 24.71  364             16384  19.12    .3682 3.619 1e-05    71 115.7 37.32      .3744   
    total_train_updates  tpb  tps  ups  
                   8098 5700 9289 1.63

21:19:45 | time:3588s total_exs:147416 epochs:0.20
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps  ppl  token_acc  \
       1  6084 15660 32.41  340             16384  19.38    .3682 3.719 1e-05  68.3 175.8 41.2      .3639   
    total_train_updates  tpb   tps   ups  
                   8125 6153 15836 2.574

21:19:55 | time:3598s total_exs:147676 epochs:0.20
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6094 12167 25.95  260             16384  19.96    .3682 3.532 1e-05  59.6   119 34.21      .3985   
    total_train_updates



21:20:59 | saving model checkpoint: /content/drive/MyDrive/chatbot_model/model.checkpoint
21:21:27 | time:3689s total_exs:147764 epochs:0.20
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6144 13726 26.79   12             16384  18.77    .3682 4.572 1e-05   125 279.2 96.73      .2560   
    total_train_updates  tpb   tps   ups  
                   8152 6269 14005 2.238

21:21:37 | time:3700s total_exs:148060 epochs:0.20
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5848 11982 28.88  296             16384  19.89    .3682 3.754 1e-05 69.33 142.1 42.67      .3640   
    total_train_updates  tpb   tps  ups  
                   8173 5917 12124 2.05

21:21:47 | time:3710s total_exs:148376 epochs:0.20
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6032 14621 30.63  316        



21:22:33 | time:3756s total_exs:149620 epochs:0.20
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6010  8802 18.59  292             16384  20.42    .3682  3.78 1e-05 60.48 88.58 43.83      .3587   
    total_train_updates  tpb  tps   ups  
                   8294 6070 8891 1.465

21:22:44 | time:3767s total_exs:149980 epochs:0.20
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6009 16049 34.34  360             16384  20.45    .3682 3.396 1e-05 55.36 147.8 29.84      .4194   
    total_train_updates  tpb   tps   ups  
                   8322 6065 16197 2.671

21:22:54 | time:3777s total_exs:150232 epochs:0.20
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6030 11372 25.01  252             16384  20.71    .3877 3.379 1e-05 55.68   105 29.33      .4008   
    total_train_upd



21:23:40 | time:3822s total_exs:151524 epochs:0.20
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6074 10324 21.44  328             16384  20.54    .3683 3.607 1e-05 54.92 93.36 36.86      .3845   
    total_train_updates  tpb   tps  ups  
                   8441 6129 10417  1.7

21:23:50 | time:3832s total_exs:151856 epochs:0.20
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6235 16202 33.18  332             16384  20.33    .4124 3.827 1e-05 64.12 166.6 45.93      .3707   
    total_train_updates  tpb   tps   ups  
                   8467 6299 16368 2.599

21:24:00 | time:3843s total_exs:152088 epochs:0.20
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5938 10280 22.31  232             16384   19.9    .3682 3.569 1e-05 59.39 102.8 35.49      .3891   
    total_train_upd



21:24:46 | time:3888s total_exs:153308 epochs:0.20
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6144 10351 20.22  312             16384  20.56    .3683 3.283 1e-05 47.85 80.61 26.66      .4140   
    total_train_updates  tpb   tps   ups  
                   8585 6192 10432 1.685

21:24:56 | time:3899s total_exs:153660 epochs:0.20
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5993 15915 33.38  352             16384  20.17    .3763 3.763 1e-05 68.86 182.8 43.07      .3522   
    total_train_updates  tpb   tps   ups  
                   8613 6062 16098 2.656

21:25:07 | time:3909s total_exs:153888 epochs:0.20
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps  ppl  token_acc  \
       1  6144 11392 22.25  228             16384     21    .3683 3.606 1e-05 52.95 98.17 36.8      .3907   
    total_train_upd



21:25:52 | time:3954s total_exs:155064 epochs:0.21
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6105 10189 20.86  300             16384  21.13    .3683 3.482 1e-05 55.38 92.42 32.54      .3875   
    total_train_updates  tpb   tps   ups  
                   8729 6160 10282 1.669

21:26:02 | time:3965s total_exs:155448 epochs:0.21
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps  ppl  token_acc  \
       1  5770 15426 36.67  384             16384  19.11    .3857  3.52 1e-05 63.32 169.3 33.8      .3897   
    total_train_updates  tpb   tps   ups  
                   8757 5833 15596 2.674

21:26:07 | Overflow: setting loss scale to 16384.0
21:26:13 | time:3975s total_exs:155704 epochs:0.21
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
   .9524  6117 12665 25.24  256             16384  18.44    .3683 3.838 1e-05 



21:26:58 | time:4021s total_exs:156916 epochs:0.21
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6078 10101 21.01  316             16384  19.56    .4032 3.657 1e-05 59.16 98.32 38.73      .3773   
    total_train_updates  tpb   tps   ups  
                   8874 6137 10199 1.662

21:27:08 | time:4031s total_exs:157240 epochs:0.21
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6121 16004 31.38  324             16384  20.91    .3683 3.529 1e-05    52   136 34.08      .3846   
    total_train_updates  tpb   tps   ups  
                   8901 6173 16140 2.615

21:27:19 | time:4041s total_exs:157500 epochs:0.21
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5745 10329 25.97  260             16384  19.53    .4125 3.415 1e-05 64.72 116.4 30.43      .4017   
    total_train_u



21:28:04 | time:4087s total_exs:158812 epochs:0.21
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5958 10545 22.55  344             16384  19.76    .3683 3.549 1e-05 61.74 109.3 34.77      .3941   
    total_train_updates  tpb   tps  ups  
                   9021 6020 10654 1.77

21:28:15 | time:4097s total_exs:159180 epochs:0.21
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5851 15766 35.42  368             16384  18.87    .3683 3.662 1e-05 66.25 178.5 38.93      .3612   
    total_train_updates  tpb   tps   ups  
                   9049 5917 15945 2.695

21:28:25 | time:4108s total_exs:159400 epochs:0.21
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5921  9693 21.19  220             16384  19.53    .3683 3.363 1e-05 58.06 95.08 28.86      .3951   
    total_train_upd



21:29:10 | time:4153s total_exs:160632 epochs:0.21
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5982 10064 21.31  304             16384  19.98    .3683 3.541 1e-05 54.25 91.27 34.49      .4017   
    total_train_updates  tpb   tps   ups  
                   9164 6036 10155 1.683

21:29:20 | time:4163s total_exs:160956 epochs:0.21
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6132 16101 31.51  324             16384  19.47    .3763 3.654 1e-05  63.3 166.2 38.62      .3733   
    total_train_updates  tpb   tps   ups  
                   9191 6196 16267 2.626

21:29:30 | time:4173s total_exs:161148 epochs:0.21
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6144  9763 19.06  192             16384  19.98    .3683 3.458 1e-05  54.5  86.6 31.76      .4048   
    total_train_u



21:30:16 | time:4219s total_exs:162424 epochs:0.22
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5918  9984 22.94  340             16384  18.79    .3683 3.604 1e-05 71.04 119.8 36.74      .3773   
    total_train_updates  tpb   tps   ups  
                   9307 5989 10103 1.687

21:30:26 | time:4229s total_exs:162800 epochs:0.22
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5994 16246 36.39  376             16384  20.14    .4033 3.705 1e-05    67 181.6 40.63      .3614   
    total_train_updates  tpb   tps   ups  
                   9335 6061 16428 2.711

21:30:36 | time:4239s total_exs:163036 epochs:0.22
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6022 10874 23.67  236             16384  19.63    .3683  3.43 1e-05 62.89 113.5 30.89      .3949   
    total_train_u



21:31:22 | time:4285s total_exs:164276 epochs:0.22
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6154 10016 20.31  312             16384  19.51    .3683 3.634 1e-05    64 104.2 37.85      .3631   
    total_train_updates  tpb   tps   ups  
                   9451 6218 10120 1.628

21:31:33 | time:4295s total_exs:164616 epochs:0.22
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6025 15776 32.97  340             16384  19.72    .3683 3.614 1e-05    64 167.6 37.13      .3727   
    total_train_updates  tpb   tps   ups  
                   9478 6089 15944 2.619

21:31:43 | time:4306s total_exs:164856 epochs:0.22
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6116 11345 23.43  240             16384  20.03    .3764 3.861 1e-05 64.89 120.4 47.49      .3520   
    total_train_u



21:32:29 | time:4351s total_exs:166108 epochs:0.22
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5767  9316 21.81  324             16384  18.82    .3683 3.486 1e-05 64.12 103.6 32.66      .3873   
    total_train_updates  tpb  tps   ups  
                   9594 5831 9420 1.616

21:32:39 | time:4361s total_exs:166436 epochs:0.22
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6041 15758 32.91  328             16384  19.85    .3683 3.649 1e-05 61.04 159.2 38.43      .3869   
    total_train_updates  tpb   tps   ups  
                   9620 6102 15917 2.609

21:32:49 | time:4372s total_exs:166664 epochs:0.22
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6111 10621 22.01  228             16384  19.83    .3683 3.718 1e-05  60.5 105.1 41.18      .3673   
    total_train_upd



21:33:34 | time:4417s total_exs:167900 epochs:0.22
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6144 10181 20.71  300             16384   19.7    .3683  3.45 1e-05  60.5 100.3 31.51      .4022   
    total_train_updates  tpb   tps   ups  
                   9737 6204 10281 1.657

21:33:45 | time:4428s total_exs:168236 epochs:0.22
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6144 16216 31.67  336             16384  20.79    .3683 3.673 1e-05 57.29 151.2 39.35      .3728   
    total_train_updates  tpb   tps  ups  
                   9765 6201 16368 2.64

21:33:55 | time:4438s total_exs:168452 epochs:0.22
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6144 10755    21  216             16384  20.49    .3683 3.795 1e-05 66.06 115.6 44.48      .3482   
    total_train_upd



21:34:40 | time:4483s total_exs:169664 epochs:0.22
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5969  9707 20.36  288             16384   19.8    .3683 3.584 1e-05 61.57 100.1 36.02      .3686   
    total_train_updates  tpb  tps   ups  
                   9880 6030 9807 1.626

21:34:51 | time:4493s total_exs:170020 epochs:0.23
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6022 15792 34.57  356             16384  18.78    .3683 3.717 1e-05 62.85 164.8 41.13      .3765   
    total_train_updates  tpb   tps   ups  
                   9907 6085 15957 2.623

21:35:01 | time:4503s total_exs:170248 epochs:0.23
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5973 10631 22.55  228             16384  19.77    .3683  3.51 1e-05 59.33 105.6 33.45      .3708   
    total_train_upd



21:35:47 | time:4550s total_exs:171500 epochs:0.23
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6144  9988 19.51  300             16384  19.87    .3684 3.543 1e-05 59.12 96.11 34.56      .3823   
    total_train_updates  tpb   tps   ups  
                  10024 6203 10085 1.626

21:35:57 | time:4560s total_exs:171828 epochs:0.23
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6004 15371 32.29  328             16384  20.49    .3683 3.616 1e-05 53.65 137.4 37.19      .3814   
    total_train_updates  tpb   tps  ups  
                  10050 6058 15509 2.56

21:36:07 | time:4570s total_exs:172084 epochs:0.23
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps  ppl  token_acc  \
       1  5973 11916 25.53  256             16384  20.03    .3684 3.619 1e-05 61.95 123.6 37.3      .3818   
    total_train_updat



21:36:53 | time:4615s total_exs:173244 epochs:0.23
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6080 10153 20.57  308             16384  20.34    .3846 3.936 1e-05 71.64 119.6 51.24      .3356   
    total_train_updates  tpb   tps  ups  
                  10163 6152 10272 1.67

21:37:03 | time:4626s total_exs:173568 epochs:0.23
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6144 15759 30.78  324             16384  19.38    .3684 3.543 1e-05 60.74 155.8 34.58      .3774   
    total_train_updates  tpb   tps   ups  
                  10190 6205 15915 2.565

21:37:14 | time:4636s total_exs:173780 epochs:0.23
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6186 10463 21.09  212             16384  19.76    .3684 3.602 1e-05 61.18 103.5 36.68      .3731   
    total_train_upd



21:37:59 | time:4682s total_exs:174976 epochs:0.23
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps  ppl  token_acc  \
       1  5989  9867 20.83  316             16384  20.19    .3681 3.635 1e-05 61.96 102.1 37.9      .3538   
    total_train_updates  tpb  tps   ups  
                  10303 6051 9969 1.648

21:38:09 | time:4692s total_exs:175316 epochs:0.23
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5971 15750 33.22  340             16384  19.73    .3681 3.613 1e-05 67.22 177.3 37.06      .3785   
    total_train_updates  tpb   tps   ups  
                  10330 6038 15927 2.638

21:38:19 | time:4702s total_exs:175516 epochs:0.23
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6149  9852 20.03  200             16384  19.05    .3761 3.697 1e-05 73.25 117.4 40.31      .3677   
    total_train_updat



21:39:05 | time:4748s total_exs:176740 epochs:0.23
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6062  9850 20.04  296             16384  20.29    .3681 3.434 1e-05 52.12  84.7 31.01      .4077   
    total_train_updates  tpb  tps   ups  
                  10444 6114 9935 1.625

21:39:15 | time:4758s total_exs:177080 epochs:0.23
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6034 15935 33.26  340             16384  20.91    .3681 3.743 1e-05 58.78 155.2 42.24      .3623   
    total_train_updates  tpb   tps   ups  
                  10471 6092 16091 2.641

21:39:25 | time:4768s total_exs:177300 epochs:0.23
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6108 10917 21.84  220             16384  19.42    .3681 3.554 1e-05 56.44 100.9 34.96      .3780   
    total_train_upd



21:40:11 | time:4814s total_exs:178548 epochs:0.24
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6104  9817 20.37  304             16384  20.33    .3972 3.818 1e-05    64 102.9 45.53      .3509   
    total_train_updates  tpb  tps   ups  
                  10587 6168 9920 1.608

21:40:22 | time:4824s total_exs:178892 epochs:0.24
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6059 15297  33.4  344             16384  18.55    .4123 3.812 1e-05 70.81 178.8 45.23      .3552   
    total_train_updates  tpb   tps   ups  
                  10613 6130 15476 2.525

21:40:32 | time:4834s total_exs:179120 epochs:0.24
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6144 11539 22.53  228             16384  20.82    .3681 3.695 1e-05 54.74 102.8 40.25      .3712   
    total_train_upd



21:41:17 | time:4880s total_exs:180404 epochs:0.24
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6006  9988 21.02  316             16384  20.41    .3766 3.412 1e-05 52.08 86.61 30.33      .4071   
    total_train_updates  tpb   tps   ups  
                  10729 6058 10075 1.663

21:41:24 | Overflow: setting loss scale to 16384.0
21:41:28 | time:4890s total_exs:180728 epochs:0.24
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
   .9630  6144 16182 31.61  324             16384  20.57    .3681 3.543 1e-05 47.19 124.3 34.58      .3838   
    total_train_updates  tpb   tps   ups  
                  10756 6191 16307 2.634

21:41:38 | time:4901s total_exs:180956 epochs:0.24
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5942 10375 22.12  228             16384  19.22    .3681 3.641 1e-0



21:42:24 | time:4946s total_exs:182144 epochs:0.24
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5828  9462 21.11  312             16384  19.65    .3681 3.442 1e-05 52.92 85.91 31.23      .4087   
    total_train_updates  tpb  tps   ups  
                  10867 5881 9548 1.624

21:42:34 | time:4956s total_exs:182460 epochs:0.24
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6129 15707 31.14  316             16384  20.16    .3681 3.637 1e-05 52.46 134.4 37.98      .3915   
    total_train_updates  tpb   tps   ups  
                  10893 6182 15842 2.563

21:42:44 | time:4967s total_exs:182684 epochs:0.24
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6147 10952 22.17  224             16384  19.05    .3681 3.543 1e-05 64.22 114.4 34.56      .3746   
    total_train_upd



21:43:29 | time:5012s total_exs:183916 epochs:0.24
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5976 10340 21.83  328             16384   19.3    .3681 3.584 1e-05 58.27 100.8 36.02      .3848   
    total_train_updates  tpb   tps  ups  
                  11011 6034 10441 1.73

21:43:40 | time:5023s total_exs:184264 epochs:0.24
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6058 15612 33.21  348             16384   19.1    .4031 3.529 1e-05  60.3 155.4 34.09      .3870   
    total_train_updates  tpb   tps   ups  
                  11038 6119 15767 2.577

21:43:50 | time:5033s total_exs:184480 epochs:0.24
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6004 10201 21.59  216             16384  20.19    .3681 3.577 1e-05 58.59 99.54 35.75      .3835   
    total_train_upd



21:44:36 | time:5079s total_exs:185696 epochs:0.25
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6116  9381 18.94  284             16384  20.43    .3762 3.459 1e-05 61.48 94.29 31.78      .3876   
    total_train_updates  tpb  tps   ups  
                  11153 6178 9475 1.534

21:44:46 | time:5089s total_exs:186024 epochs:0.25
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5942 15501 32.91  328             16384  19.98    .3762 3.762 1e-05 63.04 164.4 43.04      .3569   
    total_train_updates  tpb   tps   ups  
                  11179 6005 15666 2.609

21:44:56 | time:5099s total_exs:186244 epochs:0.25
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6201 10980 21.64  220             16384  20.89    .4094 3.375 1e-05  49.5 87.65 29.21      .4063   
    total_train_upd



21:45:42 | time:5145s total_exs:187520 epochs:0.25
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6146  9882 19.56  292             16384  20.46    .3762 3.568 1e-05 62.17 99.95 35.45      .3747   
    total_train_updates  tpb  tps   ups  
                  11294 6209 9982 1.608

21:45:52 | time:5155s total_exs:187840 epochs:0.25
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6131 15583 31.28  320             16384  20.17    .3715 3.716 1e-05 63.12 160.4 41.08      .3601   
    total_train_updates  tpb   tps   ups  
                  11320 6194 15743 2.543

21:46:03 | time:5165s total_exs:188080 epochs:0.25
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps  ppl  token_acc  \
       1  5892 11029 23.64  240             16384  19.62    .3762 3.747 1e-05 66.16 123.8 42.4      .3580   
    total_train_updat



21:46:48 | time:5211s total_exs:189228 epochs:0.25
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6101  9782 19.52  280             16384  20.03    .3681 3.316 1e-05 49.43 79.27 27.54      .4230   
    total_train_updates  tpb  tps   ups  
                  11432 6150 9862 1.604

21:46:58 | time:5221s total_exs:189592 epochs:0.25
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5814 15378 35.66  364             16384  19.39    .3681 3.484 1e-05 56.93 150.6 32.58      .4034   
    total_train_updates  tpb   tps   ups  
                  11459 5871 15529 2.645

21:47:09 | time:5231s total_exs:189772 epochs:0.25
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6144  9200 17.97  180             16384  20.38    .3682 3.435 1e-05 49.27 73.77 31.05      .3884   
    total_train_upd



21:47:54 | time:5277s total_exs:190996 epochs:0.25
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6144  9888 19.31  288             16384  19.87    .3682 3.641 1e-05 58.21 93.68 38.12      .3822   
    total_train_updates  tpb  tps  ups  
                  11572 6202 9982 1.61

21:48:05 | time:5287s total_exs:191312 epochs:0.25
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6129 15394 30.52  316             16384  20.49    .3762 3.559 1e-05 55.04 138.2 35.13      .3781   
    total_train_updates  tpb   tps   ups  
                  11598 6184 15532 2.512

21:48:15 | time:5297s total_exs:191588 epochs:0.25
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6144 14026 27.39  276             16384  19.43    .3682 3.528 1e-05 56.61 129.2 34.04      .3955   
    total_train_updat



21:49:00 | time:5343s total_exs:192740 epochs:0.26
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps  ppl  token_acc  \
       1  6119  9367 18.37  276             16384  19.99    .3762 3.786 1e-05 65.04 99.57 44.1      .3529   
    total_train_updates  tpb  tps   ups  
                  11715 6184 9466 1.531

21:49:11 | time:5353s total_exs:193064 epochs:0.26
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps  ppl  token_acc  \
       1  6136 16178 31.64  324             16384  19.95    .3682 3.696 1e-05 61.41 161.9 40.3      .3685   
    total_train_updates  tpb   tps   ups  
                  11742 6197 16340 2.637

21:49:21 | time:5363s total_exs:193288 epochs:0.26
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5933 10094 22.42  224             16384  19.48    .3682 3.429 1e-05 56.94 96.87 30.83      .4153   
    total_train_updates



21:50:07 | time:5409s total_exs:194548 epochs:0.26
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5998  9661  20.4  304             16384  19.55    .3682 3.571 1e-05 54.92 88.46 35.54      .3938   
    total_train_updates  tpb  tps   ups  
                  11856 6053 9750 1.611

21:50:17 | time:5419s total_exs:194900 epochs:0.26
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5906 15384 35.27  352             16384  19.92    .3682 3.719 1e-05 63.38 165.1 41.22      .3750   
    total_train_updates  tpb   tps   ups  
                  11882 5969 15549 2.605

21:50:27 | time:5430s total_exs:195160 epochs:0.26
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5719 10082 25.46  260             16384  18.92    .3689 3.499 1e-05 76.44 134.8 33.09      .3866   
    total_train_upd



21:51:13 | time:5475s total_exs:196408 epochs:0.26
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5937 10059 22.42  344             16384  18.86    .3682 3.748 1e-05 78.46 132.9 42.42      .3525   
    total_train_updates  tpb   tps   ups  
                  11998 6015 10192 1.694

21:51:23 | time:5486s total_exs:196772 epochs:0.26
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5880 15226 34.91  364             16384  18.75    .3682 3.583 1e-05 68.63 177.7 35.97      .3697   
    total_train_updates  tpb   tps  ups  
                  12025 5948 15404 2.59

21:51:33 | time:5496s total_exs:197052 epochs:0.26
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5743 10242 27.74  280             16384  17.81    .4032 3.359 1e-05 68.83 122.8 28.76      .4003   
    total_train_upd



21:52:19 | time:5541s total_exs:198336 epochs:0.26
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6000 10105 21.29  316             16384  20.14    .3682 3.435 1e-05 54.44 91.68 31.03      .4129   
    total_train_updates  tpb   tps   ups  
                  12140 6054 10197 1.684

21:52:29 | time:5552s total_exs:198672 epochs:0.26
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6082 15319 32.55  336             16384  19.95    .3682 3.514 1e-05 63.46 159.8 33.59      .3903   
    total_train_updates  tpb   tps   ups  
                  12166 6145 15479 2.519

21:52:39 | time:5562s total_exs:198888 epochs:0.26
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6144 10869 21.23  216             16384  19.73    .3762 3.959 1e-05 74.33 131.5 52.42      .3438   
    total_train_u



21:53:25 | time:5608s total_exs:200124 epochs:0.27
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6176  9902 20.58  308             16384  19.06    .3944 3.509 1e-05 60.79 97.47 33.42      .3859   
    total_train_updates  tpb   tps   ups  
                  12278 6237 10000 1.603

21:53:35 | time:5618s total_exs:200468 epochs:0.27
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5951 15054 33.47  344             16384  19.78    .3682 3.473 1e-05 59.73 151.1 32.22      .3928   
    total_train_updates  tpb   tps  ups  
                  12304 6011 15205 2.53

21:53:45 | time:5628s total_exs:200688 epochs:0.27
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps  ppl  token_acc  \
       1  5888  9953 21.87  220             16384  19.78    .3682 3.511 1e-05 56.94 96.24 33.5      .3926   
    total_train_updat



21:54:31 | time:5674s total_exs:201916 epochs:0.27
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6144  9570 18.69  276             16384  19.63    .3682 3.385 1e-05 50.04 77.95 29.53      .4075   
    total_train_updates  tpb  tps   ups  
                  12417 6194 9648 1.558

21:54:42 | time:5684s total_exs:202240 epochs:0.27
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6144 15930 31.11  324             16384   20.1    .3682 3.705 1e-05 61.04 158.2 40.65      .3592   
    total_train_updates  tpb   tps   ups  
                  12444 6205 16088 2.593

21:54:52 | time:5695s total_exs:202436 epochs:0.27
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6123  9503 19.01  196             16384  19.61    .3682 3.547 1e-05 60.38 93.71 34.69      .3758   
    total_train_upd



21:55:37 | time:5740s total_exs:203664 epochs:0.27
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5911  9606  21.2  300             16384   19.6    .3682 3.697 1e-05 57.13 92.85 40.34      .3676   
    total_train_updates  tpb  tps   ups  
                  12556 5968 9699 1.625

21:55:47 | time:5750s total_exs:203988 epochs:0.27
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6144 16165 31.57  324             16384  19.83    .3682 3.515 1e-05 58.04 152.7 33.61      .3778   
    total_train_updates  tpb   tps   ups  
                  12583 6202 16318 2.631

21:55:58 | time:5760s total_exs:204228 epochs:0.27
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5822  9796 23.75  240             16384  19.52    .3682 3.385 1e-05 57.41 96.59 29.52      .4098   
    total_train_upd



21:56:44 | time:5806s total_exs:205408 epochs:0.27
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5956  9414 20.02  304             16384  18.95    .3763 3.518 1e-05 64.83 102.5 33.72      .3907   
    total_train_updates  tpb  tps   ups  
                  12696 6021 9517 1.581

21:56:54 | time:5817s total_exs:205744 epochs:0.27
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6142 15968 32.35  336             16384  19.48    .3682 3.619 1e-05 61.48 159.8 37.28      .3783   
    total_train_updates  tpb   tps  ups  
                  12723 6203 16128  2.6

21:57:04 | time:5827s total_exs:205968 epochs:0.27
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6037 10843 22.35  224             16384  19.14    .3682 3.323 1e-05 56.44 101.4 27.74      .4124   
    total_train_updat



21:57:50 | time:5872s total_exs:207180 epochs:0.27
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5824  9826 22.14  328             16384  19.94    .3683 3.615 1e-05 62.72 105.8 37.14      .3795   
    total_train_updates  tpb  tps   ups  
                  12835 5886 9932 1.687

21:58:00 | time:5883s total_exs:207508 epochs:0.27
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6120 15978 31.71  328             16384  19.61    .3682 3.601 1e-05 59.15 154.4 36.64      .3688   
    total_train_updates  tpb   tps   ups  
                  12862 6179 16132 2.611

21:58:10 | time:5893s total_exs:207776 epochs:0.28
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5850 11534 26.41  268             16384  19.26    .3763 3.583 1e-05 67.75 133.6 35.98      .3860   
    total_train_upd



21:58:56 | time:5939s total_exs:208912 epochs:0.28
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6065  9769 20.13  300             16384  19.63    .3683 3.567 1e-05 52.38 84.36 35.41      .3978   
    total_train_updates  tpb  tps   ups  
                  12972 6117 9853 1.611

21:59:06 | time:5949s total_exs:209200 epochs:0.28
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6144 14272 27.88  288             16384  20.15    .3682 3.314 1e-05 48.08 111.7 27.49      .4142   
    total_train_updates  tpb   tps   ups  
                  12996 6192 14384 2.323

21:59:17 | time:5959s total_exs:209512 epochs:0.28
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps  ppl  token_acc  \
       1  5987 14190 30.81  312             16384  18.99    .3683  3.44 1e-05 59.42 140.8 31.2      .3927   
    total_train_updat



22:00:02 | time:6005s total_exs:210700 epochs:0.28
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5745  8197 19.56  288             16384  19.34    .3683 3.061 1e-05 55.33 78.94 21.34      .4372   
    total_train_updates  tpb  tps   ups  
                  13112 5801 8276 1.427

22:00:12 | time:6015s total_exs:211020 epochs:0.28
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6104 15757 31.77  320             16384  19.72    .3683 3.412 1e-05 57.46 148.3 30.32      .3902   
    total_train_updates  tpb   tps   ups  
                  13138 6161 15906 2.582

22:00:22 | time:6025s total_exs:211256 epochs:0.28
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5940 10522 23.22  236             16384  19.74    .3683 3.559 1e-05 65.11 115.3 35.13      .3814   
    total_train_upd



22:01:08 | time:6071s total_exs:212536 epochs:0.28
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5750  9373 22.43  344             16384  19.25    .3683 3.403 1e-05 65.32 106.5 30.06      .3889   
    total_train_updates  tpb  tps  ups  
                  13253 5815 9480 1.63

22:01:19 | time:6081s total_exs:212896 epochs:0.28
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps  ppl  token_acc  \
       1  5840 14950 35.44  360             16384  19.08    .3683 3.532 1e-05 66.65 170.6 34.2      .3907   
    total_train_updates  tpb   tps  ups  
                  13279 5907 15120 2.56

22:01:29 | time:6092s total_exs:213188 epochs:0.28
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5871 11924 28.24  292             16384  19.06    .3683 3.516 1e-05 65.43 132.9 33.65      .3945   
    total_train_updates  



22:02:15 | time:6137s total_exs:214424 epochs:0.28
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6030  9612 20.45  308             16384  19.23    .3768 3.463 1e-05 59.04 94.11 31.92      .3888   
    total_train_updates  tpb  tps   ups  
                  13394 6089 9706 1.594

22:02:25 | time:6147s total_exs:214760 epochs:0.28
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5953 14737 33.27  336             16384  18.74    .3683 3.439 1e-05 66.24   164 31.15      .3859   
    total_train_updates  tpb   tps   ups  
                  13419 6019 14901 2.476

22:02:35 | time:6157s total_exs:214992 epochs:0.28
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6171 11732 23.21  232             16384  19.82    .3768  3.72 1e-05 62.32 118.5 41.25      .3691   
    total_train_upd



22:03:21 | time:6204s total_exs:216204 epochs:0.29
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6156  9948 19.91  308             16384  19.41    .3857 3.549 1e-05 57.72 93.28 34.78      .3791   
    total_train_updates  tpb   tps   ups  
                  13532 6213 10041 1.616

22:03:32 | time:6214s total_exs:216520 epochs:0.29
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps  ppl  token_acc  \
       1  6136 15220 30.14  316             16384   19.6    .3857 3.463 1e-05 52.35 129.8 31.9      .4012   
    total_train_updates  tpb   tps   ups  
                  13558 6188 15349 2.481

22:03:42 | time:6225s total_exs:216752 epochs:0.29
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6205 11405 22.44  232             16384  20.12    .3998 3.698 1e-05 60.32 110.8 40.38      .3534   
    total_train_upd



22:04:27 | time:6270s total_exs:217988 epochs:0.29
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6008  9719 21.94  312             16384  17.97    .4033  3.62 1e-05 73.65 119.1 37.35      .3589   
    total_train_updates  tpb  tps   ups  
                  13668 6082 9838 1.618

22:04:37 | time:6280s total_exs:218304 epochs:0.29
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5776 12905  30.7  316             16384  18.82    .3683 3.403 1e-05 57.78 129.1 30.05      .4003   
    total_train_updates  tpb   tps   ups  
                  13691 5833 13034 2.235

22:04:47 | time:6290s total_exs:218592 epochs:0.29
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6144 14816 28.94  288             16384  18.92    .3683 3.794 1e-05 73.12 176.3 44.42      .3499   
    total_train_upd



22:05:33 | time:6335s total_exs:219836 epochs:0.29
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
   .9565  5909  9331 20.87  304             29206  18.11    .3683 3.295 1e-05 58.48 92.35 26.99      .4104   
    total_train_updates  tpb  tps   ups  
                  13807 5967 9423 1.579

22:05:43 | time:6346s total_exs:220180 epochs:0.29
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6013 15439 32.71  344             16384  20.06    .3683 3.545 1e-05 58.78 150.9 34.64      .3825   
    total_train_updates  tpb   tps   ups  
                  13834 6072 15590 2.568

22:05:54 | time:6356s total_exs:220440 epochs:0.29
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5741 10005 25.17  260             16384  18.73    .3945  3.45 1e-05 62.94 109.7 31.51      .3954   
    total_train_upd



22:06:39 | time:6402s total_exs:221664 epochs:0.29
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5983  9563 20.29  292             16384  19.58    .3764 3.627 1e-05  65.3 104.4 37.59      .3722   
    total_train_updates  tpb  tps   ups  
                  13944 6048 9667 1.598

22:06:49 | time:6412s total_exs:222024 epochs:0.29
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6063 15705 34.54  360             16384  19.25    .3683 3.389 1e-05 55.22   143 29.64      .4199   
    total_train_updates  tpb   tps   ups  
                  13971 6118 15848 2.591

22:07:00 | time:6422s total_exs:222216 epochs:0.29
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6144  9608 18.77  192             16384  19.53    .3683 3.661 1e-05 63.94 99.99 38.88      .3705   
    total_train_upd



22:07:45 | time:6468s total_exs:223484 epochs:0.30
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6141  9747  19.6  284             16384  19.22    .3764  3.47 1e-05 60.09 95.37 32.14      .3821   
    total_train_updates  tpb  tps   ups  
                  14084 6201 9843 1.587

22:07:56 | time:6478s total_exs:223844 epochs:0.30
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6003 15512 34.45  360             16384  18.46    .3683 3.507 1e-05 62.63 161.8 33.34      .3909   
    total_train_updates  tpb   tps   ups  
                  14111 6065 15674 2.585

22:08:06 | time:6488s total_exs:224072 epochs:0.30
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6144 11680 22.81  228             16384  18.87    .3683 3.517 1e-05 60.89 115.8 33.67      .3941   
    total_train_upd



22:08:51 | time:6534s total_exs:225196 epochs:0.30
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6144  9456 18.47  276             16384  19.44    .3683 3.604 1e-05    59  90.8 36.76      .3788   
    total_train_updates  tpb  tps   ups  
                  14221 6203 9547 1.539

22:09:01 | time:6544s total_exs:225528 epochs:0.30
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps  ppl  token_acc  \
       1  5993 15343 32.69  332             16384  19.13    .3683 3.747 1e-05 70.46 180.4 42.4      .3543   
    total_train_updates  tpb   tps   ups  
                  14247 6063 15523 2.561

22:09:12 | time:6554s total_exs:225784 epochs:0.30
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5709 10161 25.31  256             16384  18.39    .3764 3.479 1e-05    72 128.1 32.41      .3981   
    total_train_updat



22:09:57 | time:6600s total_exs:226968 epochs:0.30
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5916  9554 21.53  320             16384   19.3    .3684 3.421 1e-05 58.46 94.41 30.61      .4056   
    total_train_updates  tpb  tps   ups  
                  14359 5975 9649 1.615

22:10:08 | time:6610s total_exs:227252 epochs:0.30
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6151 13862 27.82  284             16384  19.78    .3684 3.486 1e-05 59.35 133.7 32.66      .3890   
    total_train_updates  tpb   tps   ups  
                  14382 6211 13996 2.254

22:10:18 | time:6621s total_exs:227580 epochs:0.30
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  6006 14585 31.86  328             16384  18.85    .3684 3.395 1e-05 61.88 150.3 29.83      .3943   
    total_train_upd



22:11:03 | time:6666s total_exs:228844 epochs:0.30
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5916  8787 19.17  284             16384  20.02    .3684 3.424 1e-05    58 86.14 30.69      .4122   
    total_train_updates  tpb  tps   ups  
                  14498 5974 8873 1.485

22:11:14 | time:6677s total_exs:229224 epochs:0.30
    clip  ctpb  ctps  exps  exs  fp16_loss_scalar  gnorm  gpu_mem  loss    lr  ltpb  ltps   ppl  token_acc  \
       1  5849 15152 36.46  380             16384  18.14    .3858 3.375 1e-05 67.15   174 29.23      .4175   
    total_train_updates  tpb   tps   ups  
                  14525 5916 15326 2.591



ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-12-13afe375ff26>", line 41, in <module>
    label_turns='both',  # https://parl.ai/docs/core/teachers.html#parlai.core.teachers.ConversationTeacher
  File "/usr/local/lib/python3.7/dist-packages/parlai/core/script.py", line 108, in main
    return cls._run_kwargs(kwargs)
  File "/usr/local/lib/python3.7/dist-packages/parlai/core/script.py", line 74, in _run_kwargs
    return cls._run_from_parser_and_opt(opt, parser)
  File "/usr/local/lib/python3.7/dist-packages/parlai/core/script.py", line 89, in _run_from_parser_and_opt
    return script.run()
  File "/usr/local/lib/python3.7/dist-packages/parlai/scripts/train_model.py", line 789, in run
    return self.train_loop.train()
  File "/usr/local/lib/python3.7/dist-packages/parlai/scripts/train_model.py", line 679, in train
  

KeyboardInterrupt: ignored

In [None]:
!ls /content/drive/MyDrive/chatbot_model/