In [1]:
import wandb
import os
import numpy as np
from datasets import load_dataset
from transformers import Seq2SeqTrainingArguments as TrainingArguments
from transformers import Seq2SeqTrainer as Trainer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import evaluate
import datetime

In [2]:
# Language codes used as keys into each "translation" record:
# inputs are read from the "zh" entry and targets from the "en" entry.
source_lang, target_lang = "zh", "en"

def preprocess_function(examples):
    """Tokenize a batch of translation pairs into model inputs and masked labels.

    Args:
        examples: a batch whose ``"translation"`` entry is a sequence of mappings
            keyed by language code (``source_lang`` and ``target_lang`` are read).

    Returns:
        The tokenizer output for the batch, with every padding token id inside
        ``"labels"`` replaced by ``-100``.
    """

    def _texts_for(lang):
        # Pull the text of one language out of every record in the batch.
        return [record[lang] for record in examples["translation"]]

    source_texts = _texts_for(source_lang)
    target_texts = _texts_for(target_lang)

    # Sources and targets are both padded/truncated to a fixed length of 32.
    model_inputs = tokenizer(
        source_texts,
        text_target=target_texts,
        padding="max_length",
        max_length=32,
        truncation=True,
    )

    # Mask label padding with -100.
    pad_id = tokenizer.pad_token_id
    masked_labels = []
    for label_ids in model_inputs["labels"]:
        masked_labels.append(
            [-100 if token_id == pad_id else token_id for token_id in label_ids]
        )
    model_inputs["labels"] = masked_labels
    return model_inputs

# Load the "sacrebleu" metric once at module level; compute_metrics() below
# calls metric.compute() and reports its "score" field as BLEU.
metric = evaluate.load("sacrebleu")
def postprocess_text(preds, labels):
    """Strip surrounding whitespace from decoded predictions and references.

    Args:
        preds: decoded prediction strings.
        labels: decoded reference strings.

    Returns:
        ``(preds, labels)`` where ``preds`` is a list of stripped strings and
        ``labels`` is a list of one-element lists, each holding one stripped
        reference.
    """
    cleaned_preds = []
    for text in preds:
        cleaned_preds.append(text.strip())

    wrapped_labels = []
    for text in labels:
        # Each reference is wrapped in its own list.
        wrapped_labels.append([text.strip()])

    return cleaned_preds, wrapped_labels


def compute_metrics(eval_preds):
    """Compute BLEU and the mean generated length for evaluation predictions.

    Args:
        eval_preds: a ``(predictions, labels)`` pair of token-id arrays.
            ``predictions`` may itself be a tuple, in which case only its first
            element is used. ``-100`` in either array is treated as padding.

    Returns:
        dict with ``"bleu"`` (the ``score`` field returned by ``metric.compute``)
        and ``"gen_len"`` (mean count of non-padding tokens per prediction),
        both rounded to 4 decimal places.
    """
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]

    # -100 is a masking sentinel, not a vocabulary id, so it must never reach the
    # tokenizer. Labels carry it from preprocessing; predictions can carry it as
    # well (presumably as filler when batches of different lengths are gathered
    # -- confirm against the installed Trainer). Normalising both arrays up front
    # keeps decoding safe and stops the sentinel being counted as a real token
    # in gen_len.
    pad_id = tokenizer.pad_token_id
    preds = np.where(preds != -100, preds, pad_id)
    labels = np.where(labels != -100, labels, pad_id)

    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)

    result = metric.compute(predictions=decoded_preds, references=decoded_labels)
    result = {"bleu": result["score"]}

    # Generated length = number of non-padding ids in each prediction row.
    prediction_lens = [np.count_nonzero(pred != pad_id) for pred in preds]
    result["gen_len"] = np.mean(prediction_lens)
    result = {k: round(v, 4) for k, v in result.items()}
    return result

# Download and prepare the data.
checkpoint = "google/mt5-small"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# The cache_dir argument is optional.
dataset = load_dataset(
    "iwslt2017",
    "iwslt2017-zh-en",
    cache_dir="./cache",
)

# Run preprocess_function over the data in batches; the result is later indexed
# by split name ("train" / "validation") when the Trainer is built.
tokenized_sentences = dataset.map(preprocess_function, batched=True)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [9]:
# Timestamp (UTC, second precision, tz suffix dropped) used as the wandb run name.
dt = datetime.datetime.now(datetime.timezone.utc)
dt = dt.replace(microsecond=0, tzinfo=None)

# All environment variables are set first, before the model, TrainingArguments
# and Trainer are created, so nothing is constructed ahead of its configuration.
# set the wandb project where this run will be logged
os.environ["WANDB_PROJECT"]="MT5"
# save your trained model checkpoint to wandb
os.environ["WANDB_LOG_MODEL"]="true"
# turn off watch to log faster
os.environ["WANDB_WATCH"]="false"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:32"
os.environ["WANDB_NAME"] = str(dt)

model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

# Decoding options are set on the model's GenerationConfig, which is the object
# generate() reads. Writing them onto model.config is the legacy location.
# max_length matches the max_length=32 used when tokenizing in preprocess_function.
model.generation_config.max_length = 32
model.generation_config.min_length = 8
model.generation_config.no_repeat_ngram_size = 3
model.generation_config.early_stopping = True
model.generation_config.length_penalty = 2.0
model.generation_config.num_beams = 4

# pass "wandb" to the 'report_to' parameter to turn on wandb logging
# NOTE(review): the saved output of the training cell logs learning_rate around
# 6e-4 and runs several evaluations per training-loss log line, which does not
# match learning_rate=1e-5 and eval_steps == logging_steps below. Presumably the
# scheduler/trainer state restored from the resumed checkpoint takes precedence
# -- confirm before relying on these values.
# NOTE(review): newer transformers releases appear to rename
# `evaluation_strategy` to `eval_strategy` -- confirm against the installed version.
training_args = TrainingArguments(
    output_dir='wandb_mt5',
    report_to="wandb",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    evaluation_strategy="steps",
    logging_steps=2000,
    eval_steps= 2000,
    max_steps = 300000,
    save_strategy="steps",
    save_steps = 5000,
    learning_rate=1e-5,
    weight_decay=0.005,
    # Evaluate with generate() so compute_metrics receives generated token ids.
    predict_with_generate=True,
)

# define the trainer and start training
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_sentences["train"],
    eval_dataset=tokenized_sentences["validation"],
    compute_metrics=compute_metrics,
)

In [10]:
# Resume fine-tuning from a previously saved Trainer checkpoint when it is
# present. On a machine without that directory, fall back to starting from the
# weights currently loaded in `model`, so a fresh "Restart & Run All" still works.
resume_checkpoint = "./wandb_mt5/checkpoint-100000"
if not os.path.isdir(resume_checkpoint):
    print(f"Checkpoint {resume_checkpoint!r} not found; starting training without resuming.")
    resume_checkpoint = None

# Always close the wandb run, including when training raises or is interrupted;
# otherwise the run stays open in the notebook kernel.
try:
    trainer.train(resume_from_checkpoint=resume_checkpoint)
except BaseException:
    # Report the run as failed, then re-raise the original error.
    wandb.finish(exit_code=1)
    raise
else:
    # [optional] finish the wandb run, necessary in notebooks
    wandb.finish()

There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

  0%|          | 0/300000 [00:00<?, ?it/s]



  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1504266262054443, 'eval_bleu': 13.223, 'eval_gen_len': 23.8589, 'eval_runtime': 59.9964, 'eval_samples_per_second': 14.651, 'eval_steps_per_second': 0.917, 'epoch': 6.93}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.165928840637207, 'eval_bleu': 12.9764, 'eval_gen_len': 23.4448, 'eval_runtime': 75.7278, 'eval_samples_per_second': 11.607, 'eval_steps_per_second': 0.726, 'epoch': 6.95}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.192328929901123, 'eval_bleu': 12.7061, 'eval_gen_len': 23.7782, 'eval_runtime': 44.1176, 'eval_samples_per_second': 19.924, 'eval_steps_per_second': 1.247, 'epoch': 6.96}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1959340572357178, 'eval_bleu': 13.0156, 'eval_gen_len': 23.8305, 'eval_runtime': 44.849, 'eval_samples_per_second': 19.599, 'eval_steps_per_second': 1.226, 'epoch': 6.97}
{'loss': 1.8254, 'grad_norm': 1.1842527389526367, 'learning_rate': 0.0006633333333333334, 'epoch': 6.99}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1877317428588867, 'eval_bleu': 12.0581, 'eval_gen_len': 23.7395, 'eval_runtime': 44.4861, 'eval_samples_per_second': 19.759, 'eval_steps_per_second': 1.236, 'epoch': 6.99}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.186307907104492, 'eval_bleu': 12.5302, 'eval_gen_len': 23.7725, 'eval_runtime': 46.067, 'eval_samples_per_second': 19.081, 'eval_steps_per_second': 1.194, 'epoch': 7.0}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2012596130371094, 'eval_bleu': 12.1818, 'eval_gen_len': 23.8896, 'eval_runtime': 45.1159, 'eval_samples_per_second': 19.483, 'eval_steps_per_second': 1.219, 'epoch': 7.01}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2071115970611572, 'eval_bleu': 12.3895, 'eval_gen_len': 23.6985, 'eval_runtime': 46.0256, 'eval_samples_per_second': 19.098, 'eval_steps_per_second': 1.195, 'epoch': 7.03}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2166519165039062, 'eval_bleu': 12.2415, 'eval_gen_len': 23.215, 'eval_runtime': 46.289, 'eval_samples_per_second': 18.989, 'eval_steps_per_second': 1.188, 'epoch': 7.04}
{'loss': 1.8373, 'grad_norm': 1.516446590423584, 'learning_rate': 0.00066, 'epoch': 7.06}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.196681022644043, 'eval_bleu': 12.5012, 'eval_gen_len': 23.9317, 'eval_runtime': 45.2994, 'eval_samples_per_second': 19.404, 'eval_steps_per_second': 1.214, 'epoch': 7.06}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.21464204788208, 'eval_bleu': 12.5265, 'eval_gen_len': 23.7747, 'eval_runtime': 44.1486, 'eval_samples_per_second': 19.91, 'eval_steps_per_second': 1.246, 'epoch': 7.07}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2152256965637207, 'eval_bleu': 12.3077, 'eval_gen_len': 23.8237, 'eval_runtime': 46.4999, 'eval_samples_per_second': 18.903, 'eval_steps_per_second': 1.183, 'epoch': 7.08}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.206073045730591, 'eval_bleu': 12.1442, 'eval_gen_len': 24.058, 'eval_runtime': 44.8867, 'eval_samples_per_second': 19.583, 'eval_steps_per_second': 1.225, 'epoch': 7.1}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.202686309814453, 'eval_bleu': 12.3263, 'eval_gen_len': 24.0637, 'eval_runtime': 44.213, 'eval_samples_per_second': 19.881, 'eval_steps_per_second': 1.244, 'epoch': 7.11}
{'loss': 1.8553, 'grad_norm': 1.324501872062683, 'learning_rate': 0.0006566666666666666, 'epoch': 7.13}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.217543363571167, 'eval_bleu': 12.3533, 'eval_gen_len': 23.8828, 'eval_runtime': 43.5027, 'eval_samples_per_second': 20.206, 'eval_steps_per_second': 1.264, 'epoch': 7.13}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2124671936035156, 'eval_bleu': 12.5383, 'eval_gen_len': 23.8635, 'eval_runtime': 43.9514, 'eval_samples_per_second': 19.999, 'eval_steps_per_second': 1.251, 'epoch': 7.14}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.222832202911377, 'eval_bleu': 12.1605, 'eval_gen_len': 24.0239, 'eval_runtime': 44.6229, 'eval_samples_per_second': 19.698, 'eval_steps_per_second': 1.233, 'epoch': 7.15}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2046616077423096, 'eval_bleu': 12.4679, 'eval_gen_len': 24.0694, 'eval_runtime': 44.2735, 'eval_samples_per_second': 19.854, 'eval_steps_per_second': 1.242, 'epoch': 7.17}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.197195529937744, 'eval_bleu': 12.5597, 'eval_gen_len': 23.8862, 'eval_runtime': 45.0832, 'eval_samples_per_second': 19.497, 'eval_steps_per_second': 1.22, 'epoch': 7.18}
{'loss': 1.863, 'grad_norm': 1.0741591453552246, 'learning_rate': 0.0006533333333333333, 'epoch': 7.19}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.218447208404541, 'eval_bleu': 12.8397, 'eval_gen_len': 24.1684, 'eval_runtime': 45.4458, 'eval_samples_per_second': 19.342, 'eval_steps_per_second': 1.21, 'epoch': 7.19}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.217489719390869, 'eval_bleu': 12.9641, 'eval_gen_len': 23.3993, 'eval_runtime': 45.0512, 'eval_samples_per_second': 19.511, 'eval_steps_per_second': 1.221, 'epoch': 7.21}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.201611280441284, 'eval_bleu': 12.3279, 'eval_gen_len': 23.6314, 'eval_runtime': 45.557, 'eval_samples_per_second': 19.294, 'eval_steps_per_second': 1.207, 'epoch': 7.22}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.217944383621216, 'eval_bleu': 12.3215, 'eval_gen_len': 23.7736, 'eval_runtime': 45.2942, 'eval_samples_per_second': 19.406, 'eval_steps_per_second': 1.214, 'epoch': 7.24}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2151737213134766, 'eval_bleu': 12.6679, 'eval_gen_len': 23.8646, 'eval_runtime': 47.4432, 'eval_samples_per_second': 18.527, 'eval_steps_per_second': 1.159, 'epoch': 7.25}
{'loss': 1.8864, 'grad_norm': 1.229161262512207, 'learning_rate': 0.0006500000000000001, 'epoch': 7.26}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1962292194366455, 'eval_bleu': 12.7894, 'eval_gen_len': 23.7122, 'eval_runtime': 46.3328, 'eval_samples_per_second': 18.971, 'eval_steps_per_second': 1.187, 'epoch': 7.26}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2046897411346436, 'eval_bleu': 12.5104, 'eval_gen_len': 23.8737, 'eval_runtime': 45.3474, 'eval_samples_per_second': 19.384, 'eval_steps_per_second': 1.213, 'epoch': 7.28}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1956377029418945, 'eval_bleu': 12.4678, 'eval_gen_len': 23.554, 'eval_runtime': 45.0839, 'eval_samples_per_second': 19.497, 'eval_steps_per_second': 1.22, 'epoch': 7.29}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2192437648773193, 'eval_bleu': 12.696, 'eval_gen_len': 23.9898, 'eval_runtime': 45.529, 'eval_samples_per_second': 19.306, 'eval_steps_per_second': 1.208, 'epoch': 7.31}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2186923027038574, 'eval_bleu': 12.4846, 'eval_gen_len': 24.0887, 'eval_runtime': 45.6904, 'eval_samples_per_second': 19.238, 'eval_steps_per_second': 1.204, 'epoch': 7.32}
{'loss': 1.89, 'grad_norm': 1.1173967123031616, 'learning_rate': 0.0006466666666666666, 'epoch': 7.33}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2051126956939697, 'eval_bleu': 12.7778, 'eval_gen_len': 23.8476, 'eval_runtime': 44.9528, 'eval_samples_per_second': 19.554, 'eval_steps_per_second': 1.224, 'epoch': 7.33}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2044777870178223, 'eval_bleu': 12.679, 'eval_gen_len': 24.1217, 'eval_runtime': 47.4357, 'eval_samples_per_second': 18.53, 'eval_steps_per_second': 1.159, 'epoch': 7.35}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2014734745025635, 'eval_bleu': 12.8466, 'eval_gen_len': 23.7804, 'eval_runtime': 44.5969, 'eval_samples_per_second': 19.71, 'eval_steps_per_second': 1.233, 'epoch': 7.36}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1957802772521973, 'eval_bleu': 12.7577, 'eval_gen_len': 23.8225, 'eval_runtime': 46.9883, 'eval_samples_per_second': 18.707, 'eval_steps_per_second': 1.171, 'epoch': 7.37}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.195293664932251, 'eval_bleu': 12.5919, 'eval_gen_len': 24.1763, 'eval_runtime': 45.4387, 'eval_samples_per_second': 19.345, 'eval_steps_per_second': 1.21, 'epoch': 7.39}
{'loss': 1.9023, 'grad_norm': 1.194529414176941, 'learning_rate': 0.0006433333333333333, 'epoch': 7.4}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.204763174057007, 'eval_bleu': 13.1388, 'eval_gen_len': 23.6507, 'eval_runtime': 44.7282, 'eval_samples_per_second': 19.652, 'eval_steps_per_second': 1.23, 'epoch': 7.4}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1978602409362793, 'eval_bleu': 12.942, 'eval_gen_len': 23.9898, 'eval_runtime': 45.8839, 'eval_samples_per_second': 19.157, 'eval_steps_per_second': 1.199, 'epoch': 7.42}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2034618854522705, 'eval_bleu': 12.6955, 'eval_gen_len': 24.0421, 'eval_runtime': 44.0156, 'eval_samples_per_second': 19.97, 'eval_steps_per_second': 1.25, 'epoch': 7.43}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.198448657989502, 'eval_bleu': 12.9142, 'eval_gen_len': 23.6792, 'eval_runtime': 45.7531, 'eval_samples_per_second': 19.212, 'eval_steps_per_second': 1.202, 'epoch': 7.44}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2019002437591553, 'eval_bleu': 12.7183, 'eval_gen_len': 24.2025, 'eval_runtime': 44.6392, 'eval_samples_per_second': 19.691, 'eval_steps_per_second': 1.232, 'epoch': 7.46}
{'loss': 1.9069, 'grad_norm': 1.3244589567184448, 'learning_rate': 0.00064, 'epoch': 7.47}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2048282623291016, 'eval_bleu': 12.7994, 'eval_gen_len': 23.9272, 'eval_runtime': 45.9588, 'eval_samples_per_second': 19.126, 'eval_steps_per_second': 1.197, 'epoch': 7.47}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1937057971954346, 'eval_bleu': 12.6832, 'eval_gen_len': 23.6177, 'eval_runtime': 46.8619, 'eval_samples_per_second': 18.757, 'eval_steps_per_second': 1.174, 'epoch': 7.49}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2041683197021484, 'eval_bleu': 12.422, 'eval_gen_len': 24.0819, 'eval_runtime': 44.7518, 'eval_samples_per_second': 19.642, 'eval_steps_per_second': 1.229, 'epoch': 7.5}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2008376121520996, 'eval_bleu': 13.2527, 'eval_gen_len': 23.9909, 'eval_runtime': 45.9103, 'eval_samples_per_second': 19.146, 'eval_steps_per_second': 1.198, 'epoch': 7.51}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.187385082244873, 'eval_bleu': 12.7148, 'eval_gen_len': 24.215, 'eval_runtime': 43.3499, 'eval_samples_per_second': 20.277, 'eval_steps_per_second': 1.269, 'epoch': 7.53}
{'loss': 1.8988, 'grad_norm': 1.0979771614074707, 'learning_rate': 0.0006366666666666667, 'epoch': 7.54}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1891679763793945, 'eval_bleu': 12.6669, 'eval_gen_len': 23.9852, 'eval_runtime': 44.5101, 'eval_samples_per_second': 19.748, 'eval_steps_per_second': 1.236, 'epoch': 7.54}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1927263736724854, 'eval_bleu': 12.8393, 'eval_gen_len': 23.9397, 'eval_runtime': 45.8305, 'eval_samples_per_second': 19.179, 'eval_steps_per_second': 1.2, 'epoch': 7.55}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.188312292098999, 'eval_bleu': 12.891, 'eval_gen_len': 23.7497, 'eval_runtime': 89.0222, 'eval_samples_per_second': 9.874, 'eval_steps_per_second': 0.618, 'epoch': 7.57}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1845996379852295, 'eval_bleu': 12.5747, 'eval_gen_len': 23.9761, 'eval_runtime': 88.5994, 'eval_samples_per_second': 9.921, 'eval_steps_per_second': 0.621, 'epoch': 7.58}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2079355716705322, 'eval_bleu': 12.5529, 'eval_gen_len': 23.8134, 'eval_runtime': 87.6612, 'eval_samples_per_second': 10.027, 'eval_steps_per_second': 0.627, 'epoch': 7.6}
{'loss': 1.9017, 'grad_norm': 1.2584043741226196, 'learning_rate': 0.0006333333333333333, 'epoch': 7.61}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.193101167678833, 'eval_bleu': 13.0025, 'eval_gen_len': 23.9067, 'eval_runtime': 89.1782, 'eval_samples_per_second': 9.857, 'eval_steps_per_second': 0.617, 'epoch': 7.61}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.204484224319458, 'eval_bleu': 12.8093, 'eval_gen_len': 23.8658, 'eval_runtime': 87.7672, 'eval_samples_per_second': 10.015, 'eval_steps_per_second': 0.627, 'epoch': 7.62}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2226123809814453, 'eval_bleu': 12.7897, 'eval_gen_len': 23.8749, 'eval_runtime': 89.2698, 'eval_samples_per_second': 9.847, 'eval_steps_per_second': 0.616, 'epoch': 7.64}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1969563961029053, 'eval_bleu': 12.8773, 'eval_gen_len': 23.6257, 'eval_runtime': 88.1155, 'eval_samples_per_second': 9.976, 'eval_steps_per_second': 0.624, 'epoch': 7.65}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1949119567871094, 'eval_bleu': 12.207, 'eval_gen_len': 24.165, 'eval_runtime': 88.1357, 'eval_samples_per_second': 9.973, 'eval_steps_per_second': 0.624, 'epoch': 7.67}
{'loss': 1.9034, 'grad_norm': 1.2199145555496216, 'learning_rate': 0.00063, 'epoch': 7.68}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.204676866531372, 'eval_bleu': 12.2216, 'eval_gen_len': 24.0353, 'eval_runtime': 87.848, 'eval_samples_per_second': 10.006, 'eval_steps_per_second': 0.626, 'epoch': 7.68}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1761767864227295, 'eval_bleu': 12.6456, 'eval_gen_len': 24.1024, 'eval_runtime': 87.9212, 'eval_samples_per_second': 9.998, 'eval_steps_per_second': 0.626, 'epoch': 7.69}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1884443759918213, 'eval_bleu': 12.7353, 'eval_gen_len': 23.9317, 'eval_runtime': 88.1272, 'eval_samples_per_second': 9.974, 'eval_steps_per_second': 0.624, 'epoch': 7.71}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.200662851333618, 'eval_bleu': 13.0985, 'eval_gen_len': 23.9716, 'eval_runtime': 87.5664, 'eval_samples_per_second': 10.038, 'eval_steps_per_second': 0.628, 'epoch': 7.72}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1798245906829834, 'eval_bleu': 12.8636, 'eval_gen_len': 24.0432, 'eval_runtime': 88.7623, 'eval_samples_per_second': 9.903, 'eval_steps_per_second': 0.62, 'epoch': 7.73}
{'loss': 1.8967, 'grad_norm': 1.1623622179031372, 'learning_rate': 0.0006266666666666668, 'epoch': 7.75}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.182363986968994, 'eval_bleu': 12.9675, 'eval_gen_len': 23.9192, 'eval_runtime': 87.9202, 'eval_samples_per_second': 9.998, 'eval_steps_per_second': 0.626, 'epoch': 7.75}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1951193809509277, 'eval_bleu': 12.9253, 'eval_gen_len': 23.975, 'eval_runtime': 88.3764, 'eval_samples_per_second': 9.946, 'eval_steps_per_second': 0.622, 'epoch': 7.76}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1939914226531982, 'eval_bleu': 12.8714, 'eval_gen_len': 23.9841, 'eval_runtime': 88.8662, 'eval_samples_per_second': 9.891, 'eval_steps_per_second': 0.619, 'epoch': 7.78}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.190626859664917, 'eval_bleu': 12.8473, 'eval_gen_len': 23.818, 'eval_runtime': 87.5942, 'eval_samples_per_second': 10.035, 'eval_steps_per_second': 0.628, 'epoch': 7.79}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1893720626831055, 'eval_bleu': 12.8543, 'eval_gen_len': 24.0046, 'eval_runtime': 88.0605, 'eval_samples_per_second': 9.982, 'eval_steps_per_second': 0.625, 'epoch': 7.8}
{'loss': 1.9077, 'grad_norm': 1.3289134502410889, 'learning_rate': 0.0006233333333333333, 'epoch': 7.82}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1920042037963867, 'eval_bleu': 13.0175, 'eval_gen_len': 23.7497, 'eval_runtime': 88.3759, 'eval_samples_per_second': 9.946, 'eval_steps_per_second': 0.622, 'epoch': 7.82}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1780126094818115, 'eval_bleu': 12.8634, 'eval_gen_len': 24.1092, 'eval_runtime': 88.625, 'eval_samples_per_second': 9.918, 'eval_steps_per_second': 0.621, 'epoch': 7.83}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.169506788253784, 'eval_bleu': 12.5177, 'eval_gen_len': 24.1092, 'eval_runtime': 89.2574, 'eval_samples_per_second': 9.848, 'eval_steps_per_second': 0.616, 'epoch': 7.85}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1805026531219482, 'eval_bleu': 13.01, 'eval_gen_len': 23.9704, 'eval_runtime': 89.3535, 'eval_samples_per_second': 9.837, 'eval_steps_per_second': 0.616, 'epoch': 7.86}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.190020799636841, 'eval_bleu': 12.5785, 'eval_gen_len': 24.0637, 'eval_runtime': 90.959, 'eval_samples_per_second': 9.664, 'eval_steps_per_second': 0.605, 'epoch': 7.87}
{'loss': 1.9019, 'grad_norm': 1.055449366569519, 'learning_rate': 0.00062, 'epoch': 7.89}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1839826107025146, 'eval_bleu': 13.2552, 'eval_gen_len': 23.9158, 'eval_runtime': 87.5841, 'eval_samples_per_second': 10.036, 'eval_steps_per_second': 0.628, 'epoch': 7.89}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1704301834106445, 'eval_bleu': 12.8167, 'eval_gen_len': 24.0592, 'eval_runtime': 88.495, 'eval_samples_per_second': 9.933, 'eval_steps_per_second': 0.622, 'epoch': 7.9}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1797397136688232, 'eval_bleu': 12.7962, 'eval_gen_len': 23.9693, 'eval_runtime': 88.2951, 'eval_samples_per_second': 9.955, 'eval_steps_per_second': 0.623, 'epoch': 7.91}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1890993118286133, 'eval_bleu': 13.2197, 'eval_gen_len': 23.9738, 'eval_runtime': 88.6641, 'eval_samples_per_second': 9.914, 'eval_steps_per_second': 0.62, 'epoch': 7.93}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.193145513534546, 'eval_bleu': 13.1292, 'eval_gen_len': 24.0808, 'eval_runtime': 89.2151, 'eval_samples_per_second': 9.853, 'eval_steps_per_second': 0.616, 'epoch': 7.94}
{'loss': 1.898, 'grad_norm': 1.1108272075653076, 'learning_rate': 0.0006166666666666667, 'epoch': 7.96}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.177680015563965, 'eval_bleu': 12.8281, 'eval_gen_len': 24.1456, 'eval_runtime': 88.4903, 'eval_samples_per_second': 9.933, 'eval_steps_per_second': 0.622, 'epoch': 7.96}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1907401084899902, 'eval_bleu': 12.8038, 'eval_gen_len': 23.967, 'eval_runtime': 87.8691, 'eval_samples_per_second': 10.004, 'eval_steps_per_second': 0.626, 'epoch': 7.97}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.180034637451172, 'eval_bleu': 13.234, 'eval_gen_len': 23.8714, 'eval_runtime': 88.5311, 'eval_samples_per_second': 9.929, 'eval_steps_per_second': 0.621, 'epoch': 7.98}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.178515911102295, 'eval_bleu': 12.8833, 'eval_gen_len': 23.8885, 'eval_runtime': 88.3429, 'eval_samples_per_second': 9.95, 'eval_steps_per_second': 0.623, 'epoch': 8.0}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.183622360229492, 'eval_bleu': 13.3678, 'eval_gen_len': 23.7691, 'eval_runtime': 88.4499, 'eval_samples_per_second': 9.938, 'eval_steps_per_second': 0.622, 'epoch': 8.01}
{'loss': 1.8378, 'grad_norm': 1.4863662719726562, 'learning_rate': 0.0006133333333333334, 'epoch': 8.02}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.192103624343872, 'eval_bleu': 13.4509, 'eval_gen_len': 23.7941, 'eval_runtime': 88.4212, 'eval_samples_per_second': 9.941, 'eval_steps_per_second': 0.622, 'epoch': 8.02}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.18326997756958, 'eval_bleu': 13.3499, 'eval_gen_len': 23.7258, 'eval_runtime': 88.0638, 'eval_samples_per_second': 9.981, 'eval_steps_per_second': 0.625, 'epoch': 8.04}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.194843292236328, 'eval_bleu': 13.3972, 'eval_gen_len': 23.876, 'eval_runtime': 88.8875, 'eval_samples_per_second': 9.889, 'eval_steps_per_second': 0.619, 'epoch': 8.05}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1862714290618896, 'eval_bleu': 12.9243, 'eval_gen_len': 24.0853, 'eval_runtime': 89.028, 'eval_samples_per_second': 9.873, 'eval_steps_per_second': 0.618, 'epoch': 8.07}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1832447052001953, 'eval_bleu': 12.7399, 'eval_gen_len': 24.1479, 'eval_runtime': 88.999, 'eval_samples_per_second': 9.877, 'eval_steps_per_second': 0.618, 'epoch': 8.08}
{'loss': 1.7703, 'grad_norm': 1.1195236444473267, 'learning_rate': 0.00061, 'epoch': 8.09}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.197089910507202, 'eval_bleu': 12.7717, 'eval_gen_len': 23.8009, 'eval_runtime': 88.4499, 'eval_samples_per_second': 9.938, 'eval_steps_per_second': 0.622, 'epoch': 8.09}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1923086643218994, 'eval_bleu': 13.357, 'eval_gen_len': 23.6325, 'eval_runtime': 88.4608, 'eval_samples_per_second': 9.937, 'eval_steps_per_second': 0.622, 'epoch': 8.11}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.191666603088379, 'eval_bleu': 13.2097, 'eval_gen_len': 23.785, 'eval_runtime': 88.2761, 'eval_samples_per_second': 9.957, 'eval_steps_per_second': 0.623, 'epoch': 8.12}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1952173709869385, 'eval_bleu': 13.2256, 'eval_gen_len': 23.7736, 'eval_runtime': 88.4271, 'eval_samples_per_second': 9.94, 'eval_steps_per_second': 0.622, 'epoch': 8.14}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.181628704071045, 'eval_bleu': 13.554, 'eval_gen_len': 23.7076, 'eval_runtime': 91.3875, 'eval_samples_per_second': 9.618, 'eval_steps_per_second': 0.602, 'epoch': 8.15}
{'loss': 1.7732, 'grad_norm': 1.1469320058822632, 'learning_rate': 0.0006066666666666667, 'epoch': 8.16}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2000043392181396, 'eval_bleu': 13.0246, 'eval_gen_len': 23.711, 'eval_runtime': 88.382, 'eval_samples_per_second': 9.945, 'eval_steps_per_second': 0.622, 'epoch': 8.16}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1832308769226074, 'eval_bleu': 12.8801, 'eval_gen_len': 24.0569, 'eval_runtime': 89.1412, 'eval_samples_per_second': 9.861, 'eval_steps_per_second': 0.617, 'epoch': 8.18}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.179612636566162, 'eval_bleu': 12.7131, 'eval_gen_len': 23.9613, 'eval_runtime': 88.6472, 'eval_samples_per_second': 9.916, 'eval_steps_per_second': 0.62, 'epoch': 8.19}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.191411018371582, 'eval_bleu': 12.677, 'eval_gen_len': 23.9647, 'eval_runtime': 88.5529, 'eval_samples_per_second': 9.926, 'eval_steps_per_second': 0.621, 'epoch': 8.2}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1937122344970703, 'eval_bleu': 13.1875, 'eval_gen_len': 23.7099, 'eval_runtime': 88.8371, 'eval_samples_per_second': 9.895, 'eval_steps_per_second': 0.619, 'epoch': 8.22}
{'loss': 1.7774, 'grad_norm': 1.4353588819503784, 'learning_rate': 0.0006033333333333334, 'epoch': 8.23}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1755530834198, 'eval_bleu': 13.3619, 'eval_gen_len': 24.1354, 'eval_runtime': 88.8407, 'eval_samples_per_second': 9.894, 'eval_steps_per_second': 0.619, 'epoch': 8.23}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.180590867996216, 'eval_bleu': 13.2591, 'eval_gen_len': 24.0239, 'eval_runtime': 88.319, 'eval_samples_per_second': 9.953, 'eval_steps_per_second': 0.623, 'epoch': 8.25}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1748604774475098, 'eval_bleu': 12.9753, 'eval_gen_len': 23.8931, 'eval_runtime': 88.1381, 'eval_samples_per_second': 9.973, 'eval_steps_per_second': 0.624, 'epoch': 8.26}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1812281608581543, 'eval_bleu': 13.5192, 'eval_gen_len': 23.7759, 'eval_runtime': 94.8586, 'eval_samples_per_second': 9.266, 'eval_steps_per_second': 0.58, 'epoch': 8.27}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1911416053771973, 'eval_bleu': 13.2295, 'eval_gen_len': 23.8134, 'eval_runtime': 89.6638, 'eval_samples_per_second': 9.803, 'eval_steps_per_second': 0.613, 'epoch': 8.29}
{'loss': 1.7924, 'grad_norm': 1.1201565265655518, 'learning_rate': 0.0006, 'epoch': 8.3}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.178586006164551, 'eval_bleu': 13.1497, 'eval_gen_len': 23.9261, 'eval_runtime': 88.2788, 'eval_samples_per_second': 9.957, 'eval_steps_per_second': 0.623, 'epoch': 8.3}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.17160701751709, 'eval_bleu': 13.268, 'eval_gen_len': 24.0091, 'eval_runtime': 88.4876, 'eval_samples_per_second': 9.934, 'eval_steps_per_second': 0.622, 'epoch': 8.32}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1648576259613037, 'eval_bleu': 12.8679, 'eval_gen_len': 24.0705, 'eval_runtime': 88.7079, 'eval_samples_per_second': 9.909, 'eval_steps_per_second': 0.62, 'epoch': 8.33}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.180579423904419, 'eval_bleu': 13.3332, 'eval_gen_len': 23.719, 'eval_runtime': 87.6806, 'eval_samples_per_second': 10.025, 'eval_steps_per_second': 0.627, 'epoch': 8.34}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.176403760910034, 'eval_bleu': 13.5698, 'eval_gen_len': 23.9863, 'eval_runtime': 89.4175, 'eval_samples_per_second': 9.83, 'eval_steps_per_second': 0.615, 'epoch': 8.36}
{'loss': 1.7888, 'grad_norm': 1.0543534755706787, 'learning_rate': 0.0005966666666666667, 'epoch': 8.37}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1704864501953125, 'eval_bleu': 13.3229, 'eval_gen_len': 23.9067, 'eval_runtime': 88.1438, 'eval_samples_per_second': 9.972, 'eval_steps_per_second': 0.624, 'epoch': 8.37}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1662356853485107, 'eval_bleu': 13.3143, 'eval_gen_len': 23.7884, 'eval_runtime': 88.1473, 'eval_samples_per_second': 9.972, 'eval_steps_per_second': 0.624, 'epoch': 8.38}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1839935779571533, 'eval_bleu': 12.95, 'eval_gen_len': 23.5904, 'eval_runtime': 88.1053, 'eval_samples_per_second': 9.977, 'eval_steps_per_second': 0.624, 'epoch': 8.4}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.165977716445923, 'eval_bleu': 13.3601, 'eval_gen_len': 23.9033, 'eval_runtime': 88.2732, 'eval_samples_per_second': 9.958, 'eval_steps_per_second': 0.623, 'epoch': 8.41}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1685492992401123, 'eval_bleu': 13.1086, 'eval_gen_len': 23.7736, 'eval_runtime': 88.2397, 'eval_samples_per_second': 9.962, 'eval_steps_per_second': 0.623, 'epoch': 8.43}
{'loss': 1.7969, 'grad_norm': 1.1885004043579102, 'learning_rate': 0.0005933333333333334, 'epoch': 8.44}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.169006586074829, 'eval_bleu': 13.1091, 'eval_gen_len': 23.8896, 'eval_runtime': 87.9096, 'eval_samples_per_second': 9.999, 'eval_steps_per_second': 0.626, 'epoch': 8.44}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.170658826828003, 'eval_bleu': 13.0787, 'eval_gen_len': 23.8498, 'eval_runtime': 88.6256, 'eval_samples_per_second': 9.918, 'eval_steps_per_second': 0.621, 'epoch': 8.45}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.162078619003296, 'eval_bleu': 13.0302, 'eval_gen_len': 24.0091, 'eval_runtime': 88.0438, 'eval_samples_per_second': 9.984, 'eval_steps_per_second': 0.625, 'epoch': 8.47}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.173826217651367, 'eval_bleu': 12.8615, 'eval_gen_len': 23.7702, 'eval_runtime': 88.2275, 'eval_samples_per_second': 9.963, 'eval_steps_per_second': 0.623, 'epoch': 8.48}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.168449878692627, 'eval_bleu': 13.0267, 'eval_gen_len': 23.9158, 'eval_runtime': 88.3001, 'eval_samples_per_second': 9.955, 'eval_steps_per_second': 0.623, 'epoch': 8.5}
{'loss': 1.8023, 'grad_norm': 1.3921595811843872, 'learning_rate': 0.00059, 'epoch': 8.51}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1789674758911133, 'eval_bleu': 13.0308, 'eval_gen_len': 24.0193, 'eval_runtime': 88.2626, 'eval_samples_per_second': 9.959, 'eval_steps_per_second': 0.623, 'epoch': 8.51}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.173570394515991, 'eval_bleu': 13.1801, 'eval_gen_len': 23.9556, 'eval_runtime': 88.5632, 'eval_samples_per_second': 9.925, 'eval_steps_per_second': 0.621, 'epoch': 8.52}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.176147222518921, 'eval_bleu': 13.227, 'eval_gen_len': 23.562, 'eval_runtime': 89.1176, 'eval_samples_per_second': 9.863, 'eval_steps_per_second': 0.617, 'epoch': 8.54}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1742348670959473, 'eval_bleu': 13.0115, 'eval_gen_len': 23.7452, 'eval_runtime': 88.7944, 'eval_samples_per_second': 9.899, 'eval_steps_per_second': 0.619, 'epoch': 8.55}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.170694589614868, 'eval_bleu': 12.678, 'eval_gen_len': 23.876, 'eval_runtime': 88.2776, 'eval_samples_per_second': 9.957, 'eval_steps_per_second': 0.623, 'epoch': 8.56}
{'loss': 1.8086, 'grad_norm': 1.2329318523406982, 'learning_rate': 0.0005866666666666667, 'epoch': 8.58}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1725521087646484, 'eval_bleu': 12.8091, 'eval_gen_len': 24.0421, 'eval_runtime': 88.5317, 'eval_samples_per_second': 9.929, 'eval_steps_per_second': 0.621, 'epoch': 8.58}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.17716121673584, 'eval_bleu': 12.817, 'eval_gen_len': 23.9943, 'eval_runtime': 88.5638, 'eval_samples_per_second': 9.925, 'eval_steps_per_second': 0.621, 'epoch': 8.59}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.179380178451538, 'eval_bleu': 12.8058, 'eval_gen_len': 23.868, 'eval_runtime': 88.6768, 'eval_samples_per_second': 9.912, 'eval_steps_per_second': 0.62, 'epoch': 8.61}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.165086269378662, 'eval_bleu': 12.932, 'eval_gen_len': 23.8635, 'eval_runtime': 88.4321, 'eval_samples_per_second': 9.94, 'eval_steps_per_second': 0.622, 'epoch': 8.62}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.164734125137329, 'eval_bleu': 13.2916, 'eval_gen_len': 23.9113, 'eval_runtime': 88.3728, 'eval_samples_per_second': 9.947, 'eval_steps_per_second': 0.622, 'epoch': 8.63}
{'loss': 1.7944, 'grad_norm': 1.448270559310913, 'learning_rate': 0.0005833333333333334, 'epoch': 8.65}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1703810691833496, 'eval_bleu': 13.0587, 'eval_gen_len': 23.8589, 'eval_runtime': 88.036, 'eval_samples_per_second': 9.985, 'eval_steps_per_second': 0.625, 'epoch': 8.65}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.185676336288452, 'eval_bleu': 13.3249, 'eval_gen_len': 23.6849, 'eval_runtime': 88.6311, 'eval_samples_per_second': 9.918, 'eval_steps_per_second': 0.621, 'epoch': 8.66}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1739091873168945, 'eval_bleu': 13.0395, 'eval_gen_len': 23.9352, 'eval_runtime': 87.6917, 'eval_samples_per_second': 10.024, 'eval_steps_per_second': 0.627, 'epoch': 8.68}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.157632827758789, 'eval_bleu': 13.2774, 'eval_gen_len': 23.9317, 'eval_runtime': 88.2182, 'eval_samples_per_second': 9.964, 'eval_steps_per_second': 0.623, 'epoch': 8.69}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.16994571685791, 'eval_bleu': 12.6773, 'eval_gen_len': 23.9078, 'eval_runtime': 88.0631, 'eval_samples_per_second': 9.981, 'eval_steps_per_second': 0.625, 'epoch': 8.7}
{'loss': 1.8074, 'grad_norm': 1.4406707286834717, 'learning_rate': 0.00058, 'epoch': 8.72}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.168020009994507, 'eval_bleu': 13.0798, 'eval_gen_len': 23.8908, 'eval_runtime': 88.3537, 'eval_samples_per_second': 9.949, 'eval_steps_per_second': 0.622, 'epoch': 8.72}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.161839723587036, 'eval_bleu': 13.0696, 'eval_gen_len': 24.0296, 'eval_runtime': 91.0782, 'eval_samples_per_second': 9.651, 'eval_steps_per_second': 0.604, 'epoch': 8.73}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1747045516967773, 'eval_bleu': 13.0159, 'eval_gen_len': 23.9363, 'eval_runtime': 87.4404, 'eval_samples_per_second': 10.053, 'eval_steps_per_second': 0.629, 'epoch': 8.74}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1552722454071045, 'eval_bleu': 13.2529, 'eval_gen_len': 23.8931, 'eval_runtime': 88.5122, 'eval_samples_per_second': 9.931, 'eval_steps_per_second': 0.621, 'epoch': 8.76}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1524572372436523, 'eval_bleu': 12.8143, 'eval_gen_len': 23.9431, 'eval_runtime': 87.9543, 'eval_samples_per_second': 9.994, 'eval_steps_per_second': 0.625, 'epoch': 8.77}
{'loss': 1.8089, 'grad_norm': 1.1115697622299194, 'learning_rate': 0.0005766666666666666, 'epoch': 8.79}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1592578887939453, 'eval_bleu': 13.0679, 'eval_gen_len': 23.6746, 'eval_runtime': 88.8572, 'eval_samples_per_second': 9.892, 'eval_steps_per_second': 0.619, 'epoch': 8.79}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1715307235717773, 'eval_bleu': 12.5948, 'eval_gen_len': 24.0387, 'eval_runtime': 89.0131, 'eval_samples_per_second': 9.875, 'eval_steps_per_second': 0.618, 'epoch': 8.8}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.152984857559204, 'eval_bleu': 12.9487, 'eval_gen_len': 23.7998, 'eval_runtime': 87.8043, 'eval_samples_per_second': 10.011, 'eval_steps_per_second': 0.626, 'epoch': 8.81}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1542274951934814, 'eval_bleu': 13.0145, 'eval_gen_len': 23.8385, 'eval_runtime': 87.9867, 'eval_samples_per_second': 9.99, 'eval_steps_per_second': 0.625, 'epoch': 8.83}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.147449493408203, 'eval_bleu': 13.095, 'eval_gen_len': 23.9067, 'eval_runtime': 88.7683, 'eval_samples_per_second': 9.902, 'eval_steps_per_second': 0.62, 'epoch': 8.84}
{'loss': 1.8065, 'grad_norm': 1.2830193042755127, 'learning_rate': 0.0005733333333333334, 'epoch': 8.86}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1631429195404053, 'eval_bleu': 13.0098, 'eval_gen_len': 23.9431, 'eval_runtime': 88.4689, 'eval_samples_per_second': 9.936, 'eval_steps_per_second': 0.622, 'epoch': 8.86}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1543941497802734, 'eval_bleu': 13.2608, 'eval_gen_len': 23.7327, 'eval_runtime': 89.0614, 'eval_samples_per_second': 9.87, 'eval_steps_per_second': 0.618, 'epoch': 8.87}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.152772903442383, 'eval_bleu': 12.7699, 'eval_gen_len': 24.0489, 'eval_runtime': 88.8941, 'eval_samples_per_second': 9.888, 'eval_steps_per_second': 0.619, 'epoch': 8.88}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1627860069274902, 'eval_bleu': 13.0623, 'eval_gen_len': 23.7611, 'eval_runtime': 88.0076, 'eval_samples_per_second': 9.988, 'eval_steps_per_second': 0.625, 'epoch': 8.9}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.160970687866211, 'eval_bleu': 12.7759, 'eval_gen_len': 23.942, 'eval_runtime': 87.8367, 'eval_samples_per_second': 10.007, 'eval_steps_per_second': 0.626, 'epoch': 8.91}
{'loss': 1.811, 'grad_norm': 1.4578046798706055, 'learning_rate': 0.00057, 'epoch': 8.92}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.152345895767212, 'eval_bleu': 12.8152, 'eval_gen_len': 24.2014, 'eval_runtime': 89.0214, 'eval_samples_per_second': 9.874, 'eval_steps_per_second': 0.618, 'epoch': 8.92}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1559407711029053, 'eval_bleu': 12.9395, 'eval_gen_len': 23.884, 'eval_runtime': 88.6316, 'eval_samples_per_second': 9.917, 'eval_steps_per_second': 0.621, 'epoch': 8.94}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1425464153289795, 'eval_bleu': 13.2495, 'eval_gen_len': 23.8749, 'eval_runtime': 88.5352, 'eval_samples_per_second': 9.928, 'eval_steps_per_second': 0.621, 'epoch': 8.95}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1552228927612305, 'eval_bleu': 13.0861, 'eval_gen_len': 23.7418, 'eval_runtime': 88.8719, 'eval_samples_per_second': 9.891, 'eval_steps_per_second': 0.619, 'epoch': 8.97}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.162712812423706, 'eval_bleu': 13.1196, 'eval_gen_len': 23.8567, 'eval_runtime': 88.4831, 'eval_samples_per_second': 9.934, 'eval_steps_per_second': 0.622, 'epoch': 8.98}
{'loss': 1.8182, 'grad_norm': 1.0837191343307495, 'learning_rate': 0.0005666666666666667, 'epoch': 8.99}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.166912317276001, 'eval_bleu': 12.8232, 'eval_gen_len': 23.7122, 'eval_runtime': 87.9797, 'eval_samples_per_second': 9.991, 'eval_steps_per_second': 0.625, 'epoch': 8.99}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.166733980178833, 'eval_bleu': 13.0254, 'eval_gen_len': 23.7713, 'eval_runtime': 89.5583, 'eval_samples_per_second': 9.815, 'eval_steps_per_second': 0.614, 'epoch': 9.01}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.18916916847229, 'eval_bleu': 13.1052, 'eval_gen_len': 23.6246, 'eval_runtime': 87.8044, 'eval_samples_per_second': 10.011, 'eval_steps_per_second': 0.626, 'epoch': 9.02}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.179828643798828, 'eval_bleu': 12.8185, 'eval_gen_len': 23.8487, 'eval_runtime': 87.8672, 'eval_samples_per_second': 10.004, 'eval_steps_per_second': 0.626, 'epoch': 9.03}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.168231725692749, 'eval_bleu': 13.3663, 'eval_gen_len': 23.6462, 'eval_runtime': 88.1527, 'eval_samples_per_second': 9.971, 'eval_steps_per_second': 0.624, 'epoch': 9.05}
{'loss': 1.6812, 'grad_norm': 1.005563497543335, 'learning_rate': 0.0005633333333333333, 'epoch': 9.06}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1718766689300537, 'eval_bleu': 13.6726, 'eval_gen_len': 23.7713, 'eval_runtime': 87.8601, 'eval_samples_per_second': 10.005, 'eval_steps_per_second': 0.626, 'epoch': 9.06}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1752402782440186, 'eval_bleu': 12.8873, 'eval_gen_len': 23.992, 'eval_runtime': 88.57, 'eval_samples_per_second': 9.924, 'eval_steps_per_second': 0.621, 'epoch': 9.08}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1736631393432617, 'eval_bleu': 13.0558, 'eval_gen_len': 23.8282, 'eval_runtime': 89.1416, 'eval_samples_per_second': 9.861, 'eval_steps_per_second': 0.617, 'epoch': 9.09}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.173940658569336, 'eval_bleu': 12.9716, 'eval_gen_len': 24.0319, 'eval_runtime': 89.36, 'eval_samples_per_second': 9.837, 'eval_steps_per_second': 0.615, 'epoch': 9.1}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1766510009765625, 'eval_bleu': 12.6473, 'eval_gen_len': 23.8623, 'eval_runtime': 88.9671, 'eval_samples_per_second': 9.88, 'eval_steps_per_second': 0.618, 'epoch': 9.12}
{'loss': 1.6743, 'grad_norm': 1.3045783042907715, 'learning_rate': 0.0005600000000000001, 'epoch': 9.13}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.187469005584717, 'eval_bleu': 13.1675, 'eval_gen_len': 23.9033, 'eval_runtime': 88.7541, 'eval_samples_per_second': 9.904, 'eval_steps_per_second': 0.62, 'epoch': 9.13}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1742327213287354, 'eval_bleu': 13.3244, 'eval_gen_len': 23.7941, 'eval_runtime': 88.3824, 'eval_samples_per_second': 9.945, 'eval_steps_per_second': 0.622, 'epoch': 9.15}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.175508499145508, 'eval_bleu': 13.2023, 'eval_gen_len': 23.8328, 'eval_runtime': 87.9099, 'eval_samples_per_second': 9.999, 'eval_steps_per_second': 0.626, 'epoch': 9.16}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.176497220993042, 'eval_bleu': 13.787, 'eval_gen_len': 23.8567, 'eval_runtime': 88.5955, 'eval_samples_per_second': 9.921, 'eval_steps_per_second': 0.621, 'epoch': 9.17}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1702208518981934, 'eval_bleu': 13.4814, 'eval_gen_len': 23.8692, 'eval_runtime': 88.7871, 'eval_samples_per_second': 9.9, 'eval_steps_per_second': 0.619, 'epoch': 9.19}
{'loss': 1.6851, 'grad_norm': 1.2443166971206665, 'learning_rate': 0.0005566666666666667, 'epoch': 9.2}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.178849935531616, 'eval_bleu': 13.7562, 'eval_gen_len': 23.6951, 'eval_runtime': 88.7906, 'eval_samples_per_second': 9.9, 'eval_steps_per_second': 0.619, 'epoch': 9.2}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.171945571899414, 'eval_bleu': 13.0816, 'eval_gen_len': 23.8476, 'eval_runtime': 89.7317, 'eval_samples_per_second': 9.796, 'eval_steps_per_second': 0.613, 'epoch': 9.21}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.173243761062622, 'eval_bleu': 13.2379, 'eval_gen_len': 23.7224, 'eval_runtime': 87.8048, 'eval_samples_per_second': 10.011, 'eval_steps_per_second': 0.626, 'epoch': 9.23}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.171342372894287, 'eval_bleu': 13.4479, 'eval_gen_len': 23.8999, 'eval_runtime': 89.2222, 'eval_samples_per_second': 9.852, 'eval_steps_per_second': 0.616, 'epoch': 9.24}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.154907464981079, 'eval_bleu': 13.3126, 'eval_gen_len': 23.8089, 'eval_runtime': 88.3054, 'eval_samples_per_second': 9.954, 'eval_steps_per_second': 0.623, 'epoch': 9.26}
{'loss': 1.6959, 'grad_norm': 1.1442945003509521, 'learning_rate': 0.0005533333333333333, 'epoch': 9.27}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.171236753463745, 'eval_bleu': 13.351, 'eval_gen_len': 23.9056, 'eval_runtime': 89.4868, 'eval_samples_per_second': 9.823, 'eval_steps_per_second': 0.615, 'epoch': 9.27}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1581008434295654, 'eval_bleu': 13.3896, 'eval_gen_len': 24.0182, 'eval_runtime': 88.7695, 'eval_samples_per_second': 9.902, 'eval_steps_per_second': 0.62, 'epoch': 9.28}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.158454179763794, 'eval_bleu': 13.1159, 'eval_gen_len': 23.7838, 'eval_runtime': 89.9245, 'eval_samples_per_second': 9.775, 'eval_steps_per_second': 0.612, 'epoch': 9.3}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.17339825630188, 'eval_bleu': 13.3286, 'eval_gen_len': 23.6849, 'eval_runtime': 89.5197, 'eval_samples_per_second': 9.819, 'eval_steps_per_second': 0.614, 'epoch': 9.31}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.174602508544922, 'eval_bleu': 13.4107, 'eval_gen_len': 23.777, 'eval_runtime': 88.5085, 'eval_samples_per_second': 9.931, 'eval_steps_per_second': 0.621, 'epoch': 9.33}
{'loss': 1.7007, 'grad_norm': 1.1038134098052979, 'learning_rate': 0.00055, 'epoch': 9.34}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.163158893585205, 'eval_bleu': 13.7212, 'eval_gen_len': 23.9477, 'eval_runtime': 88.7071, 'eval_samples_per_second': 9.909, 'eval_steps_per_second': 0.62, 'epoch': 9.34}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1628546714782715, 'eval_bleu': 13.4648, 'eval_gen_len': 23.8965, 'eval_runtime': 88.7271, 'eval_samples_per_second': 9.907, 'eval_steps_per_second': 0.62, 'epoch': 9.35}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.159029483795166, 'eval_bleu': 13.3793, 'eval_gen_len': 23.9363, 'eval_runtime': 88.7949, 'eval_samples_per_second': 9.899, 'eval_steps_per_second': 0.619, 'epoch': 9.37}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1626877784729004, 'eval_bleu': 13.3102, 'eval_gen_len': 23.9374, 'eval_runtime': 89.8666, 'eval_samples_per_second': 9.781, 'eval_steps_per_second': 0.612, 'epoch': 9.38}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1783297061920166, 'eval_bleu': 12.9012, 'eval_gen_len': 23.9625, 'eval_runtime': 88.3323, 'eval_samples_per_second': 9.951, 'eval_steps_per_second': 0.623, 'epoch': 9.39}
{'loss': 1.7158, 'grad_norm': 1.1562845706939697, 'learning_rate': 0.0005466666666666667, 'epoch': 9.41}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1667091846466064, 'eval_bleu': 13.5952, 'eval_gen_len': 24.083, 'eval_runtime': 88.3524, 'eval_samples_per_second': 9.949, 'eval_steps_per_second': 0.623, 'epoch': 9.41}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1730360984802246, 'eval_bleu': 12.9911, 'eval_gen_len': 23.7952, 'eval_runtime': 88.4862, 'eval_samples_per_second': 9.934, 'eval_steps_per_second': 0.622, 'epoch': 9.42}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1622111797332764, 'eval_bleu': 12.8091, 'eval_gen_len': 23.5813, 'eval_runtime': 88.7322, 'eval_samples_per_second': 9.906, 'eval_steps_per_second': 0.62, 'epoch': 9.44}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1685125827789307, 'eval_bleu': 13.6716, 'eval_gen_len': 23.7645, 'eval_runtime': 89.0579, 'eval_samples_per_second': 9.87, 'eval_steps_per_second': 0.618, 'epoch': 9.45}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1615030765533447, 'eval_bleu': 13.2815, 'eval_gen_len': 23.8601, 'eval_runtime': 88.2512, 'eval_samples_per_second': 9.96, 'eval_steps_per_second': 0.623, 'epoch': 9.46}
{'loss': 1.7172, 'grad_norm': 1.1018401384353638, 'learning_rate': 0.0005433333333333334, 'epoch': 9.48}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.15913724899292, 'eval_bleu': 13.5797, 'eval_gen_len': 23.7474, 'eval_runtime': 89.2811, 'eval_samples_per_second': 9.845, 'eval_steps_per_second': 0.616, 'epoch': 9.48}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1620142459869385, 'eval_bleu': 13.2328, 'eval_gen_len': 23.678, 'eval_runtime': 88.2041, 'eval_samples_per_second': 9.966, 'eval_steps_per_second': 0.624, 'epoch': 9.49}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1572554111480713, 'eval_bleu': 13.745, 'eval_gen_len': 23.9272, 'eval_runtime': 88.7952, 'eval_samples_per_second': 9.899, 'eval_steps_per_second': 0.619, 'epoch': 9.51}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1548984050750732, 'eval_bleu': 13.5081, 'eval_gen_len': 24.1331, 'eval_runtime': 89.6908, 'eval_samples_per_second': 9.8, 'eval_steps_per_second': 0.613, 'epoch': 9.52}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1551637649536133, 'eval_bleu': 13.1999, 'eval_gen_len': 23.8987, 'eval_runtime': 88.0314, 'eval_samples_per_second': 9.985, 'eval_steps_per_second': 0.625, 'epoch': 9.53}
{'loss': 1.7125, 'grad_norm': 1.1749025583267212, 'learning_rate': 0.00054, 'epoch': 9.55}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1498987674713135, 'eval_bleu': 13.476, 'eval_gen_len': 23.7873, 'eval_runtime': 88.4814, 'eval_samples_per_second': 9.934, 'eval_steps_per_second': 0.622, 'epoch': 9.55}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.146959066390991, 'eval_bleu': 13.0211, 'eval_gen_len': 23.8828, 'eval_runtime': 89.0713, 'eval_samples_per_second': 9.868, 'eval_steps_per_second': 0.617, 'epoch': 9.56}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1540796756744385, 'eval_bleu': 13.3755, 'eval_gen_len': 23.992, 'eval_runtime': 89.2869, 'eval_samples_per_second': 9.845, 'eval_steps_per_second': 0.616, 'epoch': 9.57}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1580634117126465, 'eval_bleu': 12.9287, 'eval_gen_len': 24.0114, 'eval_runtime': 89.1598, 'eval_samples_per_second': 9.859, 'eval_steps_per_second': 0.617, 'epoch': 9.59}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.162050485610962, 'eval_bleu': 12.9685, 'eval_gen_len': 23.8134, 'eval_runtime': 88.6946, 'eval_samples_per_second': 9.91, 'eval_steps_per_second': 0.62, 'epoch': 9.6}
{'loss': 1.7213, 'grad_norm': 1.325891137123108, 'learning_rate': 0.0005366666666666666, 'epoch': 9.62}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1563737392425537, 'eval_bleu': 13.116, 'eval_gen_len': 23.8896, 'eval_runtime': 89.6888, 'eval_samples_per_second': 9.801, 'eval_steps_per_second': 0.613, 'epoch': 9.62}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.160881280899048, 'eval_bleu': 13.4912, 'eval_gen_len': 23.8203, 'eval_runtime': 88.7356, 'eval_samples_per_second': 9.906, 'eval_steps_per_second': 0.62, 'epoch': 9.63}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.140127658843994, 'eval_bleu': 13.3456, 'eval_gen_len': 23.8373, 'eval_runtime': 88.9868, 'eval_samples_per_second': 9.878, 'eval_steps_per_second': 0.618, 'epoch': 9.64}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.15169358253479, 'eval_bleu': 13.3221, 'eval_gen_len': 24.0853, 'eval_runtime': 89.7166, 'eval_samples_per_second': 9.798, 'eval_steps_per_second': 0.613, 'epoch': 9.66}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1433823108673096, 'eval_bleu': 12.8886, 'eval_gen_len': 23.9989, 'eval_runtime': 88.6657, 'eval_samples_per_second': 9.914, 'eval_steps_per_second': 0.62, 'epoch': 9.67}
{'loss': 1.7287, 'grad_norm': 1.0900052785873413, 'learning_rate': 0.0005333333333333334, 'epoch': 9.69}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1676182746887207, 'eval_bleu': 13.7237, 'eval_gen_len': 23.6883, 'eval_runtime': 88.7102, 'eval_samples_per_second': 9.909, 'eval_steps_per_second': 0.62, 'epoch': 9.69}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.153707981109619, 'eval_bleu': 13.625, 'eval_gen_len': 23.8362, 'eval_runtime': 89.542, 'eval_samples_per_second': 9.817, 'eval_steps_per_second': 0.614, 'epoch': 9.7}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1686043739318848, 'eval_bleu': 13.2376, 'eval_gen_len': 23.7486, 'eval_runtime': 88.5732, 'eval_samples_per_second': 9.924, 'eval_steps_per_second': 0.621, 'epoch': 9.71}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1644389629364014, 'eval_bleu': 13.3564, 'eval_gen_len': 23.8987, 'eval_runtime': 91.5034, 'eval_samples_per_second': 9.606, 'eval_steps_per_second': 0.601, 'epoch': 9.73}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.16599440574646, 'eval_bleu': 13.1043, 'eval_gen_len': 23.835, 'eval_runtime': 89.0639, 'eval_samples_per_second': 9.869, 'eval_steps_per_second': 0.618, 'epoch': 9.74}
{'loss': 1.7224, 'grad_norm': 1.0933908224105835, 'learning_rate': 0.0005300000000000001, 'epoch': 9.75}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1585307121276855, 'eval_bleu': 13.1236, 'eval_gen_len': 23.6906, 'eval_runtime': 89.954, 'eval_samples_per_second': 9.772, 'eval_steps_per_second': 0.611, 'epoch': 9.75}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.154254913330078, 'eval_bleu': 13.6601, 'eval_gen_len': 24.0501, 'eval_runtime': 88.4344, 'eval_samples_per_second': 9.94, 'eval_steps_per_second': 0.622, 'epoch': 9.77}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.146787643432617, 'eval_bleu': 13.1851, 'eval_gen_len': 24.0705, 'eval_runtime': 89.7844, 'eval_samples_per_second': 9.79, 'eval_steps_per_second': 0.613, 'epoch': 9.78}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.161487102508545, 'eval_bleu': 13.2868, 'eval_gen_len': 23.8976, 'eval_runtime': 88.4321, 'eval_samples_per_second': 9.94, 'eval_steps_per_second': 0.622, 'epoch': 9.8}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.157388687133789, 'eval_bleu': 13.6684, 'eval_gen_len': 23.8703, 'eval_runtime': 88.4687, 'eval_samples_per_second': 9.936, 'eval_steps_per_second': 0.622, 'epoch': 9.81}
{'loss': 1.7317, 'grad_norm': 1.1103665828704834, 'learning_rate': 0.0005266666666666666, 'epoch': 9.82}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1518337726593018, 'eval_bleu': 13.5627, 'eval_gen_len': 23.8942, 'eval_runtime': 88.899, 'eval_samples_per_second': 9.888, 'eval_steps_per_second': 0.619, 'epoch': 9.82}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.162597417831421, 'eval_bleu': 13.7806, 'eval_gen_len': 23.9374, 'eval_runtime': 88.5867, 'eval_samples_per_second': 9.922, 'eval_steps_per_second': 0.621, 'epoch': 9.84}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1417195796966553, 'eval_bleu': 13.2299, 'eval_gen_len': 24.0899, 'eval_runtime': 88.7765, 'eval_samples_per_second': 9.901, 'eval_steps_per_second': 0.62, 'epoch': 9.85}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1488900184631348, 'eval_bleu': 13.5041, 'eval_gen_len': 24.033, 'eval_runtime': 89.0233, 'eval_samples_per_second': 9.874, 'eval_steps_per_second': 0.618, 'epoch': 9.87}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1563568115234375, 'eval_bleu': 13.6291, 'eval_gen_len': 23.7531, 'eval_runtime': 90.9521, 'eval_samples_per_second': 9.664, 'eval_steps_per_second': 0.605, 'epoch': 9.88}
{'loss': 1.7208, 'grad_norm': 1.225805640220642, 'learning_rate': 0.0005233333333333333, 'epoch': 9.89}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.154838800430298, 'eval_bleu': 13.5258, 'eval_gen_len': 23.8487, 'eval_runtime': 89.1888, 'eval_samples_per_second': 9.855, 'eval_steps_per_second': 0.617, 'epoch': 9.89}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.143956184387207, 'eval_bleu': 13.6172, 'eval_gen_len': 23.8589, 'eval_runtime': 88.627, 'eval_samples_per_second': 9.918, 'eval_steps_per_second': 0.621, 'epoch': 9.91}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1409199237823486, 'eval_bleu': 13.55, 'eval_gen_len': 23.7486, 'eval_runtime': 88.7503, 'eval_samples_per_second': 9.904, 'eval_steps_per_second': 0.62, 'epoch': 9.92}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.150221109390259, 'eval_bleu': 13.938, 'eval_gen_len': 23.9534, 'eval_runtime': 88.9294, 'eval_samples_per_second': 9.884, 'eval_steps_per_second': 0.618, 'epoch': 9.93}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.144594430923462, 'eval_bleu': 13.4863, 'eval_gen_len': 23.9989, 'eval_runtime': 89.3844, 'eval_samples_per_second': 9.834, 'eval_steps_per_second': 0.615, 'epoch': 9.95}
{'loss': 1.722, 'grad_norm': 1.0636835098266602, 'learning_rate': 0.0005200000000000001, 'epoch': 9.96}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.145406484603882, 'eval_bleu': 13.3948, 'eval_gen_len': 23.9784, 'eval_runtime': 89.1397, 'eval_samples_per_second': 9.861, 'eval_steps_per_second': 0.617, 'epoch': 9.96}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1502413749694824, 'eval_bleu': 13.5743, 'eval_gen_len': 23.8567, 'eval_runtime': 88.0477, 'eval_samples_per_second': 9.983, 'eval_steps_per_second': 0.625, 'epoch': 9.98}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1446926593780518, 'eval_bleu': 13.5522, 'eval_gen_len': 23.9545, 'eval_runtime': 89.347, 'eval_samples_per_second': 9.838, 'eval_steps_per_second': 0.616, 'epoch': 9.99}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1829261779785156, 'eval_bleu': 13.3066, 'eval_gen_len': 23.8623, 'eval_runtime': 88.4766, 'eval_samples_per_second': 9.935, 'eval_steps_per_second': 0.622, 'epoch': 10.0}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.159689426422119, 'eval_bleu': 13.4511, 'eval_gen_len': 23.835, 'eval_runtime': 88.2299, 'eval_samples_per_second': 9.963, 'eval_steps_per_second': 0.623, 'epoch': 10.02}
{'loss': 1.6611, 'grad_norm': 1.489717960357666, 'learning_rate': 0.0005166666666666667, 'epoch': 10.03}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1830832958221436, 'eval_bleu': 13.3036, 'eval_gen_len': 23.9215, 'eval_runtime': 89.2804, 'eval_samples_per_second': 9.845, 'eval_steps_per_second': 0.616, 'epoch': 10.03}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1575675010681152, 'eval_bleu': 13.5185, 'eval_gen_len': 23.8168, 'eval_runtime': 88.201, 'eval_samples_per_second': 9.966, 'eval_steps_per_second': 0.624, 'epoch': 10.04}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1657538414001465, 'eval_bleu': 13.6004, 'eval_gen_len': 24.0751, 'eval_runtime': 87.9548, 'eval_samples_per_second': 9.994, 'eval_steps_per_second': 0.625, 'epoch': 10.06}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1696081161499023, 'eval_bleu': 13.7299, 'eval_gen_len': 24.0353, 'eval_runtime': 88.4649, 'eval_samples_per_second': 9.936, 'eval_steps_per_second': 0.622, 'epoch': 10.07}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.177863836288452, 'eval_bleu': 13.4595, 'eval_gen_len': 23.9363, 'eval_runtime': 89.2675, 'eval_samples_per_second': 9.847, 'eval_steps_per_second': 0.616, 'epoch': 10.09}
{'loss': 1.5905, 'grad_norm': 1.1966267824172974, 'learning_rate': 0.0005133333333333333, 'epoch': 10.1}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1772561073303223, 'eval_bleu': 13.4927, 'eval_gen_len': 23.9613, 'eval_runtime': 89.3467, 'eval_samples_per_second': 9.838, 'eval_steps_per_second': 0.616, 'epoch': 10.1}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.178518056869507, 'eval_bleu': 13.5647, 'eval_gen_len': 23.7395, 'eval_runtime': 88.9391, 'eval_samples_per_second': 9.883, 'eval_steps_per_second': 0.618, 'epoch': 10.11}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1650922298431396, 'eval_bleu': 13.3625, 'eval_gen_len': 23.8282, 'eval_runtime': 88.5217, 'eval_samples_per_second': 9.93, 'eval_steps_per_second': 0.621, 'epoch': 10.13}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1800854206085205, 'eval_bleu': 13.5354, 'eval_gen_len': 23.5233, 'eval_runtime': 88.1472, 'eval_samples_per_second': 9.972, 'eval_steps_per_second': 0.624, 'epoch': 10.14}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.184075355529785, 'eval_bleu': 13.6146, 'eval_gen_len': 23.8191, 'eval_runtime': 88.7472, 'eval_samples_per_second': 9.905, 'eval_steps_per_second': 0.62, 'epoch': 10.16}
{'loss': 1.6037, 'grad_norm': 1.1654468774795532, 'learning_rate': 0.00051, 'epoch': 10.17}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1770007610321045, 'eval_bleu': 13.1691, 'eval_gen_len': 23.7941, 'eval_runtime': 88.4931, 'eval_samples_per_second': 9.933, 'eval_steps_per_second': 0.622, 'epoch': 10.17}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.15898060798645, 'eval_bleu': 13.2285, 'eval_gen_len': 24.0284, 'eval_runtime': 88.2315, 'eval_samples_per_second': 9.962, 'eval_steps_per_second': 0.623, 'epoch': 10.18}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1822352409362793, 'eval_bleu': 13.4795, 'eval_gen_len': 23.5859, 'eval_runtime': 89.2404, 'eval_samples_per_second': 9.85, 'eval_steps_per_second': 0.616, 'epoch': 10.2}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1762032508850098, 'eval_bleu': 13.6326, 'eval_gen_len': 23.835, 'eval_runtime': 88.3406, 'eval_samples_per_second': 9.95, 'eval_steps_per_second': 0.623, 'epoch': 10.21}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1768105030059814, 'eval_bleu': 13.5841, 'eval_gen_len': 23.7338, 'eval_runtime': 88.8288, 'eval_samples_per_second': 9.895, 'eval_steps_per_second': 0.619, 'epoch': 10.22}
{'loss': 1.6193, 'grad_norm': 0.9010143280029297, 'learning_rate': 0.0005066666666666668, 'epoch': 10.24}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1655523777008057, 'eval_bleu': 13.3262, 'eval_gen_len': 23.7531, 'eval_runtime': 88.9458, 'eval_samples_per_second': 9.882, 'eval_steps_per_second': 0.618, 'epoch': 10.24}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.164517402648926, 'eval_bleu': 13.5403, 'eval_gen_len': 23.8771, 'eval_runtime': 88.955, 'eval_samples_per_second': 9.881, 'eval_steps_per_second': 0.618, 'epoch': 10.25}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.164102077484131, 'eval_bleu': 13.8361, 'eval_gen_len': 23.9511, 'eval_runtime': 88.6032, 'eval_samples_per_second': 9.921, 'eval_steps_per_second': 0.621, 'epoch': 10.27}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1575446128845215, 'eval_bleu': 13.4953, 'eval_gen_len': 23.6883, 'eval_runtime': 89.3632, 'eval_samples_per_second': 9.836, 'eval_steps_per_second': 0.615, 'epoch': 10.28}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.175386905670166, 'eval_bleu': 13.6873, 'eval_gen_len': 23.7929, 'eval_runtime': 89.1066, 'eval_samples_per_second': 9.865, 'eval_steps_per_second': 0.617, 'epoch': 10.29}
{'loss': 1.616, 'grad_norm': 1.649320363998413, 'learning_rate': 0.0005033333333333333, 'epoch': 10.31}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.183715581893921, 'eval_bleu': 13.1807, 'eval_gen_len': 23.8362, 'eval_runtime': 89.0322, 'eval_samples_per_second': 9.873, 'eval_steps_per_second': 0.618, 'epoch': 10.31}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.15878963470459, 'eval_bleu': 13.7122, 'eval_gen_len': 23.8726, 'eval_runtime': 88.9474, 'eval_samples_per_second': 9.882, 'eval_steps_per_second': 0.618, 'epoch': 10.32}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.153069019317627, 'eval_bleu': 13.4001, 'eval_gen_len': 23.8612, 'eval_runtime': 88.5905, 'eval_samples_per_second': 9.922, 'eval_steps_per_second': 0.621, 'epoch': 10.34}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.168006181716919, 'eval_bleu': 13.6061, 'eval_gen_len': 23.6416, 'eval_runtime': 87.7995, 'eval_samples_per_second': 10.011, 'eval_steps_per_second': 0.626, 'epoch': 10.35}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1691558361053467, 'eval_bleu': 13.7485, 'eval_gen_len': 23.9363, 'eval_runtime': 88.4482, 'eval_samples_per_second': 9.938, 'eval_steps_per_second': 0.622, 'epoch': 10.36}
{'loss': 1.6262, 'grad_norm': 1.2796308994293213, 'learning_rate': 0.0005, 'epoch': 10.38}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.165097713470459, 'eval_bleu': 13.6534, 'eval_gen_len': 23.711, 'eval_runtime': 88.5919, 'eval_samples_per_second': 9.922, 'eval_steps_per_second': 0.621, 'epoch': 10.38}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1603524684906006, 'eval_bleu': 13.2639, 'eval_gen_len': 23.9772, 'eval_runtime': 88.231, 'eval_samples_per_second': 9.962, 'eval_steps_per_second': 0.623, 'epoch': 10.39}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1674535274505615, 'eval_bleu': 13.0001, 'eval_gen_len': 23.8714, 'eval_runtime': 88.7178, 'eval_samples_per_second': 9.908, 'eval_steps_per_second': 0.62, 'epoch': 10.4}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.156144618988037, 'eval_bleu': 13.5818, 'eval_gen_len': 23.8931, 'eval_runtime': 88.639, 'eval_samples_per_second': 9.917, 'eval_steps_per_second': 0.62, 'epoch': 10.42}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.159890651702881, 'eval_bleu': 13.2376, 'eval_gen_len': 24.0899, 'eval_runtime': 88.0199, 'eval_samples_per_second': 9.986, 'eval_steps_per_second': 0.625, 'epoch': 10.43}
{'loss': 1.633, 'grad_norm': 1.0728224515914917, 'learning_rate': 0.0004966666666666666, 'epoch': 10.45}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.172043800354004, 'eval_bleu': 13.4196, 'eval_gen_len': 23.8908, 'eval_runtime': 88.2545, 'eval_samples_per_second': 9.96, 'eval_steps_per_second': 0.623, 'epoch': 10.45}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1553635597229004, 'eval_bleu': 13.2339, 'eval_gen_len': 24.0489, 'eval_runtime': 91.6992, 'eval_samples_per_second': 9.586, 'eval_steps_per_second': 0.6, 'epoch': 10.46}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1555416584014893, 'eval_bleu': 13.4752, 'eval_gen_len': 23.8305, 'eval_runtime': 88.9796, 'eval_samples_per_second': 9.879, 'eval_steps_per_second': 0.618, 'epoch': 10.47}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.164299726486206, 'eval_bleu': 13.4934, 'eval_gen_len': 23.9386, 'eval_runtime': 88.5196, 'eval_samples_per_second': 9.93, 'eval_steps_per_second': 0.621, 'epoch': 10.49}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1661179065704346, 'eval_bleu': 13.2561, 'eval_gen_len': 23.9033, 'eval_runtime': 88.4062, 'eval_samples_per_second': 9.943, 'eval_steps_per_second': 0.622, 'epoch': 10.5}
{'loss': 1.6372, 'grad_norm': 1.177193284034729, 'learning_rate': 0.0004933333333333334, 'epoch': 10.52}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.152153730392456, 'eval_bleu': 13.2555, 'eval_gen_len': 24.0865, 'eval_runtime': 88.1419, 'eval_samples_per_second': 9.973, 'eval_steps_per_second': 0.624, 'epoch': 10.52}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1491971015930176, 'eval_bleu': 13.4841, 'eval_gen_len': 23.8658, 'eval_runtime': 88.6883, 'eval_samples_per_second': 9.911, 'eval_steps_per_second': 0.62, 'epoch': 10.53}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1457743644714355, 'eval_bleu': 13.913, 'eval_gen_len': 23.8328, 'eval_runtime': 89.0784, 'eval_samples_per_second': 9.868, 'eval_steps_per_second': 0.617, 'epoch': 10.54}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1536362171173096, 'eval_bleu': 14.0245, 'eval_gen_len': 23.8305, 'eval_runtime': 88.7924, 'eval_samples_per_second': 9.9, 'eval_steps_per_second': 0.619, 'epoch': 10.56}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1442465782165527, 'eval_bleu': 13.5519, 'eval_gen_len': 23.8134, 'eval_runtime': 88.2572, 'eval_samples_per_second': 9.96, 'eval_steps_per_second': 0.623, 'epoch': 10.57}
{'loss': 1.6572, 'grad_norm': 1.0490585565567017, 'learning_rate': 0.00049, 'epoch': 10.58}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.147505521774292, 'eval_bleu': 13.9246, 'eval_gen_len': 23.7861, 'eval_runtime': 88.6287, 'eval_samples_per_second': 9.918, 'eval_steps_per_second': 0.621, 'epoch': 10.58}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.154139280319214, 'eval_bleu': 13.7356, 'eval_gen_len': 23.7053, 'eval_runtime': 88.0558, 'eval_samples_per_second': 9.982, 'eval_steps_per_second': 0.625, 'epoch': 10.6}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1427135467529297, 'eval_bleu': 13.8526, 'eval_gen_len': 23.9863, 'eval_runtime': 88.658, 'eval_samples_per_second': 9.915, 'eval_steps_per_second': 0.62, 'epoch': 10.61}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1433017253875732, 'eval_bleu': 13.9108, 'eval_gen_len': 24.0023, 'eval_runtime': 87.9177, 'eval_samples_per_second': 9.998, 'eval_steps_per_second': 0.626, 'epoch': 10.63}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1422290802001953, 'eval_bleu': 13.6235, 'eval_gen_len': 23.8271, 'eval_runtime': 89.2153, 'eval_samples_per_second': 9.853, 'eval_steps_per_second': 0.616, 'epoch': 10.64}
{'loss': 1.6484, 'grad_norm': 1.3104292154312134, 'learning_rate': 0.0004866666666666667, 'epoch': 10.65}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1555135250091553, 'eval_bleu': 13.9378, 'eval_gen_len': 23.9954, 'eval_runtime': 88.0087, 'eval_samples_per_second': 9.988, 'eval_steps_per_second': 0.625, 'epoch': 10.65}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1431448459625244, 'eval_bleu': 13.6987, 'eval_gen_len': 23.9317, 'eval_runtime': 88.2419, 'eval_samples_per_second': 9.961, 'eval_steps_per_second': 0.623, 'epoch': 10.67}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.141777515411377, 'eval_bleu': 13.3568, 'eval_gen_len': 23.8237, 'eval_runtime': 88.6368, 'eval_samples_per_second': 9.917, 'eval_steps_per_second': 0.621, 'epoch': 10.68}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1462669372558594, 'eval_bleu': 13.6485, 'eval_gen_len': 23.8805, 'eval_runtime': 89.1861, 'eval_samples_per_second': 9.856, 'eval_steps_per_second': 0.617, 'epoch': 10.7}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1386451721191406, 'eval_bleu': 13.5659, 'eval_gen_len': 23.843, 'eval_runtime': 89.1347, 'eval_samples_per_second': 9.861, 'eval_steps_per_second': 0.617, 'epoch': 10.71}
{'loss': 1.6509, 'grad_norm': 1.1083552837371826, 'learning_rate': 0.00048333333333333334, 'epoch': 10.72}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.140287399291992, 'eval_bleu': 13.8171, 'eval_gen_len': 23.8601, 'eval_runtime': 88.6626, 'eval_samples_per_second': 9.914, 'eval_steps_per_second': 0.62, 'epoch': 10.72}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1386663913726807, 'eval_bleu': 13.5396, 'eval_gen_len': 23.6507, 'eval_runtime': 88.8468, 'eval_samples_per_second': 9.893, 'eval_steps_per_second': 0.619, 'epoch': 10.74}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.139134168624878, 'eval_bleu': 13.8982, 'eval_gen_len': 23.9511, 'eval_runtime': 88.7303, 'eval_samples_per_second': 9.906, 'eval_steps_per_second': 0.62, 'epoch': 10.75}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.146209478378296, 'eval_bleu': 13.7303, 'eval_gen_len': 23.9056, 'eval_runtime': 88.9874, 'eval_samples_per_second': 9.878, 'eval_steps_per_second': 0.618, 'epoch': 10.76}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.145627498626709, 'eval_bleu': 13.3805, 'eval_gen_len': 24.0046, 'eval_runtime': 88.1615, 'eval_samples_per_second': 9.97, 'eval_steps_per_second': 0.624, 'epoch': 10.78}
{'loss': 1.6503, 'grad_norm': 1.7058930397033691, 'learning_rate': 0.00048, 'epoch': 10.79}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1480674743652344, 'eval_bleu': 13.712, 'eval_gen_len': 24.0671, 'eval_runtime': 88.3376, 'eval_samples_per_second': 9.95, 'eval_steps_per_second': 0.623, 'epoch': 10.79}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.141347646713257, 'eval_bleu': 13.8748, 'eval_gen_len': 23.8464, 'eval_runtime': 89.0451, 'eval_samples_per_second': 9.871, 'eval_steps_per_second': 0.618, 'epoch': 10.81}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1446540355682373, 'eval_bleu': 13.8485, 'eval_gen_len': 23.8817, 'eval_runtime': 88.3509, 'eval_samples_per_second': 9.949, 'eval_steps_per_second': 0.623, 'epoch': 10.82}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1380226612091064, 'eval_bleu': 13.8909, 'eval_gen_len': 23.8203, 'eval_runtime': 88.6309, 'eval_samples_per_second': 9.918, 'eval_steps_per_second': 0.621, 'epoch': 10.83}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.149031400680542, 'eval_bleu': 13.8531, 'eval_gen_len': 23.8134, 'eval_runtime': 88.3429, 'eval_samples_per_second': 9.95, 'eval_steps_per_second': 0.623, 'epoch': 10.85}
{'loss': 1.655, 'grad_norm': 1.114542841911316, 'learning_rate': 0.0004766666666666667, 'epoch': 10.86}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1478710174560547, 'eval_bleu': 13.9013, 'eval_gen_len': 23.81, 'eval_runtime': 88.4588, 'eval_samples_per_second': 9.937, 'eval_steps_per_second': 0.622, 'epoch': 10.86}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.148509979248047, 'eval_bleu': 13.1443, 'eval_gen_len': 23.6917, 'eval_runtime': 88.3015, 'eval_samples_per_second': 9.955, 'eval_steps_per_second': 0.623, 'epoch': 10.88}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1414332389831543, 'eval_bleu': 13.4248, 'eval_gen_len': 23.9317, 'eval_runtime': 88.1463, 'eval_samples_per_second': 9.972, 'eval_steps_per_second': 0.624, 'epoch': 10.89}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1432862281799316, 'eval_bleu': 13.233, 'eval_gen_len': 23.7452, 'eval_runtime': 87.67, 'eval_samples_per_second': 10.026, 'eval_steps_per_second': 0.627, 'epoch': 10.9}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1381032466888428, 'eval_bleu': 13.6343, 'eval_gen_len': 23.7668, 'eval_runtime': 88.4707, 'eval_samples_per_second': 9.935, 'eval_steps_per_second': 0.622, 'epoch': 10.92}
{'loss': 1.6626, 'grad_norm': 0.9211755394935608, 'learning_rate': 0.00047333333333333336, 'epoch': 10.93}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1421327590942383, 'eval_bleu': 13.4186, 'eval_gen_len': 23.7554, 'eval_runtime': 88.3197, 'eval_samples_per_second': 9.952, 'eval_steps_per_second': 0.623, 'epoch': 10.93}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.148129940032959, 'eval_bleu': 13.4395, 'eval_gen_len': 23.8077, 'eval_runtime': 88.157, 'eval_samples_per_second': 9.971, 'eval_steps_per_second': 0.624, 'epoch': 10.94}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1470980644226074, 'eval_bleu': 13.577, 'eval_gen_len': 24.0102, 'eval_runtime': 88.8106, 'eval_samples_per_second': 9.897, 'eval_steps_per_second': 0.619, 'epoch': 10.96}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1357669830322266, 'eval_bleu': 13.4183, 'eval_gen_len': 23.62, 'eval_runtime': 87.8415, 'eval_samples_per_second': 10.007, 'eval_steps_per_second': 0.626, 'epoch': 10.97}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.134995698928833, 'eval_bleu': 13.7917, 'eval_gen_len': 23.9022, 'eval_runtime': 89.5411, 'eval_samples_per_second': 9.817, 'eval_steps_per_second': 0.614, 'epoch': 10.99}
{'loss': 1.6645, 'grad_norm': 1.1316736936569214, 'learning_rate': 0.00047, 'epoch': 11.0}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.131105661392212, 'eval_bleu': 13.6201, 'eval_gen_len': 23.7349, 'eval_runtime': 90.5593, 'eval_samples_per_second': 9.706, 'eval_steps_per_second': 0.607, 'epoch': 11.0}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.165459156036377, 'eval_bleu': 13.7228, 'eval_gen_len': 23.7565, 'eval_runtime': 88.7065, 'eval_samples_per_second': 9.909, 'eval_steps_per_second': 0.62, 'epoch': 11.01}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.177304267883301, 'eval_bleu': 13.594, 'eval_gen_len': 23.727, 'eval_runtime': 88.2807, 'eval_samples_per_second': 9.957, 'eval_steps_per_second': 0.623, 'epoch': 11.03}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1689612865448, 'eval_bleu': 13.2728, 'eval_gen_len': 23.7247, 'eval_runtime': 88.1532, 'eval_samples_per_second': 9.971, 'eval_steps_per_second': 0.624, 'epoch': 11.04}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1700308322906494, 'eval_bleu': 13.4516, 'eval_gen_len': 23.8316, 'eval_runtime': 88.8844, 'eval_samples_per_second': 9.889, 'eval_steps_per_second': 0.619, 'epoch': 11.05}
{'loss': 1.5187, 'grad_norm': 1.326309084892273, 'learning_rate': 0.00046666666666666666, 'epoch': 11.07}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.165727376937866, 'eval_bleu': 13.2693, 'eval_gen_len': 23.8612, 'eval_runtime': 88.6933, 'eval_samples_per_second': 9.911, 'eval_steps_per_second': 0.62, 'epoch': 11.07}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1773908138275146, 'eval_bleu': 13.1176, 'eval_gen_len': 24.0478, 'eval_runtime': 88.4996, 'eval_samples_per_second': 9.932, 'eval_steps_per_second': 0.621, 'epoch': 11.08}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.175631523132324, 'eval_bleu': 13.5805, 'eval_gen_len': 23.5552, 'eval_runtime': 88.8973, 'eval_samples_per_second': 9.888, 'eval_steps_per_second': 0.619, 'epoch': 11.1}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.178255319595337, 'eval_bleu': 13.648, 'eval_gen_len': 23.686, 'eval_runtime': 87.6136, 'eval_samples_per_second': 10.033, 'eval_steps_per_second': 0.628, 'epoch': 11.11}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1692535877227783, 'eval_bleu': 13.3607, 'eval_gen_len': 23.8225, 'eval_runtime': 88.8772, 'eval_samples_per_second': 9.89, 'eval_steps_per_second': 0.619, 'epoch': 11.12}
{'loss': 1.5324, 'grad_norm': 1.1620030403137207, 'learning_rate': 0.00046333333333333334, 'epoch': 11.14}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.16339373588562, 'eval_bleu': 13.6253, 'eval_gen_len': 23.719, 'eval_runtime': 88.9007, 'eval_samples_per_second': 9.887, 'eval_steps_per_second': 0.619, 'epoch': 11.14}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.165733575820923, 'eval_bleu': 13.3632, 'eval_gen_len': 23.8896, 'eval_runtime': 88.38, 'eval_samples_per_second': 9.946, 'eval_steps_per_second': 0.622, 'epoch': 11.15}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1807985305786133, 'eval_bleu': 13.5509, 'eval_gen_len': 23.6553, 'eval_runtime': 87.8379, 'eval_samples_per_second': 10.007, 'eval_steps_per_second': 0.626, 'epoch': 11.17}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1675994396209717, 'eval_bleu': 13.8241, 'eval_gen_len': 23.7884, 'eval_runtime': 88.3839, 'eval_samples_per_second': 9.945, 'eval_steps_per_second': 0.622, 'epoch': 11.18}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.173839807510376, 'eval_bleu': 13.3807, 'eval_gen_len': 23.7235, 'eval_runtime': 88.747, 'eval_samples_per_second': 9.905, 'eval_steps_per_second': 0.62, 'epoch': 11.19}
{'loss': 1.5452, 'grad_norm': 1.4448322057724, 'learning_rate': 0.00046, 'epoch': 11.21}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1693027019500732, 'eval_bleu': 13.2793, 'eval_gen_len': 23.818, 'eval_runtime': 88.4029, 'eval_samples_per_second': 9.943, 'eval_steps_per_second': 0.622, 'epoch': 11.21}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.182711362838745, 'eval_bleu': 13.3281, 'eval_gen_len': 23.8168, 'eval_runtime': 89.1594, 'eval_samples_per_second': 9.859, 'eval_steps_per_second': 0.617, 'epoch': 11.22}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1762266159057617, 'eval_bleu': 13.7701, 'eval_gen_len': 23.9545, 'eval_runtime': 89.0543, 'eval_samples_per_second': 9.87, 'eval_steps_per_second': 0.618, 'epoch': 11.23}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1809074878692627, 'eval_bleu': 13.8046, 'eval_gen_len': 23.8828, 'eval_runtime': 88.0452, 'eval_samples_per_second': 9.984, 'eval_steps_per_second': 0.625, 'epoch': 11.25}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.171445608139038, 'eval_bleu': 13.5375, 'eval_gen_len': 23.9579, 'eval_runtime': 89.2628, 'eval_samples_per_second': 9.847, 'eval_steps_per_second': 0.616, 'epoch': 11.26}
{'loss': 1.5516, 'grad_norm': 0.9168345928192139, 'learning_rate': 0.0004566666666666667, 'epoch': 11.28}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1704115867614746, 'eval_bleu': 13.9437, 'eval_gen_len': 23.8862, 'eval_runtime': 88.0572, 'eval_samples_per_second': 9.982, 'eval_steps_per_second': 0.625, 'epoch': 11.28}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.164018154144287, 'eval_bleu': 13.805, 'eval_gen_len': 23.8134, 'eval_runtime': 88.5306, 'eval_samples_per_second': 9.929, 'eval_steps_per_second': 0.621, 'epoch': 11.29}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.176182508468628, 'eval_bleu': 13.2607, 'eval_gen_len': 23.8339, 'eval_runtime': 88.422, 'eval_samples_per_second': 9.941, 'eval_steps_per_second': 0.622, 'epoch': 11.3}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.177211046218872, 'eval_bleu': 13.8011, 'eval_gen_len': 23.6883, 'eval_runtime': 88.6824, 'eval_samples_per_second': 9.912, 'eval_steps_per_second': 0.62, 'epoch': 11.32}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1535491943359375, 'eval_bleu': 13.9508, 'eval_gen_len': 23.7554, 'eval_runtime': 89.0318, 'eval_samples_per_second': 9.873, 'eval_steps_per_second': 0.618, 'epoch': 11.33}
{'loss': 1.5624, 'grad_norm': 1.1809346675872803, 'learning_rate': 0.0004533333333333333, 'epoch': 11.35}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1616857051849365, 'eval_bleu': 13.3776, 'eval_gen_len': 23.8896, 'eval_runtime': 89.4537, 'eval_samples_per_second': 9.826, 'eval_steps_per_second': 0.615, 'epoch': 11.35}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1698224544525146, 'eval_bleu': 13.5892, 'eval_gen_len': 23.8123, 'eval_runtime': 88.4756, 'eval_samples_per_second': 9.935, 'eval_steps_per_second': 0.622, 'epoch': 11.36}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1637911796569824, 'eval_bleu': 13.7316, 'eval_gen_len': 23.8362, 'eval_runtime': 88.1374, 'eval_samples_per_second': 9.973, 'eval_steps_per_second': 0.624, 'epoch': 11.37}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.158132553100586, 'eval_bleu': 13.7806, 'eval_gen_len': 23.9101, 'eval_runtime': 88.1601, 'eval_samples_per_second': 9.97, 'eval_steps_per_second': 0.624, 'epoch': 11.39}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.161031723022461, 'eval_bleu': 13.6111, 'eval_gen_len': 23.8487, 'eval_runtime': 88.5802, 'eval_samples_per_second': 9.923, 'eval_steps_per_second': 0.621, 'epoch': 11.4}
{'loss': 1.5674, 'grad_norm': 1.1104209423065186, 'learning_rate': 0.00045000000000000004, 'epoch': 11.41}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.164991617202759, 'eval_bleu': 13.9641, 'eval_gen_len': 23.9408, 'eval_runtime': 88.3012, 'eval_samples_per_second': 9.955, 'eval_steps_per_second': 0.623, 'epoch': 11.41}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1677558422088623, 'eval_bleu': 14.14, 'eval_gen_len': 23.8828, 'eval_runtime': 89.7218, 'eval_samples_per_second': 9.797, 'eval_steps_per_second': 0.613, 'epoch': 11.43}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.165851354598999, 'eval_bleu': 13.9703, 'eval_gen_len': 23.8874, 'eval_runtime': 88.788, 'eval_samples_per_second': 9.9, 'eval_steps_per_second': 0.619, 'epoch': 11.44}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.147951602935791, 'eval_bleu': 13.7048, 'eval_gen_len': 23.8203, 'eval_runtime': 88.4086, 'eval_samples_per_second': 9.942, 'eval_steps_per_second': 0.622, 'epoch': 11.46}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.148257255554199, 'eval_bleu': 13.5867, 'eval_gen_len': 24.0489, 'eval_runtime': 89.1644, 'eval_samples_per_second': 9.858, 'eval_steps_per_second': 0.617, 'epoch': 11.47}
{'loss': 1.5699, 'grad_norm': 1.249747633934021, 'learning_rate': 0.00044666666666666666, 'epoch': 11.48}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1663897037506104, 'eval_bleu': 13.7786, 'eval_gen_len': 23.8635, 'eval_runtime': 88.5313, 'eval_samples_per_second': 9.929, 'eval_steps_per_second': 0.621, 'epoch': 11.48}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1519522666931152, 'eval_bleu': 13.8716, 'eval_gen_len': 23.7395, 'eval_runtime': 88.726, 'eval_samples_per_second': 9.907, 'eval_steps_per_second': 0.62, 'epoch': 11.5}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1357908248901367, 'eval_bleu': 13.8606, 'eval_gen_len': 23.8464, 'eval_runtime': 88.3551, 'eval_samples_per_second': 9.948, 'eval_steps_per_second': 0.622, 'epoch': 11.51}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1522409915924072, 'eval_bleu': 14.0213, 'eval_gen_len': 23.6041, 'eval_runtime': 88.6194, 'eval_samples_per_second': 9.919, 'eval_steps_per_second': 0.621, 'epoch': 11.53}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1511123180389404, 'eval_bleu': 13.7763, 'eval_gen_len': 23.76, 'eval_runtime': 88.7726, 'eval_samples_per_second': 9.902, 'eval_steps_per_second': 0.62, 'epoch': 11.54}
{'loss': 1.5659, 'grad_norm': 1.2873386144638062, 'learning_rate': 0.00044333333333333334, 'epoch': 11.55}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.149345636367798, 'eval_bleu': 13.7259, 'eval_gen_len': 23.8578, 'eval_runtime': 90.0914, 'eval_samples_per_second': 9.757, 'eval_steps_per_second': 0.61, 'epoch': 11.55}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1492836475372314, 'eval_bleu': 13.8619, 'eval_gen_len': 23.7895, 'eval_runtime': 88.539, 'eval_samples_per_second': 9.928, 'eval_steps_per_second': 0.621, 'epoch': 11.57}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1533515453338623, 'eval_bleu': 13.7647, 'eval_gen_len': 23.7497, 'eval_runtime': 88.6846, 'eval_samples_per_second': 9.912, 'eval_steps_per_second': 0.62, 'epoch': 11.58}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1341779232025146, 'eval_bleu': 14.0157, 'eval_gen_len': 23.9477, 'eval_runtime': 87.9536, 'eval_samples_per_second': 9.994, 'eval_steps_per_second': 0.625, 'epoch': 11.59}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.155162811279297, 'eval_bleu': 13.7249, 'eval_gen_len': 23.8259, 'eval_runtime': 89.2045, 'eval_samples_per_second': 9.854, 'eval_steps_per_second': 0.617, 'epoch': 11.61}
{'loss': 1.5798, 'grad_norm': 1.3993571996688843, 'learning_rate': 0.00044, 'epoch': 11.62}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1424882411956787, 'eval_bleu': 13.7932, 'eval_gen_len': 23.8771, 'eval_runtime': 88.059, 'eval_samples_per_second': 9.982, 'eval_steps_per_second': 0.625, 'epoch': 11.62}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1450507640838623, 'eval_bleu': 13.7485, 'eval_gen_len': 23.9261, 'eval_runtime': 89.9166, 'eval_samples_per_second': 9.776, 'eval_steps_per_second': 0.612, 'epoch': 11.64}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1449105739593506, 'eval_bleu': 13.6271, 'eval_gen_len': 23.9556, 'eval_runtime': 89.7739, 'eval_samples_per_second': 9.791, 'eval_steps_per_second': 0.613, 'epoch': 11.65}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.150247812271118, 'eval_bleu': 13.7078, 'eval_gen_len': 23.7793, 'eval_runtime': 89.1775, 'eval_samples_per_second': 9.857, 'eval_steps_per_second': 0.617, 'epoch': 11.66}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.150019884109497, 'eval_bleu': 13.9161, 'eval_gen_len': 23.6177, 'eval_runtime': 88.041, 'eval_samples_per_second': 9.984, 'eval_steps_per_second': 0.625, 'epoch': 11.68}
{'loss': 1.5794, 'grad_norm': 1.2753664255142212, 'learning_rate': 0.00043666666666666664, 'epoch': 11.69}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.14868426322937, 'eval_bleu': 14.0708, 'eval_gen_len': 23.7179, 'eval_runtime': 88.743, 'eval_samples_per_second': 9.905, 'eval_steps_per_second': 0.62, 'epoch': 11.69}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1492557525634766, 'eval_bleu': 13.9363, 'eval_gen_len': 23.6837, 'eval_runtime': 88.4025, 'eval_samples_per_second': 9.943, 'eval_steps_per_second': 0.622, 'epoch': 11.71}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1430792808532715, 'eval_bleu': 13.913, 'eval_gen_len': 23.9056, 'eval_runtime': 88.3473, 'eval_samples_per_second': 9.949, 'eval_steps_per_second': 0.623, 'epoch': 11.72}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1399388313293457, 'eval_bleu': 14.0183, 'eval_gen_len': 23.8328, 'eval_runtime': 89.2274, 'eval_samples_per_second': 9.851, 'eval_steps_per_second': 0.616, 'epoch': 11.73}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1554105281829834, 'eval_bleu': 13.9791, 'eval_gen_len': 23.7895, 'eval_runtime': 88.4298, 'eval_samples_per_second': 9.94, 'eval_steps_per_second': 0.622, 'epoch': 11.75}
{'loss': 1.5809, 'grad_norm': 1.2010492086410522, 'learning_rate': 0.00043333333333333337, 'epoch': 11.76}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.133509874343872, 'eval_bleu': 13.7282, 'eval_gen_len': 23.9022, 'eval_runtime': 88.6455, 'eval_samples_per_second': 9.916, 'eval_steps_per_second': 0.62, 'epoch': 11.76}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.146019697189331, 'eval_bleu': 14.2129, 'eval_gen_len': 23.7349, 'eval_runtime': 88.9721, 'eval_samples_per_second': 9.88, 'eval_steps_per_second': 0.618, 'epoch': 11.77}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1494028568267822, 'eval_bleu': 13.9514, 'eval_gen_len': 23.8794, 'eval_runtime': 88.3497, 'eval_samples_per_second': 9.949, 'eval_steps_per_second': 0.623, 'epoch': 11.79}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1423227787017822, 'eval_bleu': 13.9268, 'eval_gen_len': 23.9545, 'eval_runtime': 88.866, 'eval_samples_per_second': 9.891, 'eval_steps_per_second': 0.619, 'epoch': 11.8}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1540565490722656, 'eval_bleu': 13.6587, 'eval_gen_len': 23.7861, 'eval_runtime': 88.6172, 'eval_samples_per_second': 9.919, 'eval_steps_per_second': 0.621, 'epoch': 11.82}
{'loss': 1.5855, 'grad_norm': 1.2917866706848145, 'learning_rate': 0.00043, 'epoch': 11.83}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1404736042022705, 'eval_bleu': 13.8143, 'eval_gen_len': 23.9784, 'eval_runtime': 88.5831, 'eval_samples_per_second': 9.923, 'eval_steps_per_second': 0.621, 'epoch': 11.83}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1463754177093506, 'eval_bleu': 13.8561, 'eval_gen_len': 23.9249, 'eval_runtime': 89.6872, 'eval_samples_per_second': 9.801, 'eval_steps_per_second': 0.613, 'epoch': 11.84}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.135863780975342, 'eval_bleu': 13.8418, 'eval_gen_len': 24.0228, 'eval_runtime': 88.5447, 'eval_samples_per_second': 9.927, 'eval_steps_per_second': 0.621, 'epoch': 11.86}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.138159990310669, 'eval_bleu': 13.9436, 'eval_gen_len': 23.8373, 'eval_runtime': 89.8563, 'eval_samples_per_second': 9.782, 'eval_steps_per_second': 0.612, 'epoch': 11.87}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.133488416671753, 'eval_bleu': 14.0426, 'eval_gen_len': 23.8498, 'eval_runtime': 88.9675, 'eval_samples_per_second': 9.88, 'eval_steps_per_second': 0.618, 'epoch': 11.89}
{'loss': 1.5889, 'grad_norm': 1.0964539051055908, 'learning_rate': 0.0004266666666666667, 'epoch': 11.9}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1390702724456787, 'eval_bleu': 13.8675, 'eval_gen_len': 23.7702, 'eval_runtime': 88.4026, 'eval_samples_per_second': 9.943, 'eval_steps_per_second': 0.622, 'epoch': 11.9}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.122637987136841, 'eval_bleu': 13.9389, 'eval_gen_len': 23.8089, 'eval_runtime': 88.31, 'eval_samples_per_second': 9.954, 'eval_steps_per_second': 0.623, 'epoch': 11.91}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.134171724319458, 'eval_bleu': 14.278, 'eval_gen_len': 23.7827, 'eval_runtime': 87.866, 'eval_samples_per_second': 10.004, 'eval_steps_per_second': 0.626, 'epoch': 11.93}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1394481658935547, 'eval_bleu': 13.6857, 'eval_gen_len': 24.0102, 'eval_runtime': 88.9054, 'eval_samples_per_second': 9.887, 'eval_steps_per_second': 0.619, 'epoch': 11.94}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.131805896759033, 'eval_bleu': 13.9143, 'eval_gen_len': 24.0478, 'eval_runtime': 88.957, 'eval_samples_per_second': 9.881, 'eval_steps_per_second': 0.618, 'epoch': 11.95}
{'loss': 1.5942, 'grad_norm': 1.1191686391830444, 'learning_rate': 0.00042333333333333334, 'epoch': 11.97}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.133368730545044, 'eval_bleu': 14.2507, 'eval_gen_len': 23.917, 'eval_runtime': 88.2299, 'eval_samples_per_second': 9.963, 'eval_steps_per_second': 0.623, 'epoch': 11.97}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.133326292037964, 'eval_bleu': 14.1211, 'eval_gen_len': 23.9124, 'eval_runtime': 88.0768, 'eval_samples_per_second': 9.98, 'eval_steps_per_second': 0.624, 'epoch': 11.98}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1289803981781006, 'eval_bleu': 14.0998, 'eval_gen_len': 23.7645, 'eval_runtime': 88.5469, 'eval_samples_per_second': 9.927, 'eval_steps_per_second': 0.621, 'epoch': 12.0}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.171717882156372, 'eval_bleu': 13.9195, 'eval_gen_len': 23.7122, 'eval_runtime': 88.64, 'eval_samples_per_second': 9.917, 'eval_steps_per_second': 0.62, 'epoch': 12.01}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.170862913131714, 'eval_bleu': 14.0483, 'eval_gen_len': 23.9124, 'eval_runtime': 88.9434, 'eval_samples_per_second': 9.883, 'eval_steps_per_second': 0.618, 'epoch': 12.02}
{'loss': 1.5138, 'grad_norm': 1.0291296243667603, 'learning_rate': 0.00042, 'epoch': 12.04}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1608047485351562, 'eval_bleu': 13.7204, 'eval_gen_len': 23.8419, 'eval_runtime': 88.6297, 'eval_samples_per_second': 9.918, 'eval_steps_per_second': 0.621, 'epoch': 12.04}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1630585193634033, 'eval_bleu': 13.8063, 'eval_gen_len': 23.7702, 'eval_runtime': 90.0728, 'eval_samples_per_second': 9.759, 'eval_steps_per_second': 0.611, 'epoch': 12.05}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.177743911743164, 'eval_bleu': 13.9286, 'eval_gen_len': 23.802, 'eval_runtime': 89.9799, 'eval_samples_per_second': 9.769, 'eval_steps_per_second': 0.611, 'epoch': 12.07}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1753673553466797, 'eval_bleu': 14.1212, 'eval_gen_len': 23.785, 'eval_runtime': 90.7405, 'eval_samples_per_second': 9.687, 'eval_steps_per_second': 0.606, 'epoch': 12.08}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.158137083053589, 'eval_bleu': 13.9173, 'eval_gen_len': 23.6962, 'eval_runtime': 91.3113, 'eval_samples_per_second': 9.626, 'eval_steps_per_second': 0.602, 'epoch': 12.09}
{'loss': 1.4675, 'grad_norm': 1.3635027408599854, 'learning_rate': 0.0004166666666666667, 'epoch': 12.11}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.172273635864258, 'eval_bleu': 13.8944, 'eval_gen_len': 23.744, 'eval_runtime': 91.3584, 'eval_samples_per_second': 9.621, 'eval_steps_per_second': 0.602, 'epoch': 12.11}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.165738582611084, 'eval_bleu': 13.7028, 'eval_gen_len': 23.7133, 'eval_runtime': 91.3584, 'eval_samples_per_second': 9.621, 'eval_steps_per_second': 0.602, 'epoch': 12.12}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1741957664489746, 'eval_bleu': 13.9849, 'eval_gen_len': 23.835, 'eval_runtime': 91.0906, 'eval_samples_per_second': 9.65, 'eval_steps_per_second': 0.604, 'epoch': 12.13}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1757469177246094, 'eval_bleu': 13.6982, 'eval_gen_len': 23.7964, 'eval_runtime': 91.7007, 'eval_samples_per_second': 9.586, 'eval_steps_per_second': 0.6, 'epoch': 12.15}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.172936201095581, 'eval_bleu': 13.8826, 'eval_gen_len': 23.7645, 'eval_runtime': 91.6206, 'eval_samples_per_second': 9.594, 'eval_steps_per_second': 0.6, 'epoch': 12.16}
{'loss': 1.4654, 'grad_norm': 1.029572606086731, 'learning_rate': 0.0004133333333333333, 'epoch': 12.18}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.165773630142212, 'eval_bleu': 13.9995, 'eval_gen_len': 23.678, 'eval_runtime': 90.7334, 'eval_samples_per_second': 9.688, 'eval_steps_per_second': 0.606, 'epoch': 12.18}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.173896551132202, 'eval_bleu': 13.8733, 'eval_gen_len': 23.8043, 'eval_runtime': 91.35, 'eval_samples_per_second': 9.622, 'eval_steps_per_second': 0.602, 'epoch': 12.19}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1709635257720947, 'eval_bleu': 13.6568, 'eval_gen_len': 23.6678, 'eval_runtime': 91.3889, 'eval_samples_per_second': 9.618, 'eval_steps_per_second': 0.602, 'epoch': 12.2}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.179180145263672, 'eval_bleu': 13.7725, 'eval_gen_len': 23.8578, 'eval_runtime': 90.9204, 'eval_samples_per_second': 9.668, 'eval_steps_per_second': 0.605, 'epoch': 12.22}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.171846389770508, 'eval_bleu': 13.6408, 'eval_gen_len': 23.8589, 'eval_runtime': 91.1153, 'eval_samples_per_second': 9.647, 'eval_steps_per_second': 0.604, 'epoch': 12.23}
{'loss': 1.4735, 'grad_norm': 1.626863718032837, 'learning_rate': 0.00041, 'epoch': 12.24}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1728150844573975, 'eval_bleu': 13.8887, 'eval_gen_len': 23.9772, 'eval_runtime': 92.1251, 'eval_samples_per_second': 9.541, 'eval_steps_per_second': 0.597, 'epoch': 12.24}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1617162227630615, 'eval_bleu': 13.7526, 'eval_gen_len': 23.9192, 'eval_runtime': 91.2723, 'eval_samples_per_second': 9.631, 'eval_steps_per_second': 0.603, 'epoch': 12.26}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1715943813323975, 'eval_bleu': 13.5467, 'eval_gen_len': 23.9727, 'eval_runtime': 91.5799, 'eval_samples_per_second': 9.598, 'eval_steps_per_second': 0.601, 'epoch': 12.27}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.168241500854492, 'eval_bleu': 13.333, 'eval_gen_len': 23.7008, 'eval_runtime': 91.588, 'eval_samples_per_second': 9.597, 'eval_steps_per_second': 0.601, 'epoch': 12.29}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1720967292785645, 'eval_bleu': 13.4102, 'eval_gen_len': 23.8919, 'eval_runtime': 91.407, 'eval_samples_per_second': 9.616, 'eval_steps_per_second': 0.602, 'epoch': 12.3}
{'loss': 1.5006, 'grad_norm': 1.3388959169387817, 'learning_rate': 0.00040666666666666667, 'epoch': 12.31}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.176464080810547, 'eval_bleu': 13.7218, 'eval_gen_len': 23.7361, 'eval_runtime': 91.4806, 'eval_samples_per_second': 9.609, 'eval_steps_per_second': 0.601, 'epoch': 12.31}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1615846157073975, 'eval_bleu': 13.7961, 'eval_gen_len': 23.6473, 'eval_runtime': 90.9798, 'eval_samples_per_second': 9.661, 'eval_steps_per_second': 0.605, 'epoch': 12.33}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.170952558517456, 'eval_bleu': 13.5329, 'eval_gen_len': 23.9181, 'eval_runtime': 92.0838, 'eval_samples_per_second': 9.546, 'eval_steps_per_second': 0.597, 'epoch': 12.34}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.163632869720459, 'eval_bleu': 13.8318, 'eval_gen_len': 23.8407, 'eval_runtime': 92.5942, 'eval_samples_per_second': 9.493, 'eval_steps_per_second': 0.594, 'epoch': 12.36}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.156442165374756, 'eval_bleu': 13.7203, 'eval_gen_len': 23.8487, 'eval_runtime': 92.7154, 'eval_samples_per_second': 9.481, 'eval_steps_per_second': 0.593, 'epoch': 12.37}
{'loss': 1.495, 'grad_norm': 1.2297486066818237, 'learning_rate': 0.00040333333333333334, 'epoch': 12.38}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1578731536865234, 'eval_bleu': 13.833, 'eval_gen_len': 23.7565, 'eval_runtime': 91.8858, 'eval_samples_per_second': 9.566, 'eval_steps_per_second': 0.599, 'epoch': 12.38}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1778178215026855, 'eval_bleu': 13.5952, 'eval_gen_len': 23.6826, 'eval_runtime': 99.8258, 'eval_samples_per_second': 8.805, 'eval_steps_per_second': 0.551, 'epoch': 12.4}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1576313972473145, 'eval_bleu': 13.3634, 'eval_gen_len': 23.8942, 'eval_runtime': 48.9919, 'eval_samples_per_second': 17.942, 'eval_steps_per_second': 1.123, 'epoch': 12.41}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1550447940826416, 'eval_bleu': 13.624, 'eval_gen_len': 23.7144, 'eval_runtime': 71.812, 'eval_samples_per_second': 12.24, 'eval_steps_per_second': 0.766, 'epoch': 12.42}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.158562183380127, 'eval_bleu': 13.8797, 'eval_gen_len': 23.7736, 'eval_runtime': 45.9698, 'eval_samples_per_second': 19.121, 'eval_steps_per_second': 1.196, 'epoch': 12.44}
{'loss': 1.5023, 'grad_norm': 1.2553246021270752, 'learning_rate': 0.0004, 'epoch': 12.45}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.161083459854126, 'eval_bleu': 13.7121, 'eval_gen_len': 23.6325, 'eval_runtime': 44.6157, 'eval_samples_per_second': 19.702, 'eval_steps_per_second': 1.233, 'epoch': 12.45}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1605591773986816, 'eval_bleu': 13.6682, 'eval_gen_len': 23.8043, 'eval_runtime': 45.6661, 'eval_samples_per_second': 19.248, 'eval_steps_per_second': 1.204, 'epoch': 12.47}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1594231128692627, 'eval_bleu': 13.645, 'eval_gen_len': 23.7042, 'eval_runtime': 43.9128, 'eval_samples_per_second': 20.017, 'eval_steps_per_second': 1.252, 'epoch': 12.48}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1597769260406494, 'eval_bleu': 13.6729, 'eval_gen_len': 23.8032, 'eval_runtime': 44.7253, 'eval_samples_per_second': 19.653, 'eval_steps_per_second': 1.23, 'epoch': 12.49}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1545093059539795, 'eval_bleu': 13.6724, 'eval_gen_len': 23.9545, 'eval_runtime': 44.9772, 'eval_samples_per_second': 19.543, 'eval_steps_per_second': 1.223, 'epoch': 12.51}
{'loss': 1.5104, 'grad_norm': 1.0156561136245728, 'learning_rate': 0.0003966666666666667, 'epoch': 12.52}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.155764579772949, 'eval_bleu': 13.9615, 'eval_gen_len': 23.8578, 'eval_runtime': 43.2031, 'eval_samples_per_second': 20.346, 'eval_steps_per_second': 1.273, 'epoch': 12.52}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.159330129623413, 'eval_bleu': 13.9163, 'eval_gen_len': 23.8658, 'eval_runtime': 46.7712, 'eval_samples_per_second': 18.794, 'eval_steps_per_second': 1.176, 'epoch': 12.54}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.161200761795044, 'eval_bleu': 13.7506, 'eval_gen_len': 23.818, 'eval_runtime': 45.4667, 'eval_samples_per_second': 19.333, 'eval_steps_per_second': 1.21, 'epoch': 12.55}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1493537425994873, 'eval_bleu': 13.58, 'eval_gen_len': 23.7042, 'eval_runtime': 45.5401, 'eval_samples_per_second': 19.302, 'eval_steps_per_second': 1.208, 'epoch': 12.56}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1446292400360107, 'eval_bleu': 13.752, 'eval_gen_len': 23.6724, 'eval_runtime': 44.868, 'eval_samples_per_second': 19.591, 'eval_steps_per_second': 1.226, 'epoch': 12.58}
{'loss': 1.5151, 'grad_norm': 1.4412405490875244, 'learning_rate': 0.0003933333333333333, 'epoch': 12.59}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.15071177482605, 'eval_bleu': 13.5591, 'eval_gen_len': 23.7611, 'eval_runtime': 46.1255, 'eval_samples_per_second': 19.057, 'eval_steps_per_second': 1.192, 'epoch': 12.59}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1494498252868652, 'eval_bleu': 13.6967, 'eval_gen_len': 23.8294, 'eval_runtime': 44.3128, 'eval_samples_per_second': 19.836, 'eval_steps_per_second': 1.241, 'epoch': 12.6}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1595816612243652, 'eval_bleu': 13.9187, 'eval_gen_len': 23.727, 'eval_runtime': 73.4459, 'eval_samples_per_second': 11.968, 'eval_steps_per_second': 0.749, 'epoch': 12.62}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.155214786529541, 'eval_bleu': 13.7557, 'eval_gen_len': 23.7725, 'eval_runtime': 82.2838, 'eval_samples_per_second': 10.683, 'eval_steps_per_second': 0.668, 'epoch': 12.63}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1442933082580566, 'eval_bleu': 14.229, 'eval_gen_len': 23.7247, 'eval_runtime': 45.7988, 'eval_samples_per_second': 19.193, 'eval_steps_per_second': 1.201, 'epoch': 12.65}
{'loss': 1.5149, 'grad_norm': 1.1250582933425903, 'learning_rate': 0.00039000000000000005, 'epoch': 12.66}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1518332958221436, 'eval_bleu': 14.1386, 'eval_gen_len': 23.7383, 'eval_runtime': 44.1646, 'eval_samples_per_second': 19.903, 'eval_steps_per_second': 1.245, 'epoch': 12.66}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1464133262634277, 'eval_bleu': 13.9839, 'eval_gen_len': 23.6724, 'eval_runtime': 46.1188, 'eval_samples_per_second': 19.059, 'eval_steps_per_second': 1.193, 'epoch': 12.67}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.138043165206909, 'eval_bleu': 13.8921, 'eval_gen_len': 23.8703, 'eval_runtime': 44.8651, 'eval_samples_per_second': 19.592, 'eval_steps_per_second': 1.226, 'epoch': 12.69}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.144141435623169, 'eval_bleu': 13.7936, 'eval_gen_len': 23.7418, 'eval_runtime': 45.4321, 'eval_samples_per_second': 19.348, 'eval_steps_per_second': 1.211, 'epoch': 12.7}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.136335849761963, 'eval_bleu': 13.8364, 'eval_gen_len': 23.7929, 'eval_runtime': 46.1763, 'eval_samples_per_second': 19.036, 'eval_steps_per_second': 1.191, 'epoch': 12.72}
{'loss': 1.5274, 'grad_norm': 0.9678164720535278, 'learning_rate': 0.00038666666666666667, 'epoch': 12.73}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1421477794647217, 'eval_bleu': 14.0294, 'eval_gen_len': 23.7838, 'eval_runtime': 44.7507, 'eval_samples_per_second': 19.642, 'eval_steps_per_second': 1.229, 'epoch': 12.73}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.144098997116089, 'eval_bleu': 13.9348, 'eval_gen_len': 23.8862, 'eval_runtime': 46.3444, 'eval_samples_per_second': 18.967, 'eval_steps_per_second': 1.187, 'epoch': 12.74}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1504461765289307, 'eval_bleu': 14.2035, 'eval_gen_len': 23.6382, 'eval_runtime': 62.7457, 'eval_samples_per_second': 14.009, 'eval_steps_per_second': 0.877, 'epoch': 12.76}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1545350551605225, 'eval_bleu': 13.7897, 'eval_gen_len': 23.8089, 'eval_runtime': 52.6822, 'eval_samples_per_second': 16.685, 'eval_steps_per_second': 1.044, 'epoch': 12.77}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.140368700027466, 'eval_bleu': 13.9253, 'eval_gen_len': 23.8225, 'eval_runtime': 46.0475, 'eval_samples_per_second': 19.089, 'eval_steps_per_second': 1.194, 'epoch': 12.78}
{'loss': 1.5288, 'grad_norm': 1.2106142044067383, 'learning_rate': 0.00038333333333333334, 'epoch': 12.8}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1408438682556152, 'eval_bleu': 14.3691, 'eval_gen_len': 23.7793, 'eval_runtime': 44.6972, 'eval_samples_per_second': 19.666, 'eval_steps_per_second': 1.231, 'epoch': 12.8}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1429762840270996, 'eval_bleu': 13.8482, 'eval_gen_len': 23.7008, 'eval_runtime': 46.3852, 'eval_samples_per_second': 18.95, 'eval_steps_per_second': 1.186, 'epoch': 12.81}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1440377235412598, 'eval_bleu': 14.0653, 'eval_gen_len': 23.7281, 'eval_runtime': 44.8055, 'eval_samples_per_second': 19.618, 'eval_steps_per_second': 1.228, 'epoch': 12.83}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1576895713806152, 'eval_bleu': 13.6681, 'eval_gen_len': 23.8487, 'eval_runtime': 45.1609, 'eval_samples_per_second': 19.464, 'eval_steps_per_second': 1.218, 'epoch': 12.84}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1439764499664307, 'eval_bleu': 13.655, 'eval_gen_len': 23.9135, 'eval_runtime': 44.8963, 'eval_samples_per_second': 19.578, 'eval_steps_per_second': 1.225, 'epoch': 12.85}
{'loss': 1.5328, 'grad_norm': 1.3529250621795654, 'learning_rate': 0.00038, 'epoch': 12.87}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.15338134765625, 'eval_bleu': 13.7453, 'eval_gen_len': 23.8441, 'eval_runtime': 44.4236, 'eval_samples_per_second': 19.787, 'eval_steps_per_second': 1.238, 'epoch': 12.87}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1465842723846436, 'eval_bleu': 13.9617, 'eval_gen_len': 23.7645, 'eval_runtime': 45.5584, 'eval_samples_per_second': 19.294, 'eval_steps_per_second': 1.207, 'epoch': 12.88}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1425936222076416, 'eval_bleu': 14.1758, 'eval_gen_len': 23.6428, 'eval_runtime': 43.1292, 'eval_samples_per_second': 20.381, 'eval_steps_per_second': 1.275, 'epoch': 12.9}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.14353609085083, 'eval_bleu': 14.2369, 'eval_gen_len': 23.7452, 'eval_runtime': 45.478, 'eval_samples_per_second': 19.328, 'eval_steps_per_second': 1.209, 'epoch': 12.91}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1333529949188232, 'eval_bleu': 14.3611, 'eval_gen_len': 23.7964, 'eval_runtime': 45.3264, 'eval_samples_per_second': 19.393, 'eval_steps_per_second': 1.213, 'epoch': 12.92}
{'loss': 1.5401, 'grad_norm': 1.205376386642456, 'learning_rate': 0.00037666666666666664, 'epoch': 12.94}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1462950706481934, 'eval_bleu': 14.1148, 'eval_gen_len': 23.7088, 'eval_runtime': 45.4095, 'eval_samples_per_second': 19.357, 'eval_steps_per_second': 1.211, 'epoch': 12.94}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1321635246276855, 'eval_bleu': 13.9792, 'eval_gen_len': 23.8487, 'eval_runtime': 44.4, 'eval_samples_per_second': 19.797, 'eval_steps_per_second': 1.239, 'epoch': 12.95}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.147892951965332, 'eval_bleu': 14.011, 'eval_gen_len': 23.5017, 'eval_runtime': 44.7088, 'eval_samples_per_second': 19.661, 'eval_steps_per_second': 1.23, 'epoch': 12.96}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1274588108062744, 'eval_bleu': 13.9233, 'eval_gen_len': 23.785, 'eval_runtime': 44.6163, 'eval_samples_per_second': 19.701, 'eval_steps_per_second': 1.233, 'epoch': 12.98}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.146442174911499, 'eval_bleu': 14.3378, 'eval_gen_len': 23.8373, 'eval_runtime': 45.3988, 'eval_samples_per_second': 19.362, 'eval_steps_per_second': 1.211, 'epoch': 12.99}
{'loss': 1.5285, 'grad_norm': 1.1457531452178955, 'learning_rate': 0.0003733333333333334, 'epoch': 13.01}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.165707588195801, 'eval_bleu': 14.3428, 'eval_gen_len': 23.719, 'eval_runtime': 45.7427, 'eval_samples_per_second': 19.216, 'eval_steps_per_second': 1.202, 'epoch': 13.01}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1787281036376953, 'eval_bleu': 13.7567, 'eval_gen_len': 23.8783, 'eval_runtime': 43.9763, 'eval_samples_per_second': 19.988, 'eval_steps_per_second': 1.251, 'epoch': 13.02}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1891822814941406, 'eval_bleu': 13.9559, 'eval_gen_len': 23.6826, 'eval_runtime': 46.1874, 'eval_samples_per_second': 19.031, 'eval_steps_per_second': 1.191, 'epoch': 13.03}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1774919033050537, 'eval_bleu': 13.7879, 'eval_gen_len': 23.8714, 'eval_runtime': 45.1984, 'eval_samples_per_second': 19.448, 'eval_steps_per_second': 1.217, 'epoch': 13.05}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.184563636779785, 'eval_bleu': 14.0359, 'eval_gen_len': 23.5939, 'eval_runtime': 45.1272, 'eval_samples_per_second': 19.478, 'eval_steps_per_second': 1.219, 'epoch': 13.06}
{'loss': 1.3986, 'grad_norm': 1.0756455659866333, 'learning_rate': 0.00037, 'epoch': 13.08}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1661229133605957, 'eval_bleu': 13.9235, 'eval_gen_len': 23.8749, 'eval_runtime': 46.026, 'eval_samples_per_second': 19.098, 'eval_steps_per_second': 1.195, 'epoch': 13.08}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1817057132720947, 'eval_bleu': 13.7699, 'eval_gen_len': 23.7782, 'eval_runtime': 44.225, 'eval_samples_per_second': 19.876, 'eval_steps_per_second': 1.244, 'epoch': 13.09}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.179429292678833, 'eval_bleu': 13.6466, 'eval_gen_len': 23.9738, 'eval_runtime': 47.1904, 'eval_samples_per_second': 18.627, 'eval_steps_per_second': 1.165, 'epoch': 13.1}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.173358201980591, 'eval_bleu': 14.1311, 'eval_gen_len': 23.8646, 'eval_runtime': 44.2777, 'eval_samples_per_second': 19.852, 'eval_steps_per_second': 1.242, 'epoch': 13.12}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.180752992630005, 'eval_bleu': 14.1237, 'eval_gen_len': 23.7042, 'eval_runtime': 55.4151, 'eval_samples_per_second': 15.862, 'eval_steps_per_second': 0.993, 'epoch': 13.13}
{'loss': 1.4134, 'grad_norm': 1.454483985900879, 'learning_rate': 0.00036666666666666667, 'epoch': 13.14}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1724414825439453, 'eval_bleu': 13.8832, 'eval_gen_len': 23.8976, 'eval_runtime': 45.9507, 'eval_samples_per_second': 19.129, 'eval_steps_per_second': 1.197, 'epoch': 13.14}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1856589317321777, 'eval_bleu': 14.1118, 'eval_gen_len': 23.7258, 'eval_runtime': 45.3946, 'eval_samples_per_second': 19.364, 'eval_steps_per_second': 1.212, 'epoch': 13.16}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1723592281341553, 'eval_bleu': 13.998, 'eval_gen_len': 23.7929, 'eval_runtime': 46.1271, 'eval_samples_per_second': 19.056, 'eval_steps_per_second': 1.192, 'epoch': 13.17}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1623055934906006, 'eval_bleu': 13.6367, 'eval_gen_len': 23.818, 'eval_runtime': 45.4991, 'eval_samples_per_second': 19.319, 'eval_steps_per_second': 1.209, 'epoch': 13.19}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1869709491729736, 'eval_bleu': 13.6915, 'eval_gen_len': 23.711, 'eval_runtime': 45.6976, 'eval_samples_per_second': 19.235, 'eval_steps_per_second': 1.204, 'epoch': 13.2}
{'loss': 1.4227, 'grad_norm': 1.3670985698699951, 'learning_rate': 0.00036333333333333335, 'epoch': 13.21}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.181572437286377, 'eval_bleu': 13.9404, 'eval_gen_len': 23.8043, 'eval_runtime': 45.9642, 'eval_samples_per_second': 19.124, 'eval_steps_per_second': 1.197, 'epoch': 13.21}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1721248626708984, 'eval_bleu': 14.3014, 'eval_gen_len': 23.7736, 'eval_runtime': 44.6396, 'eval_samples_per_second': 19.691, 'eval_steps_per_second': 1.232, 'epoch': 13.23}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1755173206329346, 'eval_bleu': 14.172, 'eval_gen_len': 23.8851, 'eval_runtime': 45.3094, 'eval_samples_per_second': 19.4, 'eval_steps_per_second': 1.214, 'epoch': 13.24}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1834347248077393, 'eval_bleu': 13.9296, 'eval_gen_len': 23.8066, 'eval_runtime': 44.2786, 'eval_samples_per_second': 19.852, 'eval_steps_per_second': 1.242, 'epoch': 13.25}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.175830125808716, 'eval_bleu': 14.0908, 'eval_gen_len': 23.744, 'eval_runtime': 44.765, 'eval_samples_per_second': 19.636, 'eval_steps_per_second': 1.229, 'epoch': 13.27}
{'loss': 1.4235, 'grad_norm': 1.2534061670303345, 'learning_rate': 0.00035999999999999997, 'epoch': 13.28}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.173963785171509, 'eval_bleu': 14.0962, 'eval_gen_len': 23.851, 'eval_runtime': 45.3595, 'eval_samples_per_second': 19.379, 'eval_steps_per_second': 1.213, 'epoch': 13.28}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.177044630050659, 'eval_bleu': 13.9701, 'eval_gen_len': 23.7065, 'eval_runtime': 45.7756, 'eval_samples_per_second': 19.202, 'eval_steps_per_second': 1.202, 'epoch': 13.3}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1626176834106445, 'eval_bleu': 14.3675, 'eval_gen_len': 23.9113, 'eval_runtime': 44.5146, 'eval_samples_per_second': 19.746, 'eval_steps_per_second': 1.236, 'epoch': 13.31}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1659345626831055, 'eval_bleu': 13.9649, 'eval_gen_len': 23.7065, 'eval_runtime': 45.6247, 'eval_samples_per_second': 19.266, 'eval_steps_per_second': 1.205, 'epoch': 13.32}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.17207932472229, 'eval_bleu': 13.9609, 'eval_gen_len': 23.719, 'eval_runtime': 45.2395, 'eval_samples_per_second': 19.43, 'eval_steps_per_second': 1.216, 'epoch': 13.34}
{'loss': 1.4447, 'grad_norm': 1.5496454238891602, 'learning_rate': 0.0003566666666666667, 'epoch': 13.35}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1707024574279785, 'eval_bleu': 14.0535, 'eval_gen_len': 23.7554, 'eval_runtime': 44.9985, 'eval_samples_per_second': 19.534, 'eval_steps_per_second': 1.222, 'epoch': 13.35}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.173581600189209, 'eval_bleu': 13.8507, 'eval_gen_len': 23.7031, 'eval_runtime': 46.5945, 'eval_samples_per_second': 18.865, 'eval_steps_per_second': 1.18, 'epoch': 13.37}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.184607744216919, 'eval_bleu': 14.0234, 'eval_gen_len': 23.5916, 'eval_runtime': 43.9799, 'eval_samples_per_second': 19.986, 'eval_steps_per_second': 1.251, 'epoch': 13.38}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1706390380859375, 'eval_bleu': 13.6969, 'eval_gen_len': 23.6132, 'eval_runtime': 46.1907, 'eval_samples_per_second': 19.03, 'eval_steps_per_second': 1.191, 'epoch': 13.39}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.170318841934204, 'eval_bleu': 13.6208, 'eval_gen_len': 23.8999, 'eval_runtime': 45.1416, 'eval_samples_per_second': 19.472, 'eval_steps_per_second': 1.218, 'epoch': 13.41}
{'loss': 1.4403, 'grad_norm': 1.1572397947311401, 'learning_rate': 0.0003533333333333333, 'epoch': 13.42}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1638076305389404, 'eval_bleu': 13.7311, 'eval_gen_len': 23.7247, 'eval_runtime': 44.3835, 'eval_samples_per_second': 19.805, 'eval_steps_per_second': 1.239, 'epoch': 13.42}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1821858882904053, 'eval_bleu': 14.0612, 'eval_gen_len': 23.8111, 'eval_runtime': 45.3625, 'eval_samples_per_second': 19.377, 'eval_steps_per_second': 1.212, 'epoch': 13.43}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1727192401885986, 'eval_bleu': 13.8511, 'eval_gen_len': 23.8794, 'eval_runtime': 44.7064, 'eval_samples_per_second': 19.662, 'eval_steps_per_second': 1.23, 'epoch': 13.45}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.173431158065796, 'eval_bleu': 13.5879, 'eval_gen_len': 23.7235, 'eval_runtime': 46.217, 'eval_samples_per_second': 19.019, 'eval_steps_per_second': 1.19, 'epoch': 13.46}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.179394245147705, 'eval_bleu': 13.4863, 'eval_gen_len': 23.6621, 'eval_runtime': 44.6443, 'eval_samples_per_second': 19.689, 'eval_steps_per_second': 1.232, 'epoch': 13.48}
{'loss': 1.4419, 'grad_norm': 1.1915279626846313, 'learning_rate': 0.00035, 'epoch': 13.49}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1676793098449707, 'eval_bleu': 14.0088, 'eval_gen_len': 23.6507, 'eval_runtime': 44.0845, 'eval_samples_per_second': 19.939, 'eval_steps_per_second': 1.248, 'epoch': 13.49}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.167076349258423, 'eval_bleu': 13.9624, 'eval_gen_len': 23.7554, 'eval_runtime': 63.0059, 'eval_samples_per_second': 13.951, 'eval_steps_per_second': 0.873, 'epoch': 13.5}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1600074768066406, 'eval_bleu': 14.2249, 'eval_gen_len': 23.7577, 'eval_runtime': 94.6499, 'eval_samples_per_second': 9.287, 'eval_steps_per_second': 0.581, 'epoch': 13.52}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1648638248443604, 'eval_bleu': 13.958, 'eval_gen_len': 23.8658, 'eval_runtime': 93.386, 'eval_samples_per_second': 9.413, 'eval_steps_per_second': 0.589, 'epoch': 13.53}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.170058250427246, 'eval_bleu': 13.6951, 'eval_gen_len': 23.7349, 'eval_runtime': 93.4151, 'eval_samples_per_second': 9.41, 'eval_steps_per_second': 0.589, 'epoch': 13.55}
{'loss': 1.4498, 'grad_norm': 1.225188970565796, 'learning_rate': 0.00034666666666666667, 'epoch': 13.56}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1648707389831543, 'eval_bleu': 13.4341, 'eval_gen_len': 23.8089, 'eval_runtime': 93.7336, 'eval_samples_per_second': 9.378, 'eval_steps_per_second': 0.587, 'epoch': 13.56}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1665310859680176, 'eval_bleu': 14.2682, 'eval_gen_len': 23.6712, 'eval_runtime': 94.8978, 'eval_samples_per_second': 9.263, 'eval_steps_per_second': 0.58, 'epoch': 13.57}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1589996814727783, 'eval_bleu': 13.7054, 'eval_gen_len': 23.7019, 'eval_runtime': 45.7854, 'eval_samples_per_second': 19.198, 'eval_steps_per_second': 1.201, 'epoch': 13.59}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1677632331848145, 'eval_bleu': 13.988, 'eval_gen_len': 23.744, 'eval_runtime': 52.1178, 'eval_samples_per_second': 16.866, 'eval_steps_per_second': 1.055, 'epoch': 13.6}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.166435956954956, 'eval_bleu': 13.8801, 'eval_gen_len': 23.8077, 'eval_runtime': 76.2439, 'eval_samples_per_second': 11.529, 'eval_steps_per_second': 0.721, 'epoch': 13.61}
{'loss': 1.4577, 'grad_norm': 1.1697006225585938, 'learning_rate': 0.00034333333333333335, 'epoch': 13.63}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.165342330932617, 'eval_bleu': 13.7237, 'eval_gen_len': 23.6894, 'eval_runtime': 44.6104, 'eval_samples_per_second': 19.704, 'eval_steps_per_second': 1.233, 'epoch': 13.63}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.168837070465088, 'eval_bleu': 13.5065, 'eval_gen_len': 23.7895, 'eval_runtime': 44.818, 'eval_samples_per_second': 19.613, 'eval_steps_per_second': 1.227, 'epoch': 13.64}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1739466190338135, 'eval_bleu': 13.7807, 'eval_gen_len': 23.7952, 'eval_runtime': 46.8197, 'eval_samples_per_second': 18.774, 'eval_steps_per_second': 1.175, 'epoch': 13.66}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1599135398864746, 'eval_bleu': 13.6372, 'eval_gen_len': 23.8965, 'eval_runtime': 44.4594, 'eval_samples_per_second': 19.771, 'eval_steps_per_second': 1.237, 'epoch': 13.67}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.178659200668335, 'eval_bleu': 13.6681, 'eval_gen_len': 23.8601, 'eval_runtime': 46.1327, 'eval_samples_per_second': 19.054, 'eval_steps_per_second': 1.192, 'epoch': 13.68}
{'loss': 1.4686, 'grad_norm': 1.3587666749954224, 'learning_rate': 0.00034, 'epoch': 13.7}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1672186851501465, 'eval_bleu': 14.225, 'eval_gen_len': 23.6951, 'eval_runtime': 44.4362, 'eval_samples_per_second': 19.781, 'eval_steps_per_second': 1.238, 'epoch': 13.7}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1633641719818115, 'eval_bleu': 13.8894, 'eval_gen_len': 23.7497, 'eval_runtime': 45.0045, 'eval_samples_per_second': 19.531, 'eval_steps_per_second': 1.222, 'epoch': 13.71}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.148668050765991, 'eval_bleu': 13.7646, 'eval_gen_len': 23.6598, 'eval_runtime': 44.8749, 'eval_samples_per_second': 19.588, 'eval_steps_per_second': 1.226, 'epoch': 13.73}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.167428493499756, 'eval_bleu': 13.879, 'eval_gen_len': 23.6724, 'eval_runtime': 43.8116, 'eval_samples_per_second': 20.063, 'eval_steps_per_second': 1.255, 'epoch': 13.74}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.166473865509033, 'eval_bleu': 13.9635, 'eval_gen_len': 23.8214, 'eval_runtime': 46.1543, 'eval_samples_per_second': 19.045, 'eval_steps_per_second': 1.192, 'epoch': 13.75}
{'loss': 1.461, 'grad_norm': 1.1993322372436523, 'learning_rate': 0.0003366666666666667, 'epoch': 13.77}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1542694568634033, 'eval_bleu': 14.0592, 'eval_gen_len': 23.7941, 'eval_runtime': 45.5376, 'eval_samples_per_second': 19.303, 'eval_steps_per_second': 1.208, 'epoch': 13.77}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1577019691467285, 'eval_bleu': 14.1242, 'eval_gen_len': 23.8111, 'eval_runtime': 44.5664, 'eval_samples_per_second': 19.723, 'eval_steps_per_second': 1.234, 'epoch': 13.78}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1628432273864746, 'eval_bleu': 14.3892, 'eval_gen_len': 23.7759, 'eval_runtime': 47.6484, 'eval_samples_per_second': 18.448, 'eval_steps_per_second': 1.154, 'epoch': 13.79}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1597070693969727, 'eval_bleu': 14.0818, 'eval_gen_len': 23.7133, 'eval_runtime': 44.875, 'eval_samples_per_second': 19.588, 'eval_steps_per_second': 1.226, 'epoch': 13.81}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.164741277694702, 'eval_bleu': 13.8088, 'eval_gen_len': 23.7327, 'eval_runtime': 45.6525, 'eval_samples_per_second': 19.254, 'eval_steps_per_second': 1.205, 'epoch': 13.82}
{'loss': 1.4763, 'grad_norm': 1.0967179536819458, 'learning_rate': 0.0003333333333333333, 'epoch': 13.84}


  0%|          | 0/55 [00:00<?, ?it/s]

Non-default generation parameters: {'max_length': 32, 'min_length': 8, 'early_stopping': True, 'num_beams': 4, 'length_penalty': 2.0, 'no_repeat_ngram_size': 3}


{'eval_loss': 2.146198272705078, 'eval_bleu': 14.1336, 'eval_gen_len': 23.7565, 'eval_runtime': 44.8088, 'eval_samples_per_second': 19.617, 'eval_steps_per_second': 1.227, 'epoch': 13.84}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1576859951019287, 'eval_bleu': 14.1206, 'eval_gen_len': 23.7873, 'eval_runtime': 44.9501, 'eval_samples_per_second': 19.555, 'eval_steps_per_second': 1.224, 'epoch': 13.85}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1454999446868896, 'eval_bleu': 14.1907, 'eval_gen_len': 23.7634, 'eval_runtime': 45.8747, 'eval_samples_per_second': 19.161, 'eval_steps_per_second': 1.199, 'epoch': 13.86}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.151031494140625, 'eval_bleu': 14.1384, 'eval_gen_len': 23.7793, 'eval_runtime': 45.7345, 'eval_samples_per_second': 19.22, 'eval_steps_per_second': 1.203, 'epoch': 13.88}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.158334255218506, 'eval_bleu': 13.8889, 'eval_gen_len': 23.851, 'eval_runtime': 44.7642, 'eval_samples_per_second': 19.636, 'eval_steps_per_second': 1.229, 'epoch': 13.89}
{'loss': 1.4752, 'grad_norm': 1.4525055885314941, 'learning_rate': 0.00033, 'epoch': 13.91}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1553382873535156, 'eval_bleu': 13.9535, 'eval_gen_len': 23.7668, 'eval_runtime': 44.8052, 'eval_samples_per_second': 19.618, 'eval_steps_per_second': 1.228, 'epoch': 13.91}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1544854640960693, 'eval_bleu': 13.9881, 'eval_gen_len': 23.8612, 'eval_runtime': 44.5542, 'eval_samples_per_second': 19.729, 'eval_steps_per_second': 1.234, 'epoch': 13.92}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1546387672424316, 'eval_bleu': 14.1092, 'eval_gen_len': 23.8385, 'eval_runtime': 45.0576, 'eval_samples_per_second': 19.508, 'eval_steps_per_second': 1.221, 'epoch': 13.93}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1430609226226807, 'eval_bleu': 13.8751, 'eval_gen_len': 23.7042, 'eval_runtime': 45.829, 'eval_samples_per_second': 19.18, 'eval_steps_per_second': 1.2, 'epoch': 13.95}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.152125358581543, 'eval_bleu': 13.8256, 'eval_gen_len': 23.5882, 'eval_runtime': 45.9567, 'eval_samples_per_second': 19.127, 'eval_steps_per_second': 1.197, 'epoch': 13.96}
{'loss': 1.4807, 'grad_norm': 1.3507418632507324, 'learning_rate': 0.0003266666666666667, 'epoch': 13.97}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1485114097595215, 'eval_bleu': 13.8914, 'eval_gen_len': 23.802, 'eval_runtime': 91.9461, 'eval_samples_per_second': 9.56, 'eval_steps_per_second': 0.598, 'epoch': 13.97}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.153843641281128, 'eval_bleu': 13.8169, 'eval_gen_len': 23.8487, 'eval_runtime': 75.928, 'eval_samples_per_second': 11.577, 'eval_steps_per_second': 0.724, 'epoch': 13.99}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1802713871002197, 'eval_bleu': 13.6971, 'eval_gen_len': 23.8862, 'eval_runtime': 94.7783, 'eval_samples_per_second': 9.274, 'eval_steps_per_second': 0.58, 'epoch': 14.0}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.201211929321289, 'eval_bleu': 13.4653, 'eval_gen_len': 23.7122, 'eval_runtime': 98.2764, 'eval_samples_per_second': 8.944, 'eval_steps_per_second': 0.56, 'epoch': 14.02}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2121024131774902, 'eval_bleu': 13.5359, 'eval_gen_len': 23.7292, 'eval_runtime': 58.3717, 'eval_samples_per_second': 15.059, 'eval_steps_per_second': 0.942, 'epoch': 14.03}
{'loss': 1.3924, 'grad_norm': 1.6224898099899292, 'learning_rate': 0.0003233333333333333, 'epoch': 14.04}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2037625312805176, 'eval_bleu': 13.2631, 'eval_gen_len': 23.7463, 'eval_runtime': 59.0154, 'eval_samples_per_second': 14.894, 'eval_steps_per_second': 0.932, 'epoch': 14.04}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.196737289428711, 'eval_bleu': 13.4007, 'eval_gen_len': 23.6997, 'eval_runtime': 94.2109, 'eval_samples_per_second': 9.33, 'eval_steps_per_second': 0.584, 'epoch': 14.06}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.204158067703247, 'eval_bleu': 13.5805, 'eval_gen_len': 23.6542, 'eval_runtime': 99.3602, 'eval_samples_per_second': 8.847, 'eval_steps_per_second': 0.554, 'epoch': 14.07}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1957614421844482, 'eval_bleu': 13.7662, 'eval_gen_len': 23.7338, 'eval_runtime': 97.4837, 'eval_samples_per_second': 9.017, 'eval_steps_per_second': 0.564, 'epoch': 14.09}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2000768184661865, 'eval_bleu': 14.0043, 'eval_gen_len': 23.8055, 'eval_runtime': 97.7283, 'eval_samples_per_second': 8.994, 'eval_steps_per_second': 0.563, 'epoch': 14.1}
{'loss': 1.3621, 'grad_norm': 1.3554484844207764, 'learning_rate': 0.00032, 'epoch': 14.11}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2001709938049316, 'eval_bleu': 13.8555, 'eval_gen_len': 23.6655, 'eval_runtime': 96.8683, 'eval_samples_per_second': 9.074, 'eval_steps_per_second': 0.568, 'epoch': 14.11}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1770365238189697, 'eval_bleu': 13.953, 'eval_gen_len': 23.8032, 'eval_runtime': 45.7197, 'eval_samples_per_second': 19.226, 'eval_steps_per_second': 1.203, 'epoch': 14.13}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.197977066040039, 'eval_bleu': 13.9145, 'eval_gen_len': 23.7543, 'eval_runtime': 46.8879, 'eval_samples_per_second': 18.747, 'eval_steps_per_second': 1.173, 'epoch': 14.14}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1978254318237305, 'eval_bleu': 14.0533, 'eval_gen_len': 23.7622, 'eval_runtime': 85.8688, 'eval_samples_per_second': 10.237, 'eval_steps_per_second': 0.641, 'epoch': 14.15}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.193175792694092, 'eval_bleu': 13.9994, 'eval_gen_len': 23.7827, 'eval_runtime': 94.9338, 'eval_samples_per_second': 9.259, 'eval_steps_per_second': 0.579, 'epoch': 14.17}
{'loss': 1.3708, 'grad_norm': 1.926547884941101, 'learning_rate': 0.00031666666666666665, 'epoch': 14.18}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.206510305404663, 'eval_bleu': 13.747, 'eval_gen_len': 23.7235, 'eval_runtime': 45.4009, 'eval_samples_per_second': 19.361, 'eval_steps_per_second': 1.211, 'epoch': 14.18}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.203504800796509, 'eval_bleu': 14.0431, 'eval_gen_len': 23.6371, 'eval_runtime': 46.3236, 'eval_samples_per_second': 18.975, 'eval_steps_per_second': 1.187, 'epoch': 14.2}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.188765048980713, 'eval_bleu': 13.9357, 'eval_gen_len': 23.7258, 'eval_runtime': 50.2383, 'eval_samples_per_second': 17.497, 'eval_steps_per_second': 1.095, 'epoch': 14.21}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1825475692749023, 'eval_bleu': 13.8838, 'eval_gen_len': 23.6086, 'eval_runtime': 48.5484, 'eval_samples_per_second': 18.106, 'eval_steps_per_second': 1.133, 'epoch': 14.22}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1763505935668945, 'eval_bleu': 13.9967, 'eval_gen_len': 23.5597, 'eval_runtime': 45.693, 'eval_samples_per_second': 19.237, 'eval_steps_per_second': 1.204, 'epoch': 14.24}
{'loss': 1.3775, 'grad_norm': 1.6138410568237305, 'learning_rate': 0.0003133333333333334, 'epoch': 14.25}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.186429977416992, 'eval_bleu': 13.9156, 'eval_gen_len': 23.5597, 'eval_runtime': 49.4927, 'eval_samples_per_second': 17.76, 'eval_steps_per_second': 1.111, 'epoch': 14.25}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.184380292892456, 'eval_bleu': 13.8658, 'eval_gen_len': 23.7474, 'eval_runtime': 98.4726, 'eval_samples_per_second': 8.926, 'eval_steps_per_second': 0.559, 'epoch': 14.26}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1833646297454834, 'eval_bleu': 14.0595, 'eval_gen_len': 23.7634, 'eval_runtime': 97.9078, 'eval_samples_per_second': 8.978, 'eval_steps_per_second': 0.562, 'epoch': 14.28}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1782538890838623, 'eval_bleu': 13.9437, 'eval_gen_len': 23.8146, 'eval_runtime': 96.3412, 'eval_samples_per_second': 9.124, 'eval_steps_per_second': 0.571, 'epoch': 14.29}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1940932273864746, 'eval_bleu': 13.7738, 'eval_gen_len': 23.7509, 'eval_runtime': 44.352, 'eval_samples_per_second': 19.819, 'eval_steps_per_second': 1.24, 'epoch': 14.31}
{'loss': 1.3801, 'grad_norm': 1.2878526449203491, 'learning_rate': 0.00031, 'epoch': 14.32}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1987485885620117, 'eval_bleu': 13.6339, 'eval_gen_len': 23.7929, 'eval_runtime': 45.406, 'eval_samples_per_second': 19.359, 'eval_steps_per_second': 1.211, 'epoch': 14.32}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1890311241149902, 'eval_bleu': 13.4273, 'eval_gen_len': 23.6246, 'eval_runtime': 45.5491, 'eval_samples_per_second': 19.298, 'eval_steps_per_second': 1.207, 'epoch': 14.33}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.192843437194824, 'eval_bleu': 13.6017, 'eval_gen_len': 23.595, 'eval_runtime': 44.0315, 'eval_samples_per_second': 19.963, 'eval_steps_per_second': 1.249, 'epoch': 14.35}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1786465644836426, 'eval_bleu': 13.3886, 'eval_gen_len': 23.8123, 'eval_runtime': 44.8472, 'eval_samples_per_second': 19.6, 'eval_steps_per_second': 1.226, 'epoch': 14.36}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.182147264480591, 'eval_bleu': 13.8567, 'eval_gen_len': 23.711, 'eval_runtime': 43.4005, 'eval_samples_per_second': 20.253, 'eval_steps_per_second': 1.267, 'epoch': 14.38}
{'loss': 1.3813, 'grad_norm': 1.332512378692627, 'learning_rate': 0.0003066666666666667, 'epoch': 14.39}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1875112056732178, 'eval_bleu': 14.0038, 'eval_gen_len': 23.7884, 'eval_runtime': 44.5217, 'eval_samples_per_second': 19.743, 'eval_steps_per_second': 1.235, 'epoch': 14.39}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.193040370941162, 'eval_bleu': 13.648, 'eval_gen_len': 23.8373, 'eval_runtime': 45.7354, 'eval_samples_per_second': 19.219, 'eval_steps_per_second': 1.203, 'epoch': 14.4}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1758432388305664, 'eval_bleu': 13.4632, 'eval_gen_len': 23.6428, 'eval_runtime': 60.7441, 'eval_samples_per_second': 14.471, 'eval_steps_per_second': 0.905, 'epoch': 14.42}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.182831048965454, 'eval_bleu': 13.7553, 'eval_gen_len': 23.7349, 'eval_runtime': 53.385, 'eval_samples_per_second': 16.465, 'eval_steps_per_second': 1.03, 'epoch': 14.43}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.182077646255493, 'eval_bleu': 13.9109, 'eval_gen_len': 23.727, 'eval_runtime': 53.4747, 'eval_samples_per_second': 16.438, 'eval_steps_per_second': 1.029, 'epoch': 14.44}
{'loss': 1.3844, 'grad_norm': 1.0352710485458374, 'learning_rate': 0.00030333333333333335, 'epoch': 14.46}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1862990856170654, 'eval_bleu': 13.7347, 'eval_gen_len': 23.777, 'eval_runtime': 47.2026, 'eval_samples_per_second': 18.622, 'eval_steps_per_second': 1.165, 'epoch': 14.46}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1915676593780518, 'eval_bleu': 13.6852, 'eval_gen_len': 23.752, 'eval_runtime': 47.902, 'eval_samples_per_second': 18.35, 'eval_steps_per_second': 1.148, 'epoch': 14.47}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.178246259689331, 'eval_bleu': 13.828, 'eval_gen_len': 23.6007, 'eval_runtime': 46.7418, 'eval_samples_per_second': 18.805, 'eval_steps_per_second': 1.177, 'epoch': 14.49}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1857378482818604, 'eval_bleu': 13.9897, 'eval_gen_len': 23.6644, 'eval_runtime': 48.1064, 'eval_samples_per_second': 18.272, 'eval_steps_per_second': 1.143, 'epoch': 14.5}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1845130920410156, 'eval_bleu': 13.6746, 'eval_gen_len': 23.8043, 'eval_runtime': 46.7852, 'eval_samples_per_second': 18.788, 'eval_steps_per_second': 1.176, 'epoch': 14.51}
{'loss': 1.3933, 'grad_norm': 1.1563029289245605, 'learning_rate': 0.0003, 'epoch': 14.53}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1805579662323, 'eval_bleu': 14.2798, 'eval_gen_len': 23.7076, 'eval_runtime': 43.5809, 'eval_samples_per_second': 20.169, 'eval_steps_per_second': 1.262, 'epoch': 14.53}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1841161251068115, 'eval_bleu': 13.8396, 'eval_gen_len': 23.7486, 'eval_runtime': 43.9635, 'eval_samples_per_second': 19.994, 'eval_steps_per_second': 1.251, 'epoch': 14.54}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1849589347839355, 'eval_bleu': 13.688, 'eval_gen_len': 23.6234, 'eval_runtime': 44.2384, 'eval_samples_per_second': 19.87, 'eval_steps_per_second': 1.243, 'epoch': 14.56}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1834747791290283, 'eval_bleu': 14.2252, 'eval_gen_len': 23.7998, 'eval_runtime': 43.4769, 'eval_samples_per_second': 20.218, 'eval_steps_per_second': 1.265, 'epoch': 14.57}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.167672872543335, 'eval_bleu': 13.978, 'eval_gen_len': 23.8703, 'eval_runtime': 43.6145, 'eval_samples_per_second': 20.154, 'eval_steps_per_second': 1.261, 'epoch': 14.58}
{'loss': 1.3992, 'grad_norm': 1.2313027381896973, 'learning_rate': 0.0002966666666666667, 'epoch': 14.6}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1703503131866455, 'eval_bleu': 14.0209, 'eval_gen_len': 23.9693, 'eval_runtime': 44.0486, 'eval_samples_per_second': 19.955, 'eval_steps_per_second': 1.249, 'epoch': 14.6}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1627004146575928, 'eval_bleu': 14.1731, 'eval_gen_len': 23.6325, 'eval_runtime': 44.9629, 'eval_samples_per_second': 19.549, 'eval_steps_per_second': 1.223, 'epoch': 14.61}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.173823356628418, 'eval_bleu': 13.8129, 'eval_gen_len': 23.6246, 'eval_runtime': 44.9136, 'eval_samples_per_second': 19.571, 'eval_steps_per_second': 1.225, 'epoch': 14.62}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.166613817214966, 'eval_bleu': 13.8703, 'eval_gen_len': 23.603, 'eval_runtime': 46.2976, 'eval_samples_per_second': 18.986, 'eval_steps_per_second': 1.188, 'epoch': 14.64}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.173157215118408, 'eval_bleu': 13.8481, 'eval_gen_len': 23.7349, 'eval_runtime': 46.3876, 'eval_samples_per_second': 18.949, 'eval_steps_per_second': 1.186, 'epoch': 14.65}
{'loss': 1.4029, 'grad_norm': 1.0884402990341187, 'learning_rate': 0.0002933333333333333, 'epoch': 14.67}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1827006340026855, 'eval_bleu': 13.6908, 'eval_gen_len': 23.6553, 'eval_runtime': 49.7919, 'eval_samples_per_second': 17.653, 'eval_steps_per_second': 1.105, 'epoch': 14.67}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1676952838897705, 'eval_bleu': 13.7943, 'eval_gen_len': 23.7258, 'eval_runtime': 79.8611, 'eval_samples_per_second': 11.007, 'eval_steps_per_second': 0.689, 'epoch': 14.68}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1846566200256348, 'eval_bleu': 13.6715, 'eval_gen_len': 23.7031, 'eval_runtime': 44.7164, 'eval_samples_per_second': 19.657, 'eval_steps_per_second': 1.23, 'epoch': 14.69}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.16610050201416, 'eval_bleu': 13.9693, 'eval_gen_len': 23.8441, 'eval_runtime': 88.012, 'eval_samples_per_second': 9.987, 'eval_steps_per_second': 0.625, 'epoch': 14.71}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1722822189331055, 'eval_bleu': 13.7269, 'eval_gen_len': 23.6849, 'eval_runtime': 86.3605, 'eval_samples_per_second': 10.178, 'eval_steps_per_second': 0.637, 'epoch': 14.72}
{'loss': 1.4079, 'grad_norm': 1.2462464570999146, 'learning_rate': 0.00029, 'epoch': 14.74}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1738784313201904, 'eval_bleu': 14.1261, 'eval_gen_len': 23.7622, 'eval_runtime': 81.7959, 'eval_samples_per_second': 10.746, 'eval_steps_per_second': 0.672, 'epoch': 14.74}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.181398868560791, 'eval_bleu': 14.0233, 'eval_gen_len': 23.8555, 'eval_runtime': 45.9045, 'eval_samples_per_second': 19.148, 'eval_steps_per_second': 1.198, 'epoch': 14.75}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1735317707061768, 'eval_bleu': 14.0386, 'eval_gen_len': 23.7099, 'eval_runtime': 44.4159, 'eval_samples_per_second': 19.79, 'eval_steps_per_second': 1.238, 'epoch': 14.76}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.171327590942383, 'eval_bleu': 14.2353, 'eval_gen_len': 23.6519, 'eval_runtime': 44.857, 'eval_samples_per_second': 19.596, 'eval_steps_per_second': 1.226, 'epoch': 14.78}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1602632999420166, 'eval_bleu': 14.0435, 'eval_gen_len': 23.7304, 'eval_runtime': 77.9048, 'eval_samples_per_second': 11.283, 'eval_steps_per_second': 0.706, 'epoch': 14.79}
{'loss': 1.4199, 'grad_norm': 1.2752455472946167, 'learning_rate': 0.0002866666666666667, 'epoch': 14.8}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1738593578338623, 'eval_bleu': 14.2604, 'eval_gen_len': 23.6871, 'eval_runtime': 60.7438, 'eval_samples_per_second': 14.471, 'eval_steps_per_second': 0.905, 'epoch': 14.8}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.172027587890625, 'eval_bleu': 14.4939, 'eval_gen_len': 23.7668, 'eval_runtime': 45.3846, 'eval_samples_per_second': 19.368, 'eval_steps_per_second': 1.212, 'epoch': 14.82}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.172680616378784, 'eval_bleu': 14.3163, 'eval_gen_len': 23.6974, 'eval_runtime': 78.0665, 'eval_samples_per_second': 11.26, 'eval_steps_per_second': 0.705, 'epoch': 14.83}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1730611324310303, 'eval_bleu': 14.4116, 'eval_gen_len': 23.7816, 'eval_runtime': 45.4475, 'eval_samples_per_second': 19.341, 'eval_steps_per_second': 1.21, 'epoch': 14.85}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.167332887649536, 'eval_bleu': 14.4898, 'eval_gen_len': 23.6985, 'eval_runtime': 44.7911, 'eval_samples_per_second': 19.624, 'eval_steps_per_second': 1.228, 'epoch': 14.86}
{'loss': 1.4196, 'grad_norm': 1.2864677906036377, 'learning_rate': 0.00028333333333333335, 'epoch': 14.87}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1691062450408936, 'eval_bleu': 14.2258, 'eval_gen_len': 23.8066, 'eval_runtime': 50.0461, 'eval_samples_per_second': 17.564, 'eval_steps_per_second': 1.099, 'epoch': 14.87}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1680843830108643, 'eval_bleu': 14.4263, 'eval_gen_len': 23.7907, 'eval_runtime': 44.105, 'eval_samples_per_second': 19.93, 'eval_steps_per_second': 1.247, 'epoch': 14.89}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1551473140716553, 'eval_bleu': 14.3158, 'eval_gen_len': 23.8157, 'eval_runtime': 45.5756, 'eval_samples_per_second': 19.287, 'eval_steps_per_second': 1.207, 'epoch': 14.9}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.162254810333252, 'eval_bleu': 14.6167, 'eval_gen_len': 23.8146, 'eval_runtime': 69.2146, 'eval_samples_per_second': 12.7, 'eval_steps_per_second': 0.795, 'epoch': 14.92}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.165407419204712, 'eval_bleu': 14.3564, 'eval_gen_len': 23.8191, 'eval_runtime': 45.1655, 'eval_samples_per_second': 19.462, 'eval_steps_per_second': 1.218, 'epoch': 14.93}
{'loss': 1.4162, 'grad_norm': 1.1640523672103882, 'learning_rate': 0.00028000000000000003, 'epoch': 14.94}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1639537811279297, 'eval_bleu': 13.8832, 'eval_gen_len': 23.7224, 'eval_runtime': 44.8104, 'eval_samples_per_second': 19.616, 'eval_steps_per_second': 1.227, 'epoch': 14.94}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1647398471832275, 'eval_bleu': 14.0982, 'eval_gen_len': 23.8339, 'eval_runtime': 44.6147, 'eval_samples_per_second': 19.702, 'eval_steps_per_second': 1.233, 'epoch': 14.96}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1747183799743652, 'eval_bleu': 14.339, 'eval_gen_len': 23.6894, 'eval_runtime': 45.4601, 'eval_samples_per_second': 19.336, 'eval_steps_per_second': 1.21, 'epoch': 14.97}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1638684272766113, 'eval_bleu': 14.2976, 'eval_gen_len': 23.7611, 'eval_runtime': 44.9636, 'eval_samples_per_second': 19.549, 'eval_steps_per_second': 1.223, 'epoch': 14.98}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1586005687713623, 'eval_bleu': 14.1953, 'eval_gen_len': 23.8123, 'eval_runtime': 57.3685, 'eval_samples_per_second': 15.322, 'eval_steps_per_second': 0.959, 'epoch': 15.0}
{'loss': 1.3889, 'grad_norm': 1.2441483736038208, 'learning_rate': 0.00027666666666666665, 'epoch': 15.01}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.206141471862793, 'eval_bleu': 14.6076, 'eval_gen_len': 23.8111, 'eval_runtime': 49.3236, 'eval_samples_per_second': 17.821, 'eval_steps_per_second': 1.115, 'epoch': 15.01}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.203212261199951, 'eval_bleu': 14.3823, 'eval_gen_len': 23.6303, 'eval_runtime': 75.7039, 'eval_samples_per_second': 11.611, 'eval_steps_per_second': 0.727, 'epoch': 15.03}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1922900676727295, 'eval_bleu': 14.0945, 'eval_gen_len': 23.5859, 'eval_runtime': 45.6205, 'eval_samples_per_second': 19.268, 'eval_steps_per_second': 1.206, 'epoch': 15.04}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2094643115997314, 'eval_bleu': 14.3039, 'eval_gen_len': 23.6769, 'eval_runtime': 47.9734, 'eval_samples_per_second': 18.323, 'eval_steps_per_second': 1.146, 'epoch': 15.05}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2028615474700928, 'eval_bleu': 14.071, 'eval_gen_len': 23.5984, 'eval_runtime': 96.1053, 'eval_samples_per_second': 9.146, 'eval_steps_per_second': 0.572, 'epoch': 15.07}
{'loss': 1.3084, 'grad_norm': 1.368886113166809, 'learning_rate': 0.00027333333333333333, 'epoch': 15.08}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2199480533599854, 'eval_bleu': 14.0948, 'eval_gen_len': 23.8009, 'eval_runtime': 66.8536, 'eval_samples_per_second': 13.148, 'eval_steps_per_second': 0.823, 'epoch': 15.08}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.224212408065796, 'eval_bleu': 14.3621, 'eval_gen_len': 23.6143, 'eval_runtime': 53.8708, 'eval_samples_per_second': 16.317, 'eval_steps_per_second': 1.021, 'epoch': 15.1}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2142462730407715, 'eval_bleu': 14.3184, 'eval_gen_len': 23.7554, 'eval_runtime': 65.2823, 'eval_samples_per_second': 13.465, 'eval_steps_per_second': 0.842, 'epoch': 15.11}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2064290046691895, 'eval_bleu': 14.2365, 'eval_gen_len': 23.7565, 'eval_runtime': 46.562, 'eval_samples_per_second': 18.878, 'eval_steps_per_second': 1.181, 'epoch': 15.12}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2159435749053955, 'eval_bleu': 14.4329, 'eval_gen_len': 23.6428, 'eval_runtime': 78.2352, 'eval_samples_per_second': 11.235, 'eval_steps_per_second': 0.703, 'epoch': 15.14}
{'loss': 1.3086, 'grad_norm': 1.384087324142456, 'learning_rate': 0.00027, 'epoch': 15.15}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.211057424545288, 'eval_bleu': 14.1389, 'eval_gen_len': 23.686, 'eval_runtime': 46.51, 'eval_samples_per_second': 18.899, 'eval_steps_per_second': 1.183, 'epoch': 15.15}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.208235502243042, 'eval_bleu': 14.0992, 'eval_gen_len': 23.7964, 'eval_runtime': 45.5957, 'eval_samples_per_second': 19.278, 'eval_steps_per_second': 1.206, 'epoch': 15.16}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.199899673461914, 'eval_bleu': 14.2571, 'eval_gen_len': 23.8009, 'eval_runtime': 94.2681, 'eval_samples_per_second': 9.324, 'eval_steps_per_second': 0.583, 'epoch': 15.18}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.219815492630005, 'eval_bleu': 14.2059, 'eval_gen_len': 23.7304, 'eval_runtime': 76.0346, 'eval_samples_per_second': 11.561, 'eval_steps_per_second': 0.723, 'epoch': 15.19}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.205690622329712, 'eval_bleu': 14.341, 'eval_gen_len': 23.7554, 'eval_runtime': 71.1584, 'eval_samples_per_second': 12.353, 'eval_steps_per_second': 0.773, 'epoch': 15.21}
{'loss': 1.3238, 'grad_norm': 1.3932329416275024, 'learning_rate': 0.0002666666666666667, 'epoch': 15.22}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.197125196456909, 'eval_bleu': 13.9244, 'eval_gen_len': 23.7042, 'eval_runtime': 53.1156, 'eval_samples_per_second': 16.549, 'eval_steps_per_second': 1.035, 'epoch': 15.22}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.209505081176758, 'eval_bleu': 13.9804, 'eval_gen_len': 23.6655, 'eval_runtime': 44.8908, 'eval_samples_per_second': 19.581, 'eval_steps_per_second': 1.225, 'epoch': 15.23}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2051124572753906, 'eval_bleu': 14.1428, 'eval_gen_len': 23.6564, 'eval_runtime': 45.8814, 'eval_samples_per_second': 19.158, 'eval_steps_per_second': 1.199, 'epoch': 15.25}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2023496627807617, 'eval_bleu': 14.2808, 'eval_gen_len': 23.7474, 'eval_runtime': 61.7097, 'eval_samples_per_second': 14.244, 'eval_steps_per_second': 0.891, 'epoch': 15.26}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1967625617980957, 'eval_bleu': 14.5219, 'eval_gen_len': 23.7565, 'eval_runtime': 55.469, 'eval_samples_per_second': 15.847, 'eval_steps_per_second': 0.992, 'epoch': 15.27}
{'loss': 1.3262, 'grad_norm': 1.1358453035354614, 'learning_rate': 0.0002633333333333333, 'epoch': 15.29}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.203273057937622, 'eval_bleu': 14.5642, 'eval_gen_len': 23.7452, 'eval_runtime': 45.6037, 'eval_samples_per_second': 19.275, 'eval_steps_per_second': 1.206, 'epoch': 15.29}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2103729248046875, 'eval_bleu': 14.2946, 'eval_gen_len': 23.7315, 'eval_runtime': 50.6428, 'eval_samples_per_second': 17.357, 'eval_steps_per_second': 1.086, 'epoch': 15.3}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2083230018615723, 'eval_bleu': 14.1062, 'eval_gen_len': 23.5347, 'eval_runtime': 43.6975, 'eval_samples_per_second': 20.116, 'eval_steps_per_second': 1.259, 'epoch': 15.32}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.216162919998169, 'eval_bleu': 14.2672, 'eval_gen_len': 23.6007, 'eval_runtime': 45.4869, 'eval_samples_per_second': 19.324, 'eval_steps_per_second': 1.209, 'epoch': 15.33}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2031989097595215, 'eval_bleu': 14.3286, 'eval_gen_len': 23.6542, 'eval_runtime': 44.7221, 'eval_samples_per_second': 19.655, 'eval_steps_per_second': 1.23, 'epoch': 15.34}
{'loss': 1.3337, 'grad_norm': 1.3139406442642212, 'learning_rate': 0.00026000000000000003, 'epoch': 15.36}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2049179077148438, 'eval_bleu': 14.3186, 'eval_gen_len': 23.5904, 'eval_runtime': 45.6232, 'eval_samples_per_second': 19.267, 'eval_steps_per_second': 1.206, 'epoch': 15.36}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2026424407958984, 'eval_bleu': 14.2001, 'eval_gen_len': 23.8123, 'eval_runtime': 48.7648, 'eval_samples_per_second': 18.025, 'eval_steps_per_second': 1.128, 'epoch': 15.37}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.192411422729492, 'eval_bleu': 14.2292, 'eval_gen_len': 23.6371, 'eval_runtime': 45.1826, 'eval_samples_per_second': 19.454, 'eval_steps_per_second': 1.217, 'epoch': 15.39}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2063095569610596, 'eval_bleu': 14.0485, 'eval_gen_len': 23.6758, 'eval_runtime': 44.4248, 'eval_samples_per_second': 19.786, 'eval_steps_per_second': 1.238, 'epoch': 15.4}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.205216646194458, 'eval_bleu': 14.1406, 'eval_gen_len': 23.6121, 'eval_runtime': 45.9959, 'eval_samples_per_second': 19.11, 'eval_steps_per_second': 1.196, 'epoch': 15.41}
{'loss': 1.3339, 'grad_norm': 1.18087637424469, 'learning_rate': 0.00025666666666666665, 'epoch': 15.43}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1914796829223633, 'eval_bleu': 13.8164, 'eval_gen_len': 23.6553, 'eval_runtime': 45.8322, 'eval_samples_per_second': 19.179, 'eval_steps_per_second': 1.2, 'epoch': 15.43}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.197633981704712, 'eval_bleu': 14.1524, 'eval_gen_len': 23.6724, 'eval_runtime': 45.0734, 'eval_samples_per_second': 19.502, 'eval_steps_per_second': 1.22, 'epoch': 15.44}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.191493272781372, 'eval_bleu': 13.9794, 'eval_gen_len': 23.6212, 'eval_runtime': 45.8284, 'eval_samples_per_second': 19.18, 'eval_steps_per_second': 1.2, 'epoch': 15.45}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.197345733642578, 'eval_bleu': 13.9508, 'eval_gen_len': 23.7827, 'eval_runtime': 69.0347, 'eval_samples_per_second': 12.733, 'eval_steps_per_second': 0.797, 'epoch': 15.47}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.201936960220337, 'eval_bleu': 14.1407, 'eval_gen_len': 23.7224, 'eval_runtime': 46.0661, 'eval_samples_per_second': 19.081, 'eval_steps_per_second': 1.194, 'epoch': 15.48}
{'loss': 1.3405, 'grad_norm': 1.1408647298812866, 'learning_rate': 0.0002533333333333334, 'epoch': 15.5}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.194835662841797, 'eval_bleu': 13.9248, 'eval_gen_len': 23.6143, 'eval_runtime': 45.8943, 'eval_samples_per_second': 19.153, 'eval_steps_per_second': 1.198, 'epoch': 15.5}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1887645721435547, 'eval_bleu': 14.0873, 'eval_gen_len': 23.719, 'eval_runtime': 45.151, 'eval_samples_per_second': 19.468, 'eval_steps_per_second': 1.218, 'epoch': 15.51}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1938319206237793, 'eval_bleu': 13.6027, 'eval_gen_len': 23.6815, 'eval_runtime': 45.7446, 'eval_samples_per_second': 19.215, 'eval_steps_per_second': 1.202, 'epoch': 15.52}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2048935890197754, 'eval_bleu': 13.9586, 'eval_gen_len': 23.7611, 'eval_runtime': 45.4862, 'eval_samples_per_second': 19.325, 'eval_steps_per_second': 1.209, 'epoch': 15.54}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.184265375137329, 'eval_bleu': 14.1044, 'eval_gen_len': 23.6576, 'eval_runtime': 46.0622, 'eval_samples_per_second': 19.083, 'eval_steps_per_second': 1.194, 'epoch': 15.55}
{'loss': 1.3485, 'grad_norm': 1.1791675090789795, 'learning_rate': 0.00025, 'epoch': 15.57}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.197845220565796, 'eval_bleu': 13.5365, 'eval_gen_len': 23.7122, 'eval_runtime': 48.7933, 'eval_samples_per_second': 18.015, 'eval_steps_per_second': 1.127, 'epoch': 15.57}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.192908763885498, 'eval_bleu': 14.134, 'eval_gen_len': 23.6906, 'eval_runtime': 45.2132, 'eval_samples_per_second': 19.441, 'eval_steps_per_second': 1.216, 'epoch': 15.58}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.188995599746704, 'eval_bleu': 14.0928, 'eval_gen_len': 23.6007, 'eval_runtime': 66.3854, 'eval_samples_per_second': 13.241, 'eval_steps_per_second': 0.828, 'epoch': 15.59}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1995251178741455, 'eval_bleu': 14.0967, 'eval_gen_len': 23.7452, 'eval_runtime': 53.7885, 'eval_samples_per_second': 16.342, 'eval_steps_per_second': 1.023, 'epoch': 15.61}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1993486881256104, 'eval_bleu': 14.3046, 'eval_gen_len': 23.7065, 'eval_runtime': 47.89, 'eval_samples_per_second': 18.355, 'eval_steps_per_second': 1.148, 'epoch': 15.62}
{'loss': 1.3509, 'grad_norm': 1.6368948221206665, 'learning_rate': 0.0002466666666666667, 'epoch': 15.63}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1924965381622314, 'eval_bleu': 14.3061, 'eval_gen_len': 23.6542, 'eval_runtime': 46.1037, 'eval_samples_per_second': 19.066, 'eval_steps_per_second': 1.193, 'epoch': 15.63}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1931886672973633, 'eval_bleu': 14.1631, 'eval_gen_len': 23.7668, 'eval_runtime': 64.0105, 'eval_samples_per_second': 13.732, 'eval_steps_per_second': 0.859, 'epoch': 15.65}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.185272693634033, 'eval_bleu': 14.0242, 'eval_gen_len': 23.7816, 'eval_runtime': 63.3559, 'eval_samples_per_second': 13.874, 'eval_steps_per_second': 0.868, 'epoch': 15.66}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1834511756896973, 'eval_bleu': 14.2925, 'eval_gen_len': 23.6724, 'eval_runtime': 46.1094, 'eval_samples_per_second': 19.063, 'eval_steps_per_second': 1.193, 'epoch': 15.68}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.182466745376587, 'eval_bleu': 13.9679, 'eval_gen_len': 23.7031, 'eval_runtime': 45.9856, 'eval_samples_per_second': 19.115, 'eval_steps_per_second': 1.196, 'epoch': 15.69}
{'loss': 1.3544, 'grad_norm': 1.1690704822540283, 'learning_rate': 0.00024333333333333336, 'epoch': 15.7}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1986031532287598, 'eval_bleu': 14.3072, 'eval_gen_len': 23.562, 'eval_runtime': 61.177, 'eval_samples_per_second': 14.368, 'eval_steps_per_second': 0.899, 'epoch': 15.7}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1825079917907715, 'eval_bleu': 14.3415, 'eval_gen_len': 23.6758, 'eval_runtime': 48.4121, 'eval_samples_per_second': 18.157, 'eval_steps_per_second': 1.136, 'epoch': 15.72}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1953678131103516, 'eval_bleu': 14.2117, 'eval_gen_len': 23.6928, 'eval_runtime': 70.3732, 'eval_samples_per_second': 12.491, 'eval_steps_per_second': 0.782, 'epoch': 15.73}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.185054063796997, 'eval_bleu': 14.5619, 'eval_gen_len': 23.7019, 'eval_runtime': 48.8494, 'eval_samples_per_second': 17.994, 'eval_steps_per_second': 1.126, 'epoch': 15.75}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.187377452850342, 'eval_bleu': 14.2918, 'eval_gen_len': 23.6428, 'eval_runtime': 45.8535, 'eval_samples_per_second': 19.17, 'eval_steps_per_second': 1.199, 'epoch': 15.76}
{'loss': 1.3561, 'grad_norm': 1.423276424407959, 'learning_rate': 0.00024, 'epoch': 15.77}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.172391414642334, 'eval_bleu': 14.4199, 'eval_gen_len': 23.7315, 'eval_runtime': 53.5768, 'eval_samples_per_second': 16.406, 'eval_steps_per_second': 1.027, 'epoch': 15.77}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1904518604278564, 'eval_bleu': 14.181, 'eval_gen_len': 23.6303, 'eval_runtime': 45.3277, 'eval_samples_per_second': 19.392, 'eval_steps_per_second': 1.213, 'epoch': 15.79}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1842174530029297, 'eval_bleu': 14.4201, 'eval_gen_len': 23.6906, 'eval_runtime': 52.0503, 'eval_samples_per_second': 16.888, 'eval_steps_per_second': 1.057, 'epoch': 15.8}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.179689407348633, 'eval_bleu': 14.2713, 'eval_gen_len': 23.7679, 'eval_runtime': 47.546, 'eval_samples_per_second': 18.487, 'eval_steps_per_second': 1.157, 'epoch': 15.81}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1937804222106934, 'eval_bleu': 14.3011, 'eval_gen_len': 23.7736, 'eval_runtime': 44.5386, 'eval_samples_per_second': 19.736, 'eval_steps_per_second': 1.235, 'epoch': 15.83}
{'loss': 1.3551, 'grad_norm': 0.9963029026985168, 'learning_rate': 0.00023666666666666668, 'epoch': 15.84}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1921911239624023, 'eval_bleu': 14.0062, 'eval_gen_len': 23.8225, 'eval_runtime': 51.553, 'eval_samples_per_second': 17.05, 'eval_steps_per_second': 1.067, 'epoch': 15.84}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1887974739074707, 'eval_bleu': 14.3995, 'eval_gen_len': 23.7565, 'eval_runtime': 48.8683, 'eval_samples_per_second': 17.987, 'eval_steps_per_second': 1.125, 'epoch': 15.86}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.191575527191162, 'eval_bleu': 14.0733, 'eval_gen_len': 23.6962, 'eval_runtime': 55.1952, 'eval_samples_per_second': 15.925, 'eval_steps_per_second': 0.996, 'epoch': 15.87}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1848082542419434, 'eval_bleu': 13.9959, 'eval_gen_len': 23.7144, 'eval_runtime': 70.646, 'eval_samples_per_second': 12.442, 'eval_steps_per_second': 0.779, 'epoch': 15.88}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1928789615631104, 'eval_bleu': 13.7328, 'eval_gen_len': 23.6246, 'eval_runtime': 56.0497, 'eval_samples_per_second': 15.683, 'eval_steps_per_second': 0.981, 'epoch': 15.9}
{'loss': 1.3625, 'grad_norm': 1.2312533855438232, 'learning_rate': 0.00023333333333333333, 'epoch': 15.91}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.18788480758667, 'eval_bleu': 13.5061, 'eval_gen_len': 23.8623, 'eval_runtime': 46.1324, 'eval_samples_per_second': 19.054, 'eval_steps_per_second': 1.192, 'epoch': 15.91}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1728568077087402, 'eval_bleu': 13.7824, 'eval_gen_len': 23.752, 'eval_runtime': 47.5612, 'eval_samples_per_second': 18.481, 'eval_steps_per_second': 1.156, 'epoch': 15.93}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1865720748901367, 'eval_bleu': 13.7678, 'eval_gen_len': 23.7622, 'eval_runtime': 45.6932, 'eval_samples_per_second': 19.237, 'eval_steps_per_second': 1.204, 'epoch': 15.94}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.18571400642395, 'eval_bleu': 14.2462, 'eval_gen_len': 23.7588, 'eval_runtime': 46.723, 'eval_samples_per_second': 18.813, 'eval_steps_per_second': 1.177, 'epoch': 15.95}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.186859607696533, 'eval_bleu': 14.3915, 'eval_gen_len': 23.7656, 'eval_runtime': 45.3436, 'eval_samples_per_second': 19.385, 'eval_steps_per_second': 1.213, 'epoch': 15.97}
{'loss': 1.3654, 'grad_norm': 1.7896291017532349, 'learning_rate': 0.00023, 'epoch': 15.98}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.185875654220581, 'eval_bleu': 14.4988, 'eval_gen_len': 23.7008, 'eval_runtime': 46.7093, 'eval_samples_per_second': 18.819, 'eval_steps_per_second': 1.177, 'epoch': 15.98}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.184086561203003, 'eval_bleu': 14.2111, 'eval_gen_len': 23.7156, 'eval_runtime': 46.0597, 'eval_samples_per_second': 19.084, 'eval_steps_per_second': 1.194, 'epoch': 15.99}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.22163462638855, 'eval_bleu': 14.1217, 'eval_gen_len': 23.843, 'eval_runtime': 46.225, 'eval_samples_per_second': 19.016, 'eval_steps_per_second': 1.19, 'epoch': 16.01}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.217221975326538, 'eval_bleu': 13.9681, 'eval_gen_len': 23.6997, 'eval_runtime': 44.9597, 'eval_samples_per_second': 19.551, 'eval_steps_per_second': 1.223, 'epoch': 16.02}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2157843112945557, 'eval_bleu': 13.9578, 'eval_gen_len': 23.8134, 'eval_runtime': 45.0492, 'eval_samples_per_second': 19.512, 'eval_steps_per_second': 1.221, 'epoch': 16.04}
{'loss': 1.288, 'grad_norm': 1.3318716287612915, 'learning_rate': 0.00022666666666666666, 'epoch': 16.05}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.238826274871826, 'eval_bleu': 13.7055, 'eval_gen_len': 23.6553, 'eval_runtime': 45.0118, 'eval_samples_per_second': 19.528, 'eval_steps_per_second': 1.222, 'epoch': 16.05}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.22501802444458, 'eval_bleu': 13.8252, 'eval_gen_len': 23.7338, 'eval_runtime': 43.6438, 'eval_samples_per_second': 20.14, 'eval_steps_per_second': 1.26, 'epoch': 16.06}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2202932834625244, 'eval_bleu': 13.7129, 'eval_gen_len': 23.7975, 'eval_runtime': 47.4889, 'eval_samples_per_second': 18.51, 'eval_steps_per_second': 1.158, 'epoch': 16.08}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2149405479431152, 'eval_bleu': 13.7323, 'eval_gen_len': 23.7463, 'eval_runtime': 46.2186, 'eval_samples_per_second': 19.018, 'eval_steps_per_second': 1.19, 'epoch': 16.09}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.224248170852661, 'eval_bleu': 13.9789, 'eval_gen_len': 23.6769, 'eval_runtime': 45.8819, 'eval_samples_per_second': 19.158, 'eval_steps_per_second': 1.199, 'epoch': 16.11}
{'loss': 1.262, 'grad_norm': 1.35589599609375, 'learning_rate': 0.00022333333333333333, 'epoch': 16.12}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.228503704071045, 'eval_bleu': 13.7408, 'eval_gen_len': 23.7281, 'eval_runtime': 44.7731, 'eval_samples_per_second': 19.632, 'eval_steps_per_second': 1.228, 'epoch': 16.12}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.236905574798584, 'eval_bleu': 13.6488, 'eval_gen_len': 23.8191, 'eval_runtime': 45.5811, 'eval_samples_per_second': 19.284, 'eval_steps_per_second': 1.207, 'epoch': 16.13}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2273385524749756, 'eval_bleu': 13.5047, 'eval_gen_len': 23.8168, 'eval_runtime': 57.0885, 'eval_samples_per_second': 15.397, 'eval_steps_per_second': 0.963, 'epoch': 16.15}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.226310968399048, 'eval_bleu': 13.6477, 'eval_gen_len': 23.7668, 'eval_runtime': 47.9375, 'eval_samples_per_second': 18.336, 'eval_steps_per_second': 1.147, 'epoch': 16.16}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2229747772216797, 'eval_bleu': 13.6185, 'eval_gen_len': 23.8146, 'eval_runtime': 46.5685, 'eval_samples_per_second': 18.875, 'eval_steps_per_second': 1.181, 'epoch': 16.17}
{'loss': 1.2744, 'grad_norm': 1.100748062133789, 'learning_rate': 0.00022, 'epoch': 16.19}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.219143867492676, 'eval_bleu': 13.8488, 'eval_gen_len': 23.7463, 'eval_runtime': 46.1991, 'eval_samples_per_second': 19.026, 'eval_steps_per_second': 1.191, 'epoch': 16.19}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.227013111114502, 'eval_bleu': 13.8578, 'eval_gen_len': 23.7645, 'eval_runtime': 46.3318, 'eval_samples_per_second': 18.972, 'eval_steps_per_second': 1.187, 'epoch': 16.2}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2240984439849854, 'eval_bleu': 14.0232, 'eval_gen_len': 23.7941, 'eval_runtime': 46.0361, 'eval_samples_per_second': 19.094, 'eval_steps_per_second': 1.195, 'epoch': 16.22}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.224982261657715, 'eval_bleu': 14.0144, 'eval_gen_len': 23.7179, 'eval_runtime': 46.0003, 'eval_samples_per_second': 19.109, 'eval_steps_per_second': 1.196, 'epoch': 16.23}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2252891063690186, 'eval_bleu': 14.0585, 'eval_gen_len': 23.7907, 'eval_runtime': 45.9615, 'eval_samples_per_second': 19.125, 'eval_steps_per_second': 1.197, 'epoch': 16.24}
{'loss': 1.2794, 'grad_norm': 1.2409814596176147, 'learning_rate': 0.00021666666666666668, 'epoch': 16.26}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2257652282714844, 'eval_bleu': 14.3394, 'eval_gen_len': 23.6223, 'eval_runtime': 44.7981, 'eval_samples_per_second': 19.621, 'eval_steps_per_second': 1.228, 'epoch': 16.26}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2249836921691895, 'eval_bleu': 14.4132, 'eval_gen_len': 23.7725, 'eval_runtime': 56.0661, 'eval_samples_per_second': 15.678, 'eval_steps_per_second': 0.981, 'epoch': 16.27}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2213501930236816, 'eval_bleu': 13.9528, 'eval_gen_len': 23.727, 'eval_runtime': 50.3376, 'eval_samples_per_second': 17.462, 'eval_steps_per_second': 1.093, 'epoch': 16.29}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.225944995880127, 'eval_bleu': 14.2389, 'eval_gen_len': 23.6837, 'eval_runtime': 45.0977, 'eval_samples_per_second': 19.491, 'eval_steps_per_second': 1.22, 'epoch': 16.3}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.22467303276062, 'eval_bleu': 14.0447, 'eval_gen_len': 23.7349, 'eval_runtime': 45.3738, 'eval_samples_per_second': 19.372, 'eval_steps_per_second': 1.212, 'epoch': 16.31}
{'loss': 1.281, 'grad_norm': 1.3122395277023315, 'learning_rate': 0.00021333333333333336, 'epoch': 16.33}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.222119092941284, 'eval_bleu': 14.0766, 'eval_gen_len': 23.6883, 'eval_runtime': 45.1367, 'eval_samples_per_second': 19.474, 'eval_steps_per_second': 1.219, 'epoch': 16.33}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2146835327148438, 'eval_bleu': 13.8875, 'eval_gen_len': 23.7702, 'eval_runtime': 45.9973, 'eval_samples_per_second': 19.11, 'eval_steps_per_second': 1.196, 'epoch': 16.34}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2240567207336426, 'eval_bleu': 13.8874, 'eval_gen_len': 23.6815, 'eval_runtime': 45.7558, 'eval_samples_per_second': 19.211, 'eval_steps_per_second': 1.202, 'epoch': 16.35}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2188687324523926, 'eval_bleu': 14.2356, 'eval_gen_len': 23.8009, 'eval_runtime': 45.8643, 'eval_samples_per_second': 19.165, 'eval_steps_per_second': 1.199, 'epoch': 16.37}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.214662551879883, 'eval_bleu': 13.9424, 'eval_gen_len': 23.8441, 'eval_runtime': 46.337, 'eval_samples_per_second': 18.97, 'eval_steps_per_second': 1.187, 'epoch': 16.38}
{'loss': 1.2871, 'grad_norm': 1.2565726041793823, 'learning_rate': 0.00021, 'epoch': 16.4}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2152099609375, 'eval_bleu': 13.9761, 'eval_gen_len': 23.8373, 'eval_runtime': 45.7828, 'eval_samples_per_second': 19.199, 'eval_steps_per_second': 1.201, 'epoch': 16.4}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.228475332260132, 'eval_bleu': 14.1132, 'eval_gen_len': 23.6837, 'eval_runtime': 48.7025, 'eval_samples_per_second': 18.048, 'eval_steps_per_second': 1.129, 'epoch': 16.41}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.229330062866211, 'eval_bleu': 14.1926, 'eval_gen_len': 23.8214, 'eval_runtime': 45.9717, 'eval_samples_per_second': 19.12, 'eval_steps_per_second': 1.196, 'epoch': 16.42}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.218946695327759, 'eval_bleu': 14.2899, 'eval_gen_len': 23.686, 'eval_runtime': 47.149, 'eval_samples_per_second': 18.643, 'eval_steps_per_second': 1.167, 'epoch': 16.44}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.214289426803589, 'eval_bleu': 14.2835, 'eval_gen_len': 23.8635, 'eval_runtime': 46.6348, 'eval_samples_per_second': 18.849, 'eval_steps_per_second': 1.179, 'epoch': 16.45}
{'loss': 1.2887, 'grad_norm': 1.1656370162963867, 'learning_rate': 0.00020666666666666666, 'epoch': 16.46}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2116236686706543, 'eval_bleu': 14.2561, 'eval_gen_len': 23.7088, 'eval_runtime': 50.437, 'eval_samples_per_second': 17.428, 'eval_steps_per_second': 1.09, 'epoch': 16.46}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2187628746032715, 'eval_bleu': 14.3247, 'eval_gen_len': 23.7736, 'eval_runtime': 53.9897, 'eval_samples_per_second': 16.281, 'eval_steps_per_second': 1.019, 'epoch': 16.48}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.219489574432373, 'eval_bleu': 14.2979, 'eval_gen_len': 23.8714, 'eval_runtime': 47.2462, 'eval_samples_per_second': 18.605, 'eval_steps_per_second': 1.164, 'epoch': 16.49}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.210146903991699, 'eval_bleu': 14.1131, 'eval_gen_len': 23.8453, 'eval_runtime': 59.6817, 'eval_samples_per_second': 14.728, 'eval_steps_per_second': 0.922, 'epoch': 16.51}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.213808059692383, 'eval_bleu': 14.0949, 'eval_gen_len': 23.7747, 'eval_runtime': 89.516, 'eval_samples_per_second': 9.819, 'eval_steps_per_second': 0.614, 'epoch': 16.52}
{'loss': 1.2965, 'grad_norm': 1.023830533027649, 'learning_rate': 0.00020333333333333333, 'epoch': 16.53}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.215876817703247, 'eval_bleu': 14.3179, 'eval_gen_len': 23.5836, 'eval_runtime': 86.8853, 'eval_samples_per_second': 10.117, 'eval_steps_per_second': 0.633, 'epoch': 16.53}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.208712577819824, 'eval_bleu': 14.0014, 'eval_gen_len': 23.727, 'eval_runtime': 86.949, 'eval_samples_per_second': 10.109, 'eval_steps_per_second': 0.633, 'epoch': 16.55}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2127716541290283, 'eval_bleu': 14.263, 'eval_gen_len': 23.6712, 'eval_runtime': 89.0615, 'eval_samples_per_second': 9.87, 'eval_steps_per_second': 0.618, 'epoch': 16.56}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.211453676223755, 'eval_bleu': 14.1344, 'eval_gen_len': 23.7884, 'eval_runtime': 90.4719, 'eval_samples_per_second': 9.716, 'eval_steps_per_second': 0.608, 'epoch': 16.58}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.198366403579712, 'eval_bleu': 14.4118, 'eval_gen_len': 23.901, 'eval_runtime': 87.9338, 'eval_samples_per_second': 9.996, 'eval_steps_per_second': 0.625, 'epoch': 16.59}
{'loss': 1.2941, 'grad_norm': 1.0214402675628662, 'learning_rate': 0.0002, 'epoch': 16.6}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.205075740814209, 'eval_bleu': 14.2138, 'eval_gen_len': 23.62, 'eval_runtime': 89.3457, 'eval_samples_per_second': 9.838, 'eval_steps_per_second': 0.616, 'epoch': 16.6}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2026991844177246, 'eval_bleu': 14.2285, 'eval_gen_len': 23.8441, 'eval_runtime': 89.4814, 'eval_samples_per_second': 9.823, 'eval_steps_per_second': 0.615, 'epoch': 16.62}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2067296504974365, 'eval_bleu': 14.1999, 'eval_gen_len': 23.6826, 'eval_runtime': 89.3989, 'eval_samples_per_second': 9.832, 'eval_steps_per_second': 0.615, 'epoch': 16.63}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2049825191497803, 'eval_bleu': 14.2002, 'eval_gen_len': 23.6371, 'eval_runtime': 90.2804, 'eval_samples_per_second': 9.736, 'eval_steps_per_second': 0.609, 'epoch': 16.64}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.214446544647217, 'eval_bleu': 14.5109, 'eval_gen_len': 23.7019, 'eval_runtime': 88.7054, 'eval_samples_per_second': 9.909, 'eval_steps_per_second': 0.62, 'epoch': 16.66}
{'loss': 1.3077, 'grad_norm': 1.3727006912231445, 'learning_rate': 0.00019666666666666666, 'epoch': 16.67}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.211527109146118, 'eval_bleu': 14.6747, 'eval_gen_len': 23.7065, 'eval_runtime': 89.5052, 'eval_samples_per_second': 9.821, 'eval_steps_per_second': 0.614, 'epoch': 16.67}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2060706615448, 'eval_bleu': 14.2708, 'eval_gen_len': 23.8646, 'eval_runtime': 88.7519, 'eval_samples_per_second': 9.904, 'eval_steps_per_second': 0.62, 'epoch': 16.69}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.200268268585205, 'eval_bleu': 14.338, 'eval_gen_len': 23.818, 'eval_runtime': 89.3527, 'eval_samples_per_second': 9.837, 'eval_steps_per_second': 0.616, 'epoch': 16.7}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.20234751701355, 'eval_bleu': 14.4985, 'eval_gen_len': 23.7816, 'eval_runtime': 89.6343, 'eval_samples_per_second': 9.807, 'eval_steps_per_second': 0.614, 'epoch': 16.71}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.202035903930664, 'eval_bleu': 14.4747, 'eval_gen_len': 23.7008, 'eval_runtime': 90.4184, 'eval_samples_per_second': 9.721, 'eval_steps_per_second': 0.608, 'epoch': 16.73}
{'loss': 1.3053, 'grad_norm': 1.2503222227096558, 'learning_rate': 0.00019333333333333333, 'epoch': 16.74}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.204627513885498, 'eval_bleu': 14.0977, 'eval_gen_len': 23.6974, 'eval_runtime': 86.2329, 'eval_samples_per_second': 10.193, 'eval_steps_per_second': 0.638, 'epoch': 16.74}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.204308271408081, 'eval_bleu': 14.4012, 'eval_gen_len': 23.7395, 'eval_runtime': 79.7725, 'eval_samples_per_second': 11.019, 'eval_steps_per_second': 0.689, 'epoch': 16.76}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.202133893966675, 'eval_bleu': 14.3324, 'eval_gen_len': 23.8567, 'eval_runtime': 46.0921, 'eval_samples_per_second': 19.071, 'eval_steps_per_second': 1.193, 'epoch': 16.77}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2113397121429443, 'eval_bleu': 14.0767, 'eval_gen_len': 23.7702, 'eval_runtime': 45.4366, 'eval_samples_per_second': 19.346, 'eval_steps_per_second': 1.21, 'epoch': 16.78}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.205991268157959, 'eval_bleu': 14.3965, 'eval_gen_len': 23.8373, 'eval_runtime': 45.7018, 'eval_samples_per_second': 19.233, 'eval_steps_per_second': 1.203, 'epoch': 16.8}
{'loss': 1.3061, 'grad_norm': 1.4390251636505127, 'learning_rate': 0.00019, 'epoch': 16.81}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2172722816467285, 'eval_bleu': 14.1278, 'eval_gen_len': 23.7668, 'eval_runtime': 46.7696, 'eval_samples_per_second': 18.794, 'eval_steps_per_second': 1.176, 'epoch': 16.81}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.198291778564453, 'eval_bleu': 14.122, 'eval_gen_len': 23.8237, 'eval_runtime': 46.5281, 'eval_samples_per_second': 18.892, 'eval_steps_per_second': 1.182, 'epoch': 16.82}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2012131214141846, 'eval_bleu': 13.8711, 'eval_gen_len': 23.7645, 'eval_runtime': 46.59, 'eval_samples_per_second': 18.867, 'eval_steps_per_second': 1.181, 'epoch': 16.84}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.213963270187378, 'eval_bleu': 14.3351, 'eval_gen_len': 23.7964, 'eval_runtime': 45.7952, 'eval_samples_per_second': 19.194, 'eval_steps_per_second': 1.201, 'epoch': 16.85}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1995327472686768, 'eval_bleu': 14.3179, 'eval_gen_len': 23.7907, 'eval_runtime': 56.5015, 'eval_samples_per_second': 15.557, 'eval_steps_per_second': 0.973, 'epoch': 16.87}
{'loss': 1.3083, 'grad_norm': 1.0966293811798096, 'learning_rate': 0.0001866666666666667, 'epoch': 16.88}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.203352451324463, 'eval_bleu': 13.9727, 'eval_gen_len': 23.8669, 'eval_runtime': 46.2741, 'eval_samples_per_second': 18.996, 'eval_steps_per_second': 1.189, 'epoch': 16.88}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2014544010162354, 'eval_bleu': 14.0164, 'eval_gen_len': 23.8828, 'eval_runtime': 46.9367, 'eval_samples_per_second': 18.727, 'eval_steps_per_second': 1.172, 'epoch': 16.89}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2067949771881104, 'eval_bleu': 14.0714, 'eval_gen_len': 23.7065, 'eval_runtime': 47.4545, 'eval_samples_per_second': 18.523, 'eval_steps_per_second': 1.159, 'epoch': 16.91}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1998789310455322, 'eval_bleu': 14.3814, 'eval_gen_len': 23.6428, 'eval_runtime': 46.0639, 'eval_samples_per_second': 19.082, 'eval_steps_per_second': 1.194, 'epoch': 16.92}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.1949310302734375, 'eval_bleu': 14.0733, 'eval_gen_len': 23.8407, 'eval_runtime': 46.4599, 'eval_samples_per_second': 18.92, 'eval_steps_per_second': 1.184, 'epoch': 16.94}
{'loss': 1.3076, 'grad_norm': 1.2974427938461304, 'learning_rate': 0.00018333333333333334, 'epoch': 16.95}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.201995372772217, 'eval_bleu': 14.0775, 'eval_gen_len': 23.5984, 'eval_runtime': 45.4915, 'eval_samples_per_second': 19.322, 'eval_steps_per_second': 1.209, 'epoch': 16.95}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2065465450286865, 'eval_bleu': 14.2319, 'eval_gen_len': 23.7383, 'eval_runtime': 47.1791, 'eval_samples_per_second': 18.631, 'eval_steps_per_second': 1.166, 'epoch': 16.96}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.201355457305908, 'eval_bleu': 14.2056, 'eval_gen_len': 23.8191, 'eval_runtime': 46.2034, 'eval_samples_per_second': 19.025, 'eval_steps_per_second': 1.19, 'epoch': 16.98}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.203303337097168, 'eval_bleu': 14.1071, 'eval_gen_len': 23.7281, 'eval_runtime': 44.6168, 'eval_samples_per_second': 19.701, 'eval_steps_per_second': 1.233, 'epoch': 16.99}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.220937728881836, 'eval_bleu': 14.2311, 'eval_gen_len': 23.7133, 'eval_runtime': 44.718, 'eval_samples_per_second': 19.657, 'eval_steps_per_second': 1.23, 'epoch': 17.0}
{'loss': 1.2869, 'grad_norm': 1.2708065509796143, 'learning_rate': 0.00017999999999999998, 'epoch': 17.02}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2320168018341064, 'eval_bleu': 14.3686, 'eval_gen_len': 23.6485, 'eval_runtime': 44.8753, 'eval_samples_per_second': 19.588, 'eval_steps_per_second': 1.226, 'epoch': 17.02}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2321462631225586, 'eval_bleu': 14.028, 'eval_gen_len': 23.6689, 'eval_runtime': 43.7887, 'eval_samples_per_second': 20.074, 'eval_steps_per_second': 1.256, 'epoch': 17.03}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.225487232208252, 'eval_bleu': 13.8098, 'eval_gen_len': 23.7429, 'eval_runtime': 45.5314, 'eval_samples_per_second': 19.305, 'eval_steps_per_second': 1.208, 'epoch': 17.05}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.238198757171631, 'eval_bleu': 13.9396, 'eval_gen_len': 23.7247, 'eval_runtime': 45.1885, 'eval_samples_per_second': 19.452, 'eval_steps_per_second': 1.217, 'epoch': 17.06}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2358293533325195, 'eval_bleu': 13.993, 'eval_gen_len': 23.8214, 'eval_runtime': 45.0547, 'eval_samples_per_second': 19.51, 'eval_steps_per_second': 1.221, 'epoch': 17.07}
{'loss': 1.2167, 'grad_norm': 1.4250037670135498, 'learning_rate': 0.00017666666666666666, 'epoch': 17.09}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2312440872192383, 'eval_bleu': 14.2867, 'eval_gen_len': 23.694, 'eval_runtime': 46.0171, 'eval_samples_per_second': 19.102, 'eval_steps_per_second': 1.195, 'epoch': 17.09}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2296504974365234, 'eval_bleu': 13.9683, 'eval_gen_len': 23.8111, 'eval_runtime': 47.234, 'eval_samples_per_second': 18.609, 'eval_steps_per_second': 1.164, 'epoch': 17.1}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.240645408630371, 'eval_bleu': 13.9332, 'eval_gen_len': 23.6769, 'eval_runtime': 44.064, 'eval_samples_per_second': 19.948, 'eval_steps_per_second': 1.248, 'epoch': 17.12}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2339749336242676, 'eval_bleu': 14.1834, 'eval_gen_len': 23.7725, 'eval_runtime': 44.5593, 'eval_samples_per_second': 19.727, 'eval_steps_per_second': 1.234, 'epoch': 17.13}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.238558292388916, 'eval_bleu': 13.9177, 'eval_gen_len': 23.6109, 'eval_runtime': 46.1533, 'eval_samples_per_second': 19.045, 'eval_steps_per_second': 1.192, 'epoch': 17.14}
{'loss': 1.2318, 'grad_norm': 1.1133133172988892, 'learning_rate': 0.00017333333333333334, 'epoch': 17.16}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2345259189605713, 'eval_bleu': 13.9862, 'eval_gen_len': 23.6473, 'eval_runtime': 45.722, 'eval_samples_per_second': 19.225, 'eval_steps_per_second': 1.203, 'epoch': 17.16}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2238714694976807, 'eval_bleu': 13.9423, 'eval_gen_len': 23.6143, 'eval_runtime': 46.1597, 'eval_samples_per_second': 19.043, 'eval_steps_per_second': 1.192, 'epoch': 17.17}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.234543561935425, 'eval_bleu': 14.0917, 'eval_gen_len': 23.661, 'eval_runtime': 47.2936, 'eval_samples_per_second': 18.586, 'eval_steps_per_second': 1.163, 'epoch': 17.18}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2451517581939697, 'eval_bleu': 14.1795, 'eval_gen_len': 23.6382, 'eval_runtime': 46.6016, 'eval_samples_per_second': 18.862, 'eval_steps_per_second': 1.18, 'epoch': 17.2}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2268478870391846, 'eval_bleu': 14.2306, 'eval_gen_len': 23.8077, 'eval_runtime': 45.604, 'eval_samples_per_second': 19.275, 'eval_steps_per_second': 1.206, 'epoch': 17.21}
{'loss': 1.2328, 'grad_norm': 1.1534358263015747, 'learning_rate': 0.00017, 'epoch': 17.23}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.232999801635742, 'eval_bleu': 14.3045, 'eval_gen_len': 23.5597, 'eval_runtime': 43.3995, 'eval_samples_per_second': 20.254, 'eval_steps_per_second': 1.267, 'epoch': 17.23}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2330102920532227, 'eval_bleu': 13.9096, 'eval_gen_len': 23.6701, 'eval_runtime': 46.7543, 'eval_samples_per_second': 18.8, 'eval_steps_per_second': 1.176, 'epoch': 17.24}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2342982292175293, 'eval_bleu': 14.1445, 'eval_gen_len': 23.6405, 'eval_runtime': 46.0186, 'eval_samples_per_second': 19.101, 'eval_steps_per_second': 1.195, 'epoch': 17.25}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2255806922912598, 'eval_bleu': 13.976, 'eval_gen_len': 23.6792, 'eval_runtime': 45.7141, 'eval_samples_per_second': 19.228, 'eval_steps_per_second': 1.203, 'epoch': 17.27}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2375030517578125, 'eval_bleu': 13.9631, 'eval_gen_len': 23.6473, 'eval_runtime': 46.7083, 'eval_samples_per_second': 18.819, 'eval_steps_per_second': 1.178, 'epoch': 17.28}
{'loss': 1.2342, 'grad_norm': 1.3398607969284058, 'learning_rate': 0.00016666666666666666, 'epoch': 17.3}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.231813430786133, 'eval_bleu': 14.0594, 'eval_gen_len': 23.7816, 'eval_runtime': 46.9469, 'eval_samples_per_second': 18.723, 'eval_steps_per_second': 1.172, 'epoch': 17.3}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.231102228164673, 'eval_bleu': 14.2565, 'eval_gen_len': 23.7998, 'eval_runtime': 47.2971, 'eval_samples_per_second': 18.585, 'eval_steps_per_second': 1.163, 'epoch': 17.31}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.239621162414551, 'eval_bleu': 14.2688, 'eval_gen_len': 23.6678, 'eval_runtime': 48.2511, 'eval_samples_per_second': 18.217, 'eval_steps_per_second': 1.14, 'epoch': 17.32}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.227409601211548, 'eval_bleu': 14.2907, 'eval_gen_len': 23.7201, 'eval_runtime': 46.3366, 'eval_samples_per_second': 18.97, 'eval_steps_per_second': 1.187, 'epoch': 17.34}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.230348587036133, 'eval_bleu': 14.2255, 'eval_gen_len': 23.7497, 'eval_runtime': 47.3494, 'eval_samples_per_second': 18.564, 'eval_steps_per_second': 1.162, 'epoch': 17.35}
{'loss': 1.2323, 'grad_norm': 1.4215351343154907, 'learning_rate': 0.00016333333333333334, 'epoch': 17.36}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.231065273284912, 'eval_bleu': 14.1701, 'eval_gen_len': 23.6462, 'eval_runtime': 47.0621, 'eval_samples_per_second': 18.677, 'eval_steps_per_second': 1.169, 'epoch': 17.36}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.229072093963623, 'eval_bleu': 13.9848, 'eval_gen_len': 23.8168, 'eval_runtime': 47.401, 'eval_samples_per_second': 18.544, 'eval_steps_per_second': 1.16, 'epoch': 17.38}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.228415012359619, 'eval_bleu': 14.1394, 'eval_gen_len': 23.7372, 'eval_runtime': 45.6925, 'eval_samples_per_second': 19.237, 'eval_steps_per_second': 1.204, 'epoch': 17.39}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2294163703918457, 'eval_bleu': 14.4865, 'eval_gen_len': 23.6928, 'eval_runtime': 45.1141, 'eval_samples_per_second': 19.484, 'eval_steps_per_second': 1.219, 'epoch': 17.41}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.232485294342041, 'eval_bleu': 14.2645, 'eval_gen_len': 23.7759, 'eval_runtime': 45.421, 'eval_samples_per_second': 19.352, 'eval_steps_per_second': 1.211, 'epoch': 17.42}
{'loss': 1.2463, 'grad_norm': 1.2555382251739502, 'learning_rate': 0.00016, 'epoch': 17.43}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2223761081695557, 'eval_bleu': 14.2373, 'eval_gen_len': 23.7406, 'eval_runtime': 50.1163, 'eval_samples_per_second': 17.539, 'eval_steps_per_second': 1.097, 'epoch': 17.43}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.230827808380127, 'eval_bleu': 14.0211, 'eval_gen_len': 23.7873, 'eval_runtime': 44.4883, 'eval_samples_per_second': 19.758, 'eval_steps_per_second': 1.236, 'epoch': 17.45}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2217094898223877, 'eval_bleu': 14.14, 'eval_gen_len': 23.7213, 'eval_runtime': 44.8466, 'eval_samples_per_second': 19.6, 'eval_steps_per_second': 1.226, 'epoch': 17.46}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2187342643737793, 'eval_bleu': 14.1263, 'eval_gen_len': 23.8487, 'eval_runtime': 44.8672, 'eval_samples_per_second': 19.591, 'eval_steps_per_second': 1.226, 'epoch': 17.47}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2265801429748535, 'eval_bleu': 14.224, 'eval_gen_len': 23.7884, 'eval_runtime': 43.8581, 'eval_samples_per_second': 20.042, 'eval_steps_per_second': 1.254, 'epoch': 17.49}
{'loss': 1.2525, 'grad_norm': 1.4832813739776611, 'learning_rate': 0.0001566666666666667, 'epoch': 17.5}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2265074253082275, 'eval_bleu': 14.1254, 'eval_gen_len': 23.6121, 'eval_runtime': 45.0189, 'eval_samples_per_second': 19.525, 'eval_steps_per_second': 1.222, 'epoch': 17.5}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2352545261383057, 'eval_bleu': 14.1723, 'eval_gen_len': 23.6451, 'eval_runtime': 44.8728, 'eval_samples_per_second': 19.589, 'eval_steps_per_second': 1.226, 'epoch': 17.52}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2252554893493652, 'eval_bleu': 14.1586, 'eval_gen_len': 23.7122, 'eval_runtime': 45.317, 'eval_samples_per_second': 19.397, 'eval_steps_per_second': 1.214, 'epoch': 17.53}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.229884147644043, 'eval_bleu': 14.4178, 'eval_gen_len': 23.7645, 'eval_runtime': 44.7095, 'eval_samples_per_second': 19.66, 'eval_steps_per_second': 1.23, 'epoch': 17.54}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2238011360168457, 'eval_bleu': 14.2839, 'eval_gen_len': 23.6837, 'eval_runtime': 47.9863, 'eval_samples_per_second': 18.318, 'eval_steps_per_second': 1.146, 'epoch': 17.56}
{'loss': 1.2484, 'grad_norm': 1.1895110607147217, 'learning_rate': 0.00015333333333333334, 'epoch': 17.57}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2297840118408203, 'eval_bleu': 14.2286, 'eval_gen_len': 23.7304, 'eval_runtime': 44.8599, 'eval_samples_per_second': 19.594, 'eval_steps_per_second': 1.226, 'epoch': 17.57}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.228410243988037, 'eval_bleu': 14.2882, 'eval_gen_len': 23.818, 'eval_runtime': 46.314, 'eval_samples_per_second': 18.979, 'eval_steps_per_second': 1.188, 'epoch': 17.59}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2246854305267334, 'eval_bleu': 14.5164, 'eval_gen_len': 23.6655, 'eval_runtime': 45.4724, 'eval_samples_per_second': 19.33, 'eval_steps_per_second': 1.21, 'epoch': 17.6}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2302513122558594, 'eval_bleu': 14.4832, 'eval_gen_len': 23.8726, 'eval_runtime': 48.5606, 'eval_samples_per_second': 18.101, 'eval_steps_per_second': 1.133, 'epoch': 17.61}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2341840267181396, 'eval_bleu': 14.7796, 'eval_gen_len': 23.6951, 'eval_runtime': 45.1628, 'eval_samples_per_second': 19.463, 'eval_steps_per_second': 1.218, 'epoch': 17.63}
{'loss': 1.2487, 'grad_norm': 1.1852450370788574, 'learning_rate': 0.00015, 'epoch': 17.64}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2263550758361816, 'eval_bleu': 14.7606, 'eval_gen_len': 23.6564, 'eval_runtime': 44.0969, 'eval_samples_per_second': 19.933, 'eval_steps_per_second': 1.247, 'epoch': 17.64}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.226470470428467, 'eval_bleu': 14.3951, 'eval_gen_len': 23.7144, 'eval_runtime': 45.4665, 'eval_samples_per_second': 19.333, 'eval_steps_per_second': 1.21, 'epoch': 17.65}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.222407579421997, 'eval_bleu': 14.3196, 'eval_gen_len': 23.6701, 'eval_runtime': 44.331, 'eval_samples_per_second': 19.828, 'eval_steps_per_second': 1.241, 'epoch': 17.67}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2306787967681885, 'eval_bleu': 14.3071, 'eval_gen_len': 23.7156, 'eval_runtime': 44.1136, 'eval_samples_per_second': 19.926, 'eval_steps_per_second': 1.247, 'epoch': 17.68}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2225794792175293, 'eval_bleu': 14.1526, 'eval_gen_len': 23.6667, 'eval_runtime': 45.253, 'eval_samples_per_second': 19.424, 'eval_steps_per_second': 1.215, 'epoch': 17.7}
{'loss': 1.2524, 'grad_norm': 1.1693742275238037, 'learning_rate': 0.00014666666666666666, 'epoch': 17.71}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.224231004714966, 'eval_bleu': 14.2534, 'eval_gen_len': 23.7656, 'eval_runtime': 43.8276, 'eval_samples_per_second': 20.056, 'eval_steps_per_second': 1.255, 'epoch': 17.71}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2189738750457764, 'eval_bleu': 14.3743, 'eval_gen_len': 23.8328, 'eval_runtime': 45.1044, 'eval_samples_per_second': 19.488, 'eval_steps_per_second': 1.219, 'epoch': 17.72}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2177650928497314, 'eval_bleu': 14.2859, 'eval_gen_len': 23.7691, 'eval_runtime': 44.5599, 'eval_samples_per_second': 19.726, 'eval_steps_per_second': 1.234, 'epoch': 17.74}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2260279655456543, 'eval_bleu': 14.0659, 'eval_gen_len': 23.7554, 'eval_runtime': 46.73, 'eval_samples_per_second': 18.81, 'eval_steps_per_second': 1.177, 'epoch': 17.75}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2191500663757324, 'eval_bleu': 14.4578, 'eval_gen_len': 23.6746, 'eval_runtime': 44.9459, 'eval_samples_per_second': 19.557, 'eval_steps_per_second': 1.224, 'epoch': 17.77}
{'loss': 1.2556, 'grad_norm': 1.428727388381958, 'learning_rate': 0.00014333333333333334, 'epoch': 17.78}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.216284990310669, 'eval_bleu': 14.5531, 'eval_gen_len': 23.7372, 'eval_runtime': 43.9576, 'eval_samples_per_second': 19.997, 'eval_steps_per_second': 1.251, 'epoch': 17.78}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.21897029876709, 'eval_bleu': 14.2326, 'eval_gen_len': 23.6837, 'eval_runtime': 45.1957, 'eval_samples_per_second': 19.449, 'eval_steps_per_second': 1.217, 'epoch': 17.79}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.220299482345581, 'eval_bleu': 14.262, 'eval_gen_len': 23.719, 'eval_runtime': 45.991, 'eval_samples_per_second': 19.112, 'eval_steps_per_second': 1.196, 'epoch': 17.81}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2184865474700928, 'eval_bleu': 13.9602, 'eval_gen_len': 23.8032, 'eval_runtime': 45.4578, 'eval_samples_per_second': 19.337, 'eval_steps_per_second': 1.21, 'epoch': 17.82}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.216989517211914, 'eval_bleu': 14.2484, 'eval_gen_len': 23.7691, 'eval_runtime': 44.3517, 'eval_samples_per_second': 19.819, 'eval_steps_per_second': 1.24, 'epoch': 17.83}
{'loss': 1.2584, 'grad_norm': 1.511881947517395, 'learning_rate': 0.00014000000000000001, 'epoch': 17.85}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2163426876068115, 'eval_bleu': 14.0544, 'eval_gen_len': 23.7838, 'eval_runtime': 44.7282, 'eval_samples_per_second': 19.652, 'eval_steps_per_second': 1.23, 'epoch': 17.85}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.223398208618164, 'eval_bleu': 14.1813, 'eval_gen_len': 23.6644, 'eval_runtime': 44.2828, 'eval_samples_per_second': 19.85, 'eval_steps_per_second': 1.242, 'epoch': 17.86}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.215461015701294, 'eval_bleu': 14.0397, 'eval_gen_len': 23.7838, 'eval_runtime': 44.5808, 'eval_samples_per_second': 19.717, 'eval_steps_per_second': 1.234, 'epoch': 17.88}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2106082439422607, 'eval_bleu': 13.9899, 'eval_gen_len': 23.7247, 'eval_runtime': 44.7549, 'eval_samples_per_second': 19.64, 'eval_steps_per_second': 1.229, 'epoch': 17.89}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2130191326141357, 'eval_bleu': 13.8273, 'eval_gen_len': 23.6451, 'eval_runtime': 45.1642, 'eval_samples_per_second': 19.462, 'eval_steps_per_second': 1.218, 'epoch': 17.9}
{'loss': 1.2618, 'grad_norm': 1.4178467988967896, 'learning_rate': 0.00013666666666666666, 'epoch': 17.92}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.214895725250244, 'eval_bleu': 13.9493, 'eval_gen_len': 23.6746, 'eval_runtime': 45.1366, 'eval_samples_per_second': 19.474, 'eval_steps_per_second': 1.219, 'epoch': 17.92}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.218660831451416, 'eval_bleu': 14.1837, 'eval_gen_len': 23.6371, 'eval_runtime': 45.556, 'eval_samples_per_second': 19.295, 'eval_steps_per_second': 1.207, 'epoch': 17.93}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.208360195159912, 'eval_bleu': 14.1416, 'eval_gen_len': 23.7247, 'eval_runtime': 44.3901, 'eval_samples_per_second': 19.802, 'eval_steps_per_second': 1.239, 'epoch': 17.95}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2136380672454834, 'eval_bleu': 14.0953, 'eval_gen_len': 23.6359, 'eval_runtime': 44.4097, 'eval_samples_per_second': 19.793, 'eval_steps_per_second': 1.238, 'epoch': 17.96}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2191691398620605, 'eval_bleu': 14.1091, 'eval_gen_len': 23.653, 'eval_runtime': 45.1157, 'eval_samples_per_second': 19.483, 'eval_steps_per_second': 1.219, 'epoch': 17.97}
{'loss': 1.2583, 'grad_norm': 1.1641099452972412, 'learning_rate': 0.00013333333333333334, 'epoch': 17.99}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.214920997619629, 'eval_bleu': 14.1921, 'eval_gen_len': 23.7076, 'eval_runtime': 44.58, 'eval_samples_per_second': 19.717, 'eval_steps_per_second': 1.234, 'epoch': 17.99}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2214319705963135, 'eval_bleu': 13.9183, 'eval_gen_len': 23.6359, 'eval_runtime': 44.077, 'eval_samples_per_second': 19.942, 'eval_steps_per_second': 1.248, 'epoch': 18.0}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.253998279571533, 'eval_bleu': 14.1722, 'eval_gen_len': 23.6337, 'eval_runtime': 44.38, 'eval_samples_per_second': 19.806, 'eval_steps_per_second': 1.239, 'epoch': 18.01}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2476110458374023, 'eval_bleu': 13.9864, 'eval_gen_len': 23.5813, 'eval_runtime': 44.1384, 'eval_samples_per_second': 19.915, 'eval_steps_per_second': 1.246, 'epoch': 18.03}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.249213933944702, 'eval_bleu': 13.9572, 'eval_gen_len': 23.6303, 'eval_runtime': 45.5599, 'eval_samples_per_second': 19.293, 'eval_steps_per_second': 1.207, 'epoch': 18.04}
{'loss': 1.1968, 'grad_norm': 1.4215691089630127, 'learning_rate': 0.00013000000000000002, 'epoch': 18.06}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.258521556854248, 'eval_bleu': 14.2571, 'eval_gen_len': 23.6928, 'eval_runtime': 44.1672, 'eval_samples_per_second': 19.902, 'eval_steps_per_second': 1.245, 'epoch': 18.06}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.254714250564575, 'eval_bleu': 14.4301, 'eval_gen_len': 23.7554, 'eval_runtime': 44.4808, 'eval_samples_per_second': 19.761, 'eval_steps_per_second': 1.236, 'epoch': 18.07}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2612180709838867, 'eval_bleu': 14.2942, 'eval_gen_len': 23.7622, 'eval_runtime': 44.0169, 'eval_samples_per_second': 19.97, 'eval_steps_per_second': 1.25, 'epoch': 18.08}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.253037452697754, 'eval_bleu': 14.1703, 'eval_gen_len': 23.7179, 'eval_runtime': 44.8055, 'eval_samples_per_second': 19.618, 'eval_steps_per_second': 1.228, 'epoch': 18.1}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.247699022293091, 'eval_bleu': 14.2467, 'eval_gen_len': 23.6871, 'eval_runtime': 47.5483, 'eval_samples_per_second': 18.486, 'eval_steps_per_second': 1.157, 'epoch': 18.11}
{'loss': 1.1863, 'grad_norm': 1.6991242170333862, 'learning_rate': 0.0001266666666666667, 'epoch': 18.13}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2447152137756348, 'eval_bleu': 14.1531, 'eval_gen_len': 23.6974, 'eval_runtime': 43.283, 'eval_samples_per_second': 20.308, 'eval_steps_per_second': 1.271, 'epoch': 18.13}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.249227523803711, 'eval_bleu': 14.0511, 'eval_gen_len': 23.7531, 'eval_runtime': 44.5468, 'eval_samples_per_second': 19.732, 'eval_steps_per_second': 1.235, 'epoch': 18.14}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.254709243774414, 'eval_bleu': 14.2067, 'eval_gen_len': 23.6883, 'eval_runtime': 63.1847, 'eval_samples_per_second': 13.912, 'eval_steps_per_second': 0.87, 'epoch': 18.15}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2444980144500732, 'eval_bleu': 13.9912, 'eval_gen_len': 23.7008, 'eval_runtime': 62.6963, 'eval_samples_per_second': 14.02, 'eval_steps_per_second': 0.877, 'epoch': 18.17}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2464373111724854, 'eval_bleu': 14.3137, 'eval_gen_len': 23.6519, 'eval_runtime': 46.4246, 'eval_samples_per_second': 18.934, 'eval_steps_per_second': 1.185, 'epoch': 18.18}
{'loss': 1.1904, 'grad_norm': 1.2735389471054077, 'learning_rate': 0.00012333333333333334, 'epoch': 18.19}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.248575210571289, 'eval_bleu': 13.9961, 'eval_gen_len': 23.7088, 'eval_runtime': 48.0983, 'eval_samples_per_second': 18.275, 'eval_steps_per_second': 1.143, 'epoch': 18.19}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2530229091644287, 'eval_bleu': 14.2158, 'eval_gen_len': 23.7827, 'eval_runtime': 64.2285, 'eval_samples_per_second': 13.686, 'eval_steps_per_second': 0.856, 'epoch': 18.21}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.241499662399292, 'eval_bleu': 14.035, 'eval_gen_len': 23.7156, 'eval_runtime': 45.5529, 'eval_samples_per_second': 19.296, 'eval_steps_per_second': 1.207, 'epoch': 18.22}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.250624656677246, 'eval_bleu': 14.0131, 'eval_gen_len': 23.7418, 'eval_runtime': 44.8497, 'eval_samples_per_second': 19.599, 'eval_steps_per_second': 1.226, 'epoch': 18.24}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.255042552947998, 'eval_bleu': 14.114, 'eval_gen_len': 23.7827, 'eval_runtime': 45.092, 'eval_samples_per_second': 19.493, 'eval_steps_per_second': 1.22, 'epoch': 18.25}
{'loss': 1.1929, 'grad_norm': 1.0733956098556519, 'learning_rate': 0.00012, 'epoch': 18.26}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.253075122833252, 'eval_bleu': 14.2143, 'eval_gen_len': 23.7327, 'eval_runtime': 44.3812, 'eval_samples_per_second': 19.806, 'eval_steps_per_second': 1.239, 'epoch': 18.26}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2490906715393066, 'eval_bleu': 14.107, 'eval_gen_len': 23.6758, 'eval_runtime': 45.191, 'eval_samples_per_second': 19.451, 'eval_steps_per_second': 1.217, 'epoch': 18.28}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2573413848876953, 'eval_bleu': 14.1136, 'eval_gen_len': 23.7031, 'eval_runtime': 44.9881, 'eval_samples_per_second': 19.539, 'eval_steps_per_second': 1.223, 'epoch': 18.29}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2477173805236816, 'eval_bleu': 13.9459, 'eval_gen_len': 23.7122, 'eval_runtime': 46.0183, 'eval_samples_per_second': 19.101, 'eval_steps_per_second': 1.195, 'epoch': 18.31}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2514946460723877, 'eval_bleu': 14.0546, 'eval_gen_len': 23.7122, 'eval_runtime': 45.6428, 'eval_samples_per_second': 19.258, 'eval_steps_per_second': 1.205, 'epoch': 18.32}
{'loss': 1.1952, 'grad_norm': 1.4116040468215942, 'learning_rate': 0.00011666666666666667, 'epoch': 18.33}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2487592697143555, 'eval_bleu': 14.0621, 'eval_gen_len': 23.7031, 'eval_runtime': 44.6033, 'eval_samples_per_second': 19.707, 'eval_steps_per_second': 1.233, 'epoch': 18.33}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2424092292785645, 'eval_bleu': 14.1197, 'eval_gen_len': 23.6906, 'eval_runtime': 45.5483, 'eval_samples_per_second': 19.298, 'eval_steps_per_second': 1.208, 'epoch': 18.35}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.249208688735962, 'eval_bleu': 14.1282, 'eval_gen_len': 23.7873, 'eval_runtime': 44.4083, 'eval_samples_per_second': 19.794, 'eval_steps_per_second': 1.239, 'epoch': 18.36}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2490038871765137, 'eval_bleu': 14.0789, 'eval_gen_len': 23.6928, 'eval_runtime': 46.0201, 'eval_samples_per_second': 19.1, 'eval_steps_per_second': 1.195, 'epoch': 18.37}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.246311902999878, 'eval_bleu': 14.2138, 'eval_gen_len': 23.6883, 'eval_runtime': 47.3778, 'eval_samples_per_second': 18.553, 'eval_steps_per_second': 1.161, 'epoch': 18.39}
{'loss': 1.2025, 'grad_norm': 1.196626901626587, 'learning_rate': 0.00011333333333333333, 'epoch': 18.4}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.244964838027954, 'eval_bleu': 14.17, 'eval_gen_len': 23.6974, 'eval_runtime': 45.784, 'eval_samples_per_second': 19.199, 'eval_steps_per_second': 1.201, 'epoch': 18.4}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.246518611907959, 'eval_bleu': 13.8754, 'eval_gen_len': 23.6359, 'eval_runtime': 42.778, 'eval_samples_per_second': 20.548, 'eval_steps_per_second': 1.286, 'epoch': 18.42}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2472620010375977, 'eval_bleu': 13.7758, 'eval_gen_len': 23.6405, 'eval_runtime': 45.2198, 'eval_samples_per_second': 19.438, 'eval_steps_per_second': 1.216, 'epoch': 18.43}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2414371967315674, 'eval_bleu': 13.6428, 'eval_gen_len': 23.7361, 'eval_runtime': 44.6234, 'eval_samples_per_second': 19.698, 'eval_steps_per_second': 1.233, 'epoch': 18.44}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.238471746444702, 'eval_bleu': 14.3091, 'eval_gen_len': 23.6883, 'eval_runtime': 43.5281, 'eval_samples_per_second': 20.194, 'eval_steps_per_second': 1.264, 'epoch': 18.46}
{'loss': 1.2061, 'grad_norm': 1.1709805727005005, 'learning_rate': 0.00011, 'epoch': 18.47}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.252227544784546, 'eval_bleu': 13.9901, 'eval_gen_len': 23.6735, 'eval_runtime': 44.948, 'eval_samples_per_second': 19.556, 'eval_steps_per_second': 1.224, 'epoch': 18.47}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.235236644744873, 'eval_bleu': 14.0731, 'eval_gen_len': 23.7258, 'eval_runtime': 61.4506, 'eval_samples_per_second': 14.304, 'eval_steps_per_second': 0.895, 'epoch': 18.48}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.241962194442749, 'eval_bleu': 14.3645, 'eval_gen_len': 23.6507, 'eval_runtime': 46.8916, 'eval_samples_per_second': 18.745, 'eval_steps_per_second': 1.173, 'epoch': 18.5}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.246370553970337, 'eval_bleu': 14.3793, 'eval_gen_len': 23.7008, 'eval_runtime': 46.8281, 'eval_samples_per_second': 18.771, 'eval_steps_per_second': 1.175, 'epoch': 18.51}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.239994764328003, 'eval_bleu': 14.2981, 'eval_gen_len': 23.6462, 'eval_runtime': 46.1918, 'eval_samples_per_second': 19.029, 'eval_steps_per_second': 1.191, 'epoch': 18.53}
{'loss': 1.201, 'grad_norm': 1.2027157545089722, 'learning_rate': 0.00010666666666666668, 'epoch': 18.54}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2527964115142822, 'eval_bleu': 14.1672, 'eval_gen_len': 23.5927, 'eval_runtime': 79.8725, 'eval_samples_per_second': 11.005, 'eval_steps_per_second': 0.689, 'epoch': 18.54}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.242995023727417, 'eval_bleu': 14.2443, 'eval_gen_len': 23.6894, 'eval_runtime': 66.1652, 'eval_samples_per_second': 13.285, 'eval_steps_per_second': 0.831, 'epoch': 18.55}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2460782527923584, 'eval_bleu': 14.2867, 'eval_gen_len': 23.6769, 'eval_runtime': 47.4019, 'eval_samples_per_second': 18.544, 'eval_steps_per_second': 1.16, 'epoch': 18.57}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2365550994873047, 'eval_bleu': 14.0928, 'eval_gen_len': 23.8043, 'eval_runtime': 45.8179, 'eval_samples_per_second': 19.185, 'eval_steps_per_second': 1.2, 'epoch': 18.58}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2375214099884033, 'eval_bleu': 14.288, 'eval_gen_len': 23.6576, 'eval_runtime': 55.17, 'eval_samples_per_second': 15.933, 'eval_steps_per_second': 0.997, 'epoch': 18.6}
{'loss': 1.2114, 'grad_norm': 1.6142157316207886, 'learning_rate': 0.00010333333333333333, 'epoch': 18.61}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2402524948120117, 'eval_bleu': 14.0478, 'eval_gen_len': 23.8043, 'eval_runtime': 45.9717, 'eval_samples_per_second': 19.12, 'eval_steps_per_second': 1.196, 'epoch': 18.61}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2318670749664307, 'eval_bleu': 14.1906, 'eval_gen_len': 23.6644, 'eval_runtime': 46.7853, 'eval_samples_per_second': 18.788, 'eval_steps_per_second': 1.176, 'epoch': 18.62}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2450313568115234, 'eval_bleu': 14.3538, 'eval_gen_len': 23.727, 'eval_runtime': 45.884, 'eval_samples_per_second': 19.157, 'eval_steps_per_second': 1.199, 'epoch': 18.64}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2346079349517822, 'eval_bleu': 14.0432, 'eval_gen_len': 23.7486, 'eval_runtime': 45.8951, 'eval_samples_per_second': 19.152, 'eval_steps_per_second': 1.198, 'epoch': 18.65}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2363266944885254, 'eval_bleu': 14.3219, 'eval_gen_len': 23.7622, 'eval_runtime': 46.0278, 'eval_samples_per_second': 19.097, 'eval_steps_per_second': 1.195, 'epoch': 18.66}
{'loss': 1.2129, 'grad_norm': 1.3821173906326294, 'learning_rate': 0.0001, 'epoch': 18.68}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2386302947998047, 'eval_bleu': 14.324, 'eval_gen_len': 23.719, 'eval_runtime': 56.5549, 'eval_samples_per_second': 15.542, 'eval_steps_per_second': 0.973, 'epoch': 18.68}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2397706508636475, 'eval_bleu': 14.3939, 'eval_gen_len': 23.7975, 'eval_runtime': 101.2186, 'eval_samples_per_second': 8.684, 'eval_steps_per_second': 0.543, 'epoch': 18.69}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2393696308135986, 'eval_bleu': 14.1521, 'eval_gen_len': 23.7076, 'eval_runtime': 49.3548, 'eval_samples_per_second': 17.81, 'eval_steps_per_second': 1.114, 'epoch': 18.71}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2448201179504395, 'eval_bleu': 14.3649, 'eval_gen_len': 23.7793, 'eval_runtime': 46.1251, 'eval_samples_per_second': 19.057, 'eval_steps_per_second': 1.192, 'epoch': 18.72}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2452473640441895, 'eval_bleu': 14.2385, 'eval_gen_len': 23.6712, 'eval_runtime': 61.5899, 'eval_samples_per_second': 14.272, 'eval_steps_per_second': 0.893, 'epoch': 18.73}
{'loss': 1.2086, 'grad_norm': 1.3792614936828613, 'learning_rate': 9.666666666666667e-05, 'epoch': 18.75}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2410945892333984, 'eval_bleu': 14.2158, 'eval_gen_len': 23.7099, 'eval_runtime': 46.715, 'eval_samples_per_second': 18.816, 'eval_steps_per_second': 1.177, 'epoch': 18.75}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2349770069122314, 'eval_bleu': 14.2364, 'eval_gen_len': 23.6655, 'eval_runtime': 88.5072, 'eval_samples_per_second': 9.931, 'eval_steps_per_second': 0.621, 'epoch': 18.76}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2431282997131348, 'eval_bleu': 14.3102, 'eval_gen_len': 23.7122, 'eval_runtime': 45.4642, 'eval_samples_per_second': 19.334, 'eval_steps_per_second': 1.21, 'epoch': 18.78}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2343904972076416, 'eval_bleu': 14.3618, 'eval_gen_len': 23.7361, 'eval_runtime': 80.1331, 'eval_samples_per_second': 10.969, 'eval_steps_per_second': 0.686, 'epoch': 18.79}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2387425899505615, 'eval_bleu': 14.087, 'eval_gen_len': 23.8146, 'eval_runtime': 51.4092, 'eval_samples_per_second': 17.098, 'eval_steps_per_second': 1.07, 'epoch': 18.8}
{'loss': 1.2158, 'grad_norm': 1.1863722801208496, 'learning_rate': 9.333333333333334e-05, 'epoch': 18.82}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2401223182678223, 'eval_bleu': 14.1549, 'eval_gen_len': 23.7327, 'eval_runtime': 47.417, 'eval_samples_per_second': 18.538, 'eval_steps_per_second': 1.16, 'epoch': 18.82}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2286264896392822, 'eval_bleu': 13.9692, 'eval_gen_len': 23.653, 'eval_runtime': 46.553, 'eval_samples_per_second': 18.882, 'eval_steps_per_second': 1.181, 'epoch': 18.83}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2365047931671143, 'eval_bleu': 14.1483, 'eval_gen_len': 23.6633, 'eval_runtime': 45.2309, 'eval_samples_per_second': 19.434, 'eval_steps_per_second': 1.216, 'epoch': 18.84}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.233577013015747, 'eval_bleu': 14.1498, 'eval_gen_len': 23.7645, 'eval_runtime': 60.372, 'eval_samples_per_second': 14.56, 'eval_steps_per_second': 0.911, 'epoch': 18.86}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2364842891693115, 'eval_bleu': 13.9642, 'eval_gen_len': 23.7838, 'eval_runtime': 84.5657, 'eval_samples_per_second': 10.394, 'eval_steps_per_second': 0.65, 'epoch': 18.87}
{'loss': 1.2252, 'grad_norm': 1.4050716161727905, 'learning_rate': 8.999999999999999e-05, 'epoch': 18.89}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2372853755950928, 'eval_bleu': 14.4484, 'eval_gen_len': 23.6359, 'eval_runtime': 45.8411, 'eval_samples_per_second': 19.175, 'eval_steps_per_second': 1.2, 'epoch': 18.89}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.227689266204834, 'eval_bleu': 14.2331, 'eval_gen_len': 23.694, 'eval_runtime': 51.4983, 'eval_samples_per_second': 17.069, 'eval_steps_per_second': 1.068, 'epoch': 18.9}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.237928867340088, 'eval_bleu': 14.1791, 'eval_gen_len': 23.6849, 'eval_runtime': 46.3575, 'eval_samples_per_second': 18.961, 'eval_steps_per_second': 1.186, 'epoch': 18.91}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2328357696533203, 'eval_bleu': 14.3136, 'eval_gen_len': 23.678, 'eval_runtime': 45.7606, 'eval_samples_per_second': 19.209, 'eval_steps_per_second': 1.202, 'epoch': 18.93}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.235492467880249, 'eval_bleu': 14.2478, 'eval_gen_len': 23.6587, 'eval_runtime': 44.8287, 'eval_samples_per_second': 19.608, 'eval_steps_per_second': 1.227, 'epoch': 18.94}
{'loss': 1.2083, 'grad_norm': 1.2751134634017944, 'learning_rate': 8.666666666666667e-05, 'epoch': 18.96}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.235642433166504, 'eval_bleu': 14.2434, 'eval_gen_len': 23.6689, 'eval_runtime': 47.6318, 'eval_samples_per_second': 18.454, 'eval_steps_per_second': 1.155, 'epoch': 18.96}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2324283123016357, 'eval_bleu': 14.5276, 'eval_gen_len': 23.6496, 'eval_runtime': 66.5079, 'eval_samples_per_second': 13.216, 'eval_steps_per_second': 0.827, 'epoch': 18.97}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2259862422943115, 'eval_bleu': 14.0873, 'eval_gen_len': 23.7679, 'eval_runtime': 94.5685, 'eval_samples_per_second': 9.295, 'eval_steps_per_second': 0.582, 'epoch': 18.98}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2383131980895996, 'eval_bleu': 14.3048, 'eval_gen_len': 23.7304, 'eval_runtime': 86.9015, 'eval_samples_per_second': 10.115, 'eval_steps_per_second': 0.633, 'epoch': 19.0}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.250005006790161, 'eval_bleu': 14.0954, 'eval_gen_len': 23.6667, 'eval_runtime': 58.8827, 'eval_samples_per_second': 14.928, 'eval_steps_per_second': 0.934, 'epoch': 19.01}
{'loss': 1.1844, 'grad_norm': 1.1899354457855225, 'learning_rate': 8.333333333333333e-05, 'epoch': 19.02}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2598352432250977, 'eval_bleu': 14.0518, 'eval_gen_len': 23.6997, 'eval_runtime': 46.2067, 'eval_samples_per_second': 19.023, 'eval_steps_per_second': 1.19, 'epoch': 19.02}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2515618801116943, 'eval_bleu': 14.1742, 'eval_gen_len': 23.6621, 'eval_runtime': 46.3801, 'eval_samples_per_second': 18.952, 'eval_steps_per_second': 1.186, 'epoch': 19.04}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2547860145568848, 'eval_bleu': 14.1453, 'eval_gen_len': 23.6553, 'eval_runtime': 46.609, 'eval_samples_per_second': 18.859, 'eval_steps_per_second': 1.18, 'epoch': 19.05}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.257641315460205, 'eval_bleu': 14.49, 'eval_gen_len': 23.6746, 'eval_runtime': 45.9626, 'eval_samples_per_second': 19.124, 'eval_steps_per_second': 1.197, 'epoch': 19.07}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2611351013183594, 'eval_bleu': 14.0786, 'eval_gen_len': 23.5984, 'eval_runtime': 46.8029, 'eval_samples_per_second': 18.781, 'eval_steps_per_second': 1.175, 'epoch': 19.08}
{'loss': 1.1604, 'grad_norm': 1.458103060722351, 'learning_rate': 8e-05, 'epoch': 19.09}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2583441734313965, 'eval_bleu': 14.032, 'eval_gen_len': 23.7065, 'eval_runtime': 45.238, 'eval_samples_per_second': 19.431, 'eval_steps_per_second': 1.216, 'epoch': 19.09}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.257603168487549, 'eval_bleu': 14.1766, 'eval_gen_len': 23.8271, 'eval_runtime': 94.3253, 'eval_samples_per_second': 9.319, 'eval_steps_per_second': 0.583, 'epoch': 19.11}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2590105533599854, 'eval_bleu': 13.9786, 'eval_gen_len': 23.6109, 'eval_runtime': 87.8811, 'eval_samples_per_second': 10.002, 'eval_steps_per_second': 0.626, 'epoch': 19.12}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.258543014526367, 'eval_bleu': 13.9919, 'eval_gen_len': 23.653, 'eval_runtime': 88.0272, 'eval_samples_per_second': 9.986, 'eval_steps_per_second': 0.625, 'epoch': 19.14}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.259754180908203, 'eval_bleu': 14.3323, 'eval_gen_len': 23.6223, 'eval_runtime': 88.2632, 'eval_samples_per_second': 9.959, 'eval_steps_per_second': 0.623, 'epoch': 19.15}
{'loss': 1.1573, 'grad_norm': 1.3543977737426758, 'learning_rate': 7.666666666666667e-05, 'epoch': 19.16}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.260406017303467, 'eval_bleu': 14.0134, 'eval_gen_len': 23.6803, 'eval_runtime': 88.3275, 'eval_samples_per_second': 9.952, 'eval_steps_per_second': 0.623, 'epoch': 19.16}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.264241933822632, 'eval_bleu': 14.2601, 'eval_gen_len': 23.5734, 'eval_runtime': 87.8046, 'eval_samples_per_second': 10.011, 'eval_steps_per_second': 0.626, 'epoch': 19.18}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2591710090637207, 'eval_bleu': 14.2747, 'eval_gen_len': 23.7133, 'eval_runtime': 88.1811, 'eval_samples_per_second': 9.968, 'eval_steps_per_second': 0.624, 'epoch': 19.19}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2659008502960205, 'eval_bleu': 14.1505, 'eval_gen_len': 23.6769, 'eval_runtime': 90.0442, 'eval_samples_per_second': 9.762, 'eval_steps_per_second': 0.611, 'epoch': 19.2}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.25958514213562, 'eval_bleu': 13.8503, 'eval_gen_len': 23.6849, 'eval_runtime': 91.7048, 'eval_samples_per_second': 9.585, 'eval_steps_per_second': 0.6, 'epoch': 19.22}
{'loss': 1.1603, 'grad_norm': 1.2691768407821655, 'learning_rate': 7.333333333333333e-05, 'epoch': 19.23}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.255436658859253, 'eval_bleu': 13.8272, 'eval_gen_len': 23.7383, 'eval_runtime': 93.4078, 'eval_samples_per_second': 9.41, 'eval_steps_per_second': 0.589, 'epoch': 19.23}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2570297718048096, 'eval_bleu': 14.2394, 'eval_gen_len': 23.6496, 'eval_runtime': 92.084, 'eval_samples_per_second': 9.546, 'eval_steps_per_second': 0.597, 'epoch': 19.25}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.264996290206909, 'eval_bleu': 13.9535, 'eval_gen_len': 23.6371, 'eval_runtime': 97.3709, 'eval_samples_per_second': 9.027, 'eval_steps_per_second': 0.565, 'epoch': 19.26}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2589025497436523, 'eval_bleu': 14.0339, 'eval_gen_len': 23.6837, 'eval_runtime': 68.2728, 'eval_samples_per_second': 12.875, 'eval_steps_per_second': 0.806, 'epoch': 19.27}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.264392614364624, 'eval_bleu': 14.3009, 'eval_gen_len': 23.6416, 'eval_runtime': 45.1765, 'eval_samples_per_second': 19.457, 'eval_steps_per_second': 1.217, 'epoch': 19.29}
{'loss': 1.1592, 'grad_norm': 1.257137417793274, 'learning_rate': 7.000000000000001e-05, 'epoch': 19.3}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2603297233581543, 'eval_bleu': 14.1376, 'eval_gen_len': 23.6428, 'eval_runtime': 46.9435, 'eval_samples_per_second': 18.725, 'eval_steps_per_second': 1.172, 'epoch': 19.3}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2562382221221924, 'eval_bleu': 14.1866, 'eval_gen_len': 23.7793, 'eval_runtime': 44.8874, 'eval_samples_per_second': 19.582, 'eval_steps_per_second': 1.225, 'epoch': 19.32}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.25749135017395, 'eval_bleu': 14.3398, 'eval_gen_len': 23.7531, 'eval_runtime': 46.8115, 'eval_samples_per_second': 18.777, 'eval_steps_per_second': 1.175, 'epoch': 19.33}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2553069591522217, 'eval_bleu': 14.3185, 'eval_gen_len': 23.7588, 'eval_runtime': 45.3458, 'eval_samples_per_second': 19.384, 'eval_steps_per_second': 1.213, 'epoch': 19.34}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2619996070861816, 'eval_bleu': 14.3338, 'eval_gen_len': 23.6746, 'eval_runtime': 46.9779, 'eval_samples_per_second': 18.711, 'eval_steps_per_second': 1.171, 'epoch': 19.36}
{'loss': 1.1619, 'grad_norm': 1.186469554901123, 'learning_rate': 6.666666666666667e-05, 'epoch': 19.37}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2533819675445557, 'eval_bleu': 14.293, 'eval_gen_len': 23.7873, 'eval_runtime': 46.3486, 'eval_samples_per_second': 18.965, 'eval_steps_per_second': 1.187, 'epoch': 19.37}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.260805606842041, 'eval_bleu': 14.3218, 'eval_gen_len': 23.7144, 'eval_runtime': 45.4712, 'eval_samples_per_second': 19.331, 'eval_steps_per_second': 1.21, 'epoch': 19.38}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2551381587982178, 'eval_bleu': 14.1838, 'eval_gen_len': 23.7053, 'eval_runtime': 46.3959, 'eval_samples_per_second': 18.946, 'eval_steps_per_second': 1.185, 'epoch': 19.4}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.251940965652466, 'eval_bleu': 14.0687, 'eval_gen_len': 23.6462, 'eval_runtime': 46.3681, 'eval_samples_per_second': 18.957, 'eval_steps_per_second': 1.186, 'epoch': 19.41}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.257326602935791, 'eval_bleu': 13.9083, 'eval_gen_len': 23.6394, 'eval_runtime': 46.4813, 'eval_samples_per_second': 18.911, 'eval_steps_per_second': 1.183, 'epoch': 19.43}
{'loss': 1.1709, 'grad_norm': 1.3676731586456299, 'learning_rate': 6.333333333333335e-05, 'epoch': 19.44}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2569565773010254, 'eval_bleu': 13.8801, 'eval_gen_len': 23.7258, 'eval_runtime': 46.0434, 'eval_samples_per_second': 19.091, 'eval_steps_per_second': 1.195, 'epoch': 19.44}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.257328748703003, 'eval_bleu': 13.9985, 'eval_gen_len': 23.7053, 'eval_runtime': 45.5966, 'eval_samples_per_second': 19.278, 'eval_steps_per_second': 1.206, 'epoch': 19.45}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2566323280334473, 'eval_bleu': 14.0301, 'eval_gen_len': 23.6826, 'eval_runtime': 45.5651, 'eval_samples_per_second': 19.291, 'eval_steps_per_second': 1.207, 'epoch': 19.47}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2612485885620117, 'eval_bleu': 13.8659, 'eval_gen_len': 23.6849, 'eval_runtime': 46.7337, 'eval_samples_per_second': 18.809, 'eval_steps_per_second': 1.177, 'epoch': 19.48}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.256338596343994, 'eval_bleu': 13.8918, 'eval_gen_len': 23.686, 'eval_runtime': 45.3292, 'eval_samples_per_second': 19.391, 'eval_steps_per_second': 1.213, 'epoch': 19.49}
{'loss': 1.1634, 'grad_norm': 1.5527631044387817, 'learning_rate': 6e-05, 'epoch': 19.51}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.262814521789551, 'eval_bleu': 13.9342, 'eval_gen_len': 23.6746, 'eval_runtime': 46.778, 'eval_samples_per_second': 18.791, 'eval_steps_per_second': 1.176, 'epoch': 19.51}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2625184059143066, 'eval_bleu': 14.0133, 'eval_gen_len': 23.7304, 'eval_runtime': 46.5029, 'eval_samples_per_second': 18.902, 'eval_steps_per_second': 1.183, 'epoch': 19.52}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2518558502197266, 'eval_bleu': 14.2107, 'eval_gen_len': 23.744, 'eval_runtime': 46.0262, 'eval_samples_per_second': 19.098, 'eval_steps_per_second': 1.195, 'epoch': 19.54}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.258206844329834, 'eval_bleu': 14.2523, 'eval_gen_len': 23.7224, 'eval_runtime': 44.6324, 'eval_samples_per_second': 19.694, 'eval_steps_per_second': 1.232, 'epoch': 19.55}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2597177028656006, 'eval_bleu': 14.3808, 'eval_gen_len': 23.6451, 'eval_runtime': 43.728, 'eval_samples_per_second': 20.102, 'eval_steps_per_second': 1.258, 'epoch': 19.56}
{'loss': 1.1747, 'grad_norm': 1.1522020101547241, 'learning_rate': 5.6666666666666664e-05, 'epoch': 19.58}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.251737117767334, 'eval_bleu': 14.0399, 'eval_gen_len': 23.6849, 'eval_runtime': 46.1331, 'eval_samples_per_second': 19.054, 'eval_steps_per_second': 1.192, 'epoch': 19.58}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.251974105834961, 'eval_bleu': 14.4137, 'eval_gen_len': 23.694, 'eval_runtime': 45.768, 'eval_samples_per_second': 19.206, 'eval_steps_per_second': 1.202, 'epoch': 19.59}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2494945526123047, 'eval_bleu': 14.348, 'eval_gen_len': 23.7406, 'eval_runtime': 45.9864, 'eval_samples_per_second': 19.114, 'eval_steps_per_second': 1.196, 'epoch': 19.61}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2615087032318115, 'eval_bleu': 14.3681, 'eval_gen_len': 23.6803, 'eval_runtime': 46.8297, 'eval_samples_per_second': 18.77, 'eval_steps_per_second': 1.174, 'epoch': 19.62}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.253549098968506, 'eval_bleu': 14.1154, 'eval_gen_len': 23.7383, 'eval_runtime': 45.0711, 'eval_samples_per_second': 19.503, 'eval_steps_per_second': 1.22, 'epoch': 19.63}
{'loss': 1.1749, 'grad_norm': 1.301751732826233, 'learning_rate': 5.333333333333334e-05, 'epoch': 19.65}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.249540328979492, 'eval_bleu': 14.1885, 'eval_gen_len': 23.7895, 'eval_runtime': 45.977, 'eval_samples_per_second': 19.118, 'eval_steps_per_second': 1.196, 'epoch': 19.65}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2529168128967285, 'eval_bleu': 14.2385, 'eval_gen_len': 23.6667, 'eval_runtime': 44.9627, 'eval_samples_per_second': 19.55, 'eval_steps_per_second': 1.223, 'epoch': 19.66}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2523419857025146, 'eval_bleu': 14.0477, 'eval_gen_len': 23.8271, 'eval_runtime': 46.218, 'eval_samples_per_second': 19.019, 'eval_steps_per_second': 1.19, 'epoch': 19.67}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.253460168838501, 'eval_bleu': 14.1821, 'eval_gen_len': 23.7042, 'eval_runtime': 45.1953, 'eval_samples_per_second': 19.449, 'eval_steps_per_second': 1.217, 'epoch': 19.69}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2577285766601562, 'eval_bleu': 14.0598, 'eval_gen_len': 23.6792, 'eval_runtime': 45.5762, 'eval_samples_per_second': 19.286, 'eval_steps_per_second': 1.207, 'epoch': 19.7}
{'loss': 1.1713, 'grad_norm': 1.3273673057556152, 'learning_rate': 5e-05, 'epoch': 19.72}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2533931732177734, 'eval_bleu': 14.0176, 'eval_gen_len': 23.711, 'eval_runtime': 45.9448, 'eval_samples_per_second': 19.132, 'eval_steps_per_second': 1.197, 'epoch': 19.72}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2507896423339844, 'eval_bleu': 13.9934, 'eval_gen_len': 23.7327, 'eval_runtime': 45.6238, 'eval_samples_per_second': 19.266, 'eval_steps_per_second': 1.206, 'epoch': 19.73}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.254453182220459, 'eval_bleu': 14.0297, 'eval_gen_len': 23.752, 'eval_runtime': 46.5888, 'eval_samples_per_second': 18.867, 'eval_steps_per_second': 1.181, 'epoch': 19.74}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.250425100326538, 'eval_bleu': 13.9786, 'eval_gen_len': 23.7782, 'eval_runtime': 46.9335, 'eval_samples_per_second': 18.729, 'eval_steps_per_second': 1.172, 'epoch': 19.76}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2515087127685547, 'eval_bleu': 14.0603, 'eval_gen_len': 23.7156, 'eval_runtime': 45.3222, 'eval_samples_per_second': 19.394, 'eval_steps_per_second': 1.214, 'epoch': 19.77}
{'loss': 1.1713, 'grad_norm': 1.4299710988998413, 'learning_rate': 4.666666666666667e-05, 'epoch': 19.79}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.253460168838501, 'eval_bleu': 14.186, 'eval_gen_len': 23.6906, 'eval_runtime': 45.2648, 'eval_samples_per_second': 19.419, 'eval_steps_per_second': 1.215, 'epoch': 19.79}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.256603240966797, 'eval_bleu': 14.0205, 'eval_gen_len': 23.6689, 'eval_runtime': 45.602, 'eval_samples_per_second': 19.275, 'eval_steps_per_second': 1.206, 'epoch': 19.8}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2546563148498535, 'eval_bleu': 14.0771, 'eval_gen_len': 23.6758, 'eval_runtime': 44.9644, 'eval_samples_per_second': 19.549, 'eval_steps_per_second': 1.223, 'epoch': 19.81}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2548329830169678, 'eval_bleu': 14.0569, 'eval_gen_len': 23.6621, 'eval_runtime': 45.5072, 'eval_samples_per_second': 19.316, 'eval_steps_per_second': 1.209, 'epoch': 19.83}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.252530574798584, 'eval_bleu': 13.7018, 'eval_gen_len': 23.6405, 'eval_runtime': 46.734, 'eval_samples_per_second': 18.809, 'eval_steps_per_second': 1.177, 'epoch': 19.84}
{'loss': 1.1661, 'grad_norm': 1.3783528804779053, 'learning_rate': 4.3333333333333334e-05, 'epoch': 19.85}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2566940784454346, 'eval_bleu': 13.776, 'eval_gen_len': 23.6701, 'eval_runtime': 45.4385, 'eval_samples_per_second': 19.345, 'eval_steps_per_second': 1.21, 'epoch': 19.85}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.253861904144287, 'eval_bleu': 13.8718, 'eval_gen_len': 23.7053, 'eval_runtime': 46.1941, 'eval_samples_per_second': 19.028, 'eval_steps_per_second': 1.191, 'epoch': 19.87}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2524471282958984, 'eval_bleu': 13.9896, 'eval_gen_len': 23.6519, 'eval_runtime': 45.4191, 'eval_samples_per_second': 19.353, 'eval_steps_per_second': 1.211, 'epoch': 19.88}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2553017139434814, 'eval_bleu': 13.9852, 'eval_gen_len': 23.7201, 'eval_runtime': 45.0691, 'eval_samples_per_second': 19.503, 'eval_steps_per_second': 1.22, 'epoch': 19.9}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.253401041030884, 'eval_bleu': 14.0746, 'eval_gen_len': 23.6587, 'eval_runtime': 45.0147, 'eval_samples_per_second': 19.527, 'eval_steps_per_second': 1.222, 'epoch': 19.91}
{'loss': 1.1746, 'grad_norm': 1.4278100728988647, 'learning_rate': 4e-05, 'epoch': 19.92}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2576568126678467, 'eval_bleu': 14.1101, 'eval_gen_len': 23.6439, 'eval_runtime': 45.7266, 'eval_samples_per_second': 19.223, 'eval_steps_per_second': 1.203, 'epoch': 19.92}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2562787532806396, 'eval_bleu': 14.0654, 'eval_gen_len': 23.6803, 'eval_runtime': 46.5954, 'eval_samples_per_second': 18.865, 'eval_steps_per_second': 1.18, 'epoch': 19.94}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.252311944961548, 'eval_bleu': 14.0087, 'eval_gen_len': 23.7235, 'eval_runtime': 43.0095, 'eval_samples_per_second': 20.437, 'eval_steps_per_second': 1.279, 'epoch': 19.95}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.249695062637329, 'eval_bleu': 14.0663, 'eval_gen_len': 23.6678, 'eval_runtime': 45.7007, 'eval_samples_per_second': 19.234, 'eval_steps_per_second': 1.203, 'epoch': 19.97}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.254962921142578, 'eval_bleu': 14.017, 'eval_gen_len': 23.6325, 'eval_runtime': 44.3631, 'eval_samples_per_second': 19.814, 'eval_steps_per_second': 1.24, 'epoch': 19.98}
{'loss': 1.1658, 'grad_norm': 1.298557996749878, 'learning_rate': 3.6666666666666666e-05, 'epoch': 19.99}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.251136302947998, 'eval_bleu': 14.2788, 'eval_gen_len': 23.6587, 'eval_runtime': 45.4227, 'eval_samples_per_second': 19.352, 'eval_steps_per_second': 1.211, 'epoch': 19.99}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.256617546081543, 'eval_bleu': 13.9093, 'eval_gen_len': 23.6962, 'eval_runtime': 43.6322, 'eval_samples_per_second': 20.146, 'eval_steps_per_second': 1.261, 'epoch': 20.01}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.262411117553711, 'eval_bleu': 14.2019, 'eval_gen_len': 23.6928, 'eval_runtime': 44.7977, 'eval_samples_per_second': 19.622, 'eval_steps_per_second': 1.228, 'epoch': 20.02}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2645130157470703, 'eval_bleu': 14.1203, 'eval_gen_len': 23.628, 'eval_runtime': 43.9147, 'eval_samples_per_second': 20.016, 'eval_steps_per_second': 1.252, 'epoch': 20.03}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.267301559448242, 'eval_bleu': 14.1215, 'eval_gen_len': 23.7053, 'eval_runtime': 44.8296, 'eval_samples_per_second': 19.608, 'eval_steps_per_second': 1.227, 'epoch': 20.05}
{'loss': 1.1382, 'grad_norm': 1.0056047439575195, 'learning_rate': 3.3333333333333335e-05, 'epoch': 20.06}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2613723278045654, 'eval_bleu': 14.3398, 'eval_gen_len': 23.7235, 'eval_runtime': 44.0276, 'eval_samples_per_second': 19.965, 'eval_steps_per_second': 1.249, 'epoch': 20.06}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2653777599334717, 'eval_bleu': 14.0649, 'eval_gen_len': 23.7645, 'eval_runtime': 44.0354, 'eval_samples_per_second': 19.961, 'eval_steps_per_second': 1.249, 'epoch': 20.08}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2677183151245117, 'eval_bleu': 14.0914, 'eval_gen_len': 23.6985, 'eval_runtime': 46.1409, 'eval_samples_per_second': 19.05, 'eval_steps_per_second': 1.192, 'epoch': 20.09}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.266672134399414, 'eval_bleu': 13.9945, 'eval_gen_len': 23.6724, 'eval_runtime': 44.2562, 'eval_samples_per_second': 19.862, 'eval_steps_per_second': 1.243, 'epoch': 20.1}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.266561508178711, 'eval_bleu': 14.1148, 'eval_gen_len': 23.7076, 'eval_runtime': 45.1857, 'eval_samples_per_second': 19.453, 'eval_steps_per_second': 1.217, 'epoch': 20.12}
{'loss': 1.1353, 'grad_norm': 1.3523553609848022, 'learning_rate': 3e-05, 'epoch': 20.13}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.265833854675293, 'eval_bleu': 14.1769, 'eval_gen_len': 23.7008, 'eval_runtime': 44.2215, 'eval_samples_per_second': 19.877, 'eval_steps_per_second': 1.244, 'epoch': 20.13}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2669014930725098, 'eval_bleu': 13.9872, 'eval_gen_len': 23.6223, 'eval_runtime': 45.0178, 'eval_samples_per_second': 19.526, 'eval_steps_per_second': 1.222, 'epoch': 20.15}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2699389457702637, 'eval_bleu': 14.1124, 'eval_gen_len': 23.6633, 'eval_runtime': 44.0295, 'eval_samples_per_second': 19.964, 'eval_steps_per_second': 1.249, 'epoch': 20.16}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2672512531280518, 'eval_bleu': 14.0231, 'eval_gen_len': 23.6633, 'eval_runtime': 44.0601, 'eval_samples_per_second': 19.95, 'eval_steps_per_second': 1.248, 'epoch': 20.17}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2692089080810547, 'eval_bleu': 13.9812, 'eval_gen_len': 23.6496, 'eval_runtime': 45.6947, 'eval_samples_per_second': 19.236, 'eval_steps_per_second': 1.204, 'epoch': 20.19}
{'loss': 1.1367, 'grad_norm': 1.5501067638397217, 'learning_rate': 2.666666666666667e-05, 'epoch': 20.2}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.266624927520752, 'eval_bleu': 14.1375, 'eval_gen_len': 23.6689, 'eval_runtime': 45.587, 'eval_samples_per_second': 19.282, 'eval_steps_per_second': 1.206, 'epoch': 20.2}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2666661739349365, 'eval_bleu': 14.1891, 'eval_gen_len': 23.6883, 'eval_runtime': 46.002, 'eval_samples_per_second': 19.108, 'eval_steps_per_second': 1.196, 'epoch': 20.21}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2677724361419678, 'eval_bleu': 14.1839, 'eval_gen_len': 23.6917, 'eval_runtime': 44.4582, 'eval_samples_per_second': 19.771, 'eval_steps_per_second': 1.237, 'epoch': 20.23}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.265399217605591, 'eval_bleu': 14.0835, 'eval_gen_len': 23.7224, 'eval_runtime': 44.8772, 'eval_samples_per_second': 19.587, 'eval_steps_per_second': 1.226, 'epoch': 20.24}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.266930103302002, 'eval_bleu': 13.9888, 'eval_gen_len': 23.6439, 'eval_runtime': 44.9525, 'eval_samples_per_second': 19.554, 'eval_steps_per_second': 1.224, 'epoch': 20.26}
{'loss': 1.1347, 'grad_norm': 1.3471781015396118, 'learning_rate': 2.3333333333333336e-05, 'epoch': 20.27}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2651031017303467, 'eval_bleu': 14.1223, 'eval_gen_len': 23.6496, 'eval_runtime': 45.5606, 'eval_samples_per_second': 19.293, 'eval_steps_per_second': 1.207, 'epoch': 20.27}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2640318870544434, 'eval_bleu': 14.1684, 'eval_gen_len': 23.6598, 'eval_runtime': 45.8743, 'eval_samples_per_second': 19.161, 'eval_steps_per_second': 1.199, 'epoch': 20.28}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2652814388275146, 'eval_bleu': 14.2665, 'eval_gen_len': 23.6667, 'eval_runtime': 44.733, 'eval_samples_per_second': 19.65, 'eval_steps_per_second': 1.23, 'epoch': 20.3}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.268639087677002, 'eval_bleu': 14.2009, 'eval_gen_len': 23.7042, 'eval_runtime': 45.9404, 'eval_samples_per_second': 19.133, 'eval_steps_per_second': 1.197, 'epoch': 20.31}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2701663970947266, 'eval_bleu': 14.1518, 'eval_gen_len': 23.694, 'eval_runtime': 72.1121, 'eval_samples_per_second': 12.189, 'eval_steps_per_second': 0.763, 'epoch': 20.33}
{'loss': 1.1404, 'grad_norm': 1.560865044593811, 'learning_rate': 2e-05, 'epoch': 20.34}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2648892402648926, 'eval_bleu': 14.194, 'eval_gen_len': 23.7418, 'eval_runtime': 94.8256, 'eval_samples_per_second': 9.27, 'eval_steps_per_second': 0.58, 'epoch': 20.34}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2700138092041016, 'eval_bleu': 14.2649, 'eval_gen_len': 23.6758, 'eval_runtime': 46.4911, 'eval_samples_per_second': 18.907, 'eval_steps_per_second': 1.183, 'epoch': 20.35}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.268787145614624, 'eval_bleu': 14.2587, 'eval_gen_len': 23.7156, 'eval_runtime': 86.0908, 'eval_samples_per_second': 10.21, 'eval_steps_per_second': 0.639, 'epoch': 20.37}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2656149864196777, 'eval_bleu': 14.3223, 'eval_gen_len': 23.6746, 'eval_runtime': 43.6675, 'eval_samples_per_second': 20.129, 'eval_steps_per_second': 1.26, 'epoch': 20.38}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.266754627227783, 'eval_bleu': 14.3008, 'eval_gen_len': 23.661, 'eval_runtime': 41.7403, 'eval_samples_per_second': 21.059, 'eval_steps_per_second': 1.318, 'epoch': 20.39}
{'loss': 1.1415, 'grad_norm': 1.1968663930892944, 'learning_rate': 1.6666666666666667e-05, 'epoch': 20.41}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.264829397201538, 'eval_bleu': 14.1468, 'eval_gen_len': 23.6576, 'eval_runtime': 44.3191, 'eval_samples_per_second': 19.833, 'eval_steps_per_second': 1.241, 'epoch': 20.41}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2665672302246094, 'eval_bleu': 14.0918, 'eval_gen_len': 23.6837, 'eval_runtime': 40.6783, 'eval_samples_per_second': 21.609, 'eval_steps_per_second': 1.352, 'epoch': 20.42}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2655951976776123, 'eval_bleu': 14.2347, 'eval_gen_len': 23.7406, 'eval_runtime': 41.969, 'eval_samples_per_second': 20.944, 'eval_steps_per_second': 1.31, 'epoch': 20.44}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2682833671569824, 'eval_bleu': 14.2499, 'eval_gen_len': 23.6519, 'eval_runtime': 45.6391, 'eval_samples_per_second': 19.26, 'eval_steps_per_second': 1.205, 'epoch': 20.45}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2666375637054443, 'eval_bleu': 14.2438, 'eval_gen_len': 23.6883, 'eval_runtime': 52.5404, 'eval_samples_per_second': 16.73, 'eval_steps_per_second': 1.047, 'epoch': 20.46}
{'loss': 1.1386, 'grad_norm': 1.7287795543670654, 'learning_rate': 1.3333333333333335e-05, 'epoch': 20.48}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.267423152923584, 'eval_bleu': 14.1811, 'eval_gen_len': 23.6689, 'eval_runtime': 61.2512, 'eval_samples_per_second': 14.351, 'eval_steps_per_second': 0.898, 'epoch': 20.48}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.267929792404175, 'eval_bleu': 14.2366, 'eval_gen_len': 23.6371, 'eval_runtime': 44.906, 'eval_samples_per_second': 19.574, 'eval_steps_per_second': 1.225, 'epoch': 20.49}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.269641160964966, 'eval_bleu': 14.2009, 'eval_gen_len': 23.6667, 'eval_runtime': 48.748, 'eval_samples_per_second': 18.032, 'eval_steps_per_second': 1.128, 'epoch': 20.51}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.267956018447876, 'eval_bleu': 14.0904, 'eval_gen_len': 23.6564, 'eval_runtime': 46.6046, 'eval_samples_per_second': 18.861, 'eval_steps_per_second': 1.18, 'epoch': 20.52}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.268352746963501, 'eval_bleu': 14.1861, 'eval_gen_len': 23.6701, 'eval_runtime': 45.5969, 'eval_samples_per_second': 19.278, 'eval_steps_per_second': 1.206, 'epoch': 20.53}
{'loss': 1.1411, 'grad_norm': 1.2488734722137451, 'learning_rate': 1e-05, 'epoch': 20.55}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2666563987731934, 'eval_bleu': 14.0773, 'eval_gen_len': 23.6519, 'eval_runtime': 45.8385, 'eval_samples_per_second': 19.176, 'eval_steps_per_second': 1.2, 'epoch': 20.55}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.263880729675293, 'eval_bleu': 14.1803, 'eval_gen_len': 23.6485, 'eval_runtime': 45.2898, 'eval_samples_per_second': 19.408, 'eval_steps_per_second': 1.214, 'epoch': 20.56}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2659571170806885, 'eval_bleu': 14.148, 'eval_gen_len': 23.6405, 'eval_runtime': 54.5737, 'eval_samples_per_second': 16.107, 'eval_steps_per_second': 1.008, 'epoch': 20.57}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2669427394866943, 'eval_bleu': 14.2369, 'eval_gen_len': 23.6439, 'eval_runtime': 45.6239, 'eval_samples_per_second': 19.266, 'eval_steps_per_second': 1.206, 'epoch': 20.59}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2661938667297363, 'eval_bleu': 14.128, 'eval_gen_len': 23.6951, 'eval_runtime': 45.7642, 'eval_samples_per_second': 19.207, 'eval_steps_per_second': 1.202, 'epoch': 20.6}
{'loss': 1.1389, 'grad_norm': 1.4066047668457031, 'learning_rate': 6.6666666666666675e-06, 'epoch': 20.62}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2661662101745605, 'eval_bleu': 14.1351, 'eval_gen_len': 23.6951, 'eval_runtime': 46.2236, 'eval_samples_per_second': 19.016, 'eval_steps_per_second': 1.19, 'epoch': 20.62}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2655200958251953, 'eval_bleu': 14.2365, 'eval_gen_len': 23.6689, 'eval_runtime': 44.6258, 'eval_samples_per_second': 19.697, 'eval_steps_per_second': 1.232, 'epoch': 20.63}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2657997608184814, 'eval_bleu': 14.1736, 'eval_gen_len': 23.6849, 'eval_runtime': 46.3607, 'eval_samples_per_second': 18.96, 'eval_steps_per_second': 1.186, 'epoch': 20.64}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2659406661987305, 'eval_bleu': 14.1504, 'eval_gen_len': 23.6644, 'eval_runtime': 45.7847, 'eval_samples_per_second': 19.199, 'eval_steps_per_second': 1.201, 'epoch': 20.66}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2662951946258545, 'eval_bleu': 14.1534, 'eval_gen_len': 23.6576, 'eval_runtime': 45.9283, 'eval_samples_per_second': 19.139, 'eval_steps_per_second': 1.198, 'epoch': 20.67}
{'loss': 1.1337, 'grad_norm': 1.4248110055923462, 'learning_rate': 3.3333333333333337e-06, 'epoch': 20.68}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.266554355621338, 'eval_bleu': 14.1749, 'eval_gen_len': 23.6724, 'eval_runtime': 46.7669, 'eval_samples_per_second': 18.795, 'eval_steps_per_second': 1.176, 'epoch': 20.68}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2663426399230957, 'eval_bleu': 14.1192, 'eval_gen_len': 23.6962, 'eval_runtime': 44.8275, 'eval_samples_per_second': 19.609, 'eval_steps_per_second': 1.227, 'epoch': 20.7}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.2666208744049072, 'eval_bleu': 14.1107, 'eval_gen_len': 23.6894, 'eval_runtime': 46.4142, 'eval_samples_per_second': 18.938, 'eval_steps_per_second': 1.185, 'epoch': 20.71}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.26609206199646, 'eval_bleu': 14.1405, 'eval_gen_len': 23.678, 'eval_runtime': 45.0732, 'eval_samples_per_second': 19.502, 'eval_steps_per_second': 1.22, 'epoch': 20.73}


  0%|          | 0/55 [00:00<?, ?it/s]

{'eval_loss': 2.266099214553833, 'eval_bleu': 14.1111, 'eval_gen_len': 23.694, 'eval_runtime': 46.7643, 'eval_samples_per_second': 18.796, 'eval_steps_per_second': 1.176, 'epoch': 20.74}
{'loss': 1.1381, 'grad_norm': 1.2692394256591797, 'learning_rate': 0.0, 'epoch': 20.75}


  0%|          | 0/55 [00:00<?, ?it/s]

Non-default generation parameters: {'max_length': 32, 'min_length': 8, 'early_stopping': True, 'num_beams': 4, 'length_penalty': 2.0, 'no_repeat_ngram_size': 3}


{'eval_loss': 2.2659811973571777, 'eval_bleu': 14.1212, 'eval_gen_len': 23.694, 'eval_runtime': 46.3468, 'eval_samples_per_second': 18.966, 'eval_steps_per_second': 1.187, 'epoch': 20.75}


Non-default generation parameters: {'max_length': 32, 'min_length': 8, 'early_stopping': True, 'num_beams': 4, 'length_penalty': 2.0, 'no_repeat_ngram_size': 3}


{'train_runtime': 92143.844, 'train_samples_per_second': 52.092, 'train_steps_per_second': 3.256, 'train_loss': 0.9730732552083333, 'epoch': 20.75}


VBox(children=(Label(value='0.001 MB of 1145.112 MB uploaded\r'), FloatProgress(value=9.652409585221486e-07, m…

0,1
eval/bleu,▁▃▂▅▅▄▄▄▄▄▄▆▆▆▆▅▅█▆▅▆▅▅█▇█▆█▆▇█▇▇▇▇▇▇▇▇▇
eval/gen_len,▂▄█▄▆▆▅▅▅▄▇█▆▄▃▅▃▃▃▅▅▁▂▃▂▃▂▂▅▄▃▄▄▁▃▄▄▂▃▂
eval/loss,▅▄▃▄▃▂▄▂▂▂▂▁▃▂▁▃▂▂▃▂▃▄▃▅▄▃▆▅▄▆▆▅▇▇▆▇▇▇██
eval/runtime,▁▁▇▇▇▇▇▇▇▇█▇▇▇▇▇▁▁▁▁█▁▂▄▁▂▁▁▁▁▁▁▄▆█▁▁▁▁▁
eval/samples_per_second,██▁▁▁▁▁▁▁▁▁▁▁▁▁▁████▁█▇▄█▆█▇▇███▄▂▁████▇
eval/steps_per_second,██▁▁▁▁▁▁▁▁▁▁▁▁▁▁████▁█▇▄█▆█▇▇███▄▂▁████▇
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
train/grad_norm,▆▁▂▆▂▅▄▂▁▂▂█▅▅▁▄▅▂▃▄▄▄▁▄▄▅▁▂▁▁▂▆▁▂▃▃▄▅▆▃
train/learning_rate,████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁

0,1
eval/bleu,14.1212
eval/gen_len,23.694
eval/loss,2.26598
eval/runtime,46.3468
eval/samples_per_second,18.966
eval/steps_per_second,1.187
train/epoch,20.75
train/global_step,300000.0
train/grad_norm,1.26924
train/learning_rate,0.0


In [11]:
# Directory that receives the model held by `trainer`; the same location is
# read back with `from_pretrained` in the inference cell further down.
path = "./wandb_mt5/mt5-tuned"
trainer.save_model(output_dir=path)

Non-default generation parameters: {'max_length': 32, 'min_length': 8, 'early_stopping': True, 'num_beams': 4, 'length_penalty': 2.0, 'no_repeat_ngram_size': 3}


In [17]:
# Sample Chinese input sentence (roughly: "I like to eat chicken rice and ice cream.")
# passed to the translation pipeline as a quick qualitative check.
text = "我喜欢吃鸡饭和冰淇淋。"

In [18]:
from transformers import pipeline

# Load the weights back from the directory the trainer saved to, instead of
# reusing whatever model object is still alive in the kernel.
model = AutoModelForSeq2SeqLM.from_pretrained("./wandb_mt5/mt5-tuned")

# Wrap model + tokenizer in a translation pipeline; `tokenizer` is the one
# created from `checkpoint` near the top of the notebook.
translator = pipeline(
    task="translation",
    model=model,
    tokenizer=tokenizer,
)

# Last expression of the cell: displays the translation of `text`.
translator(text)

[{'translation_text': 'I liked chicken and ice cream.'}]