### 0. Initial Setting

In [1]:
# %%capture
# !pip install datasets==1.0.2
# !pip install transformers==4.2.1

In [2]:
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

cache_dir = "/data4/yoomcache"
model_cache_dir = os.path.join(cache_dir, 'huggingface')
data_cache_dir = os.path.join(cache_dir, 'datasets')
checkpoint_dir = os.path.join(cache_dir, 'checkpoint')

import logging
logging.getLogger().setLevel(logging.CRITICAL)
logging.basicConfig(level=logging.INFO)


import torch
from datasets import load_dataset, load_metric, load_from_disk
from transformers import BertTokenizer, RobertaTokenizer, GPT2Tokenizer
from transformers import AutoConfig, EncoderDecoderConfig, EncoderDecoderModel
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments

import wandb
wandb.init(project="testing-gptONLY", entity="yoom-private")

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33myoom-private[0m (use `wandb login --relogin` to force relogin)


### 1. Initialize Model

In [3]:
config_encoder = AutoConfig.from_pretrained("gpt2", cache_dir=model_cache_dir)
config_decoder = AutoConfig.from_pretrained("gpt2", cache_dir=model_cache_dir)
config = EncoderDecoderConfig.from_encoder_decoder_configs(config_encoder, config_decoder, cache_dir=model_cache_dir)
model = EncoderDecoderModel(config=config)
# model.save_pretrained("roberta2gpt", cache_dir=model_cache_dir)
# model = EncoderDecoderModel.from_pretrained("roberta2gpt", cache_dir=model_cache_dir)

model.encoder.h = model.encoder.h[:4]
model.decoder.transformer.h = model.decoder.transformer.h[-4:]

In [4]:
model.decoder

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (crossattention): GPT2Attention(
          (c_attn): Conv1D()
          (q_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_cross_attn): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
      

In [7]:
# make sure GPT2 appends EOS in begin and end
def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
    outputs = [self.bos_token_id] + token_ids_0 + [self.eos_token_id]
    return outputs

GPT2Tokenizer.build_inputs_with_special_tokens = build_inputs_with_special_tokens


encoder_tokenizer = GPT2Tokenizer.from_pretrained("gpt2", cache_dir=model_cache_dir)
encoder_tokenizer.pad_token = encoder_tokenizer.unk_token
# encoder_tokenizer.bos_token = encoder_tokenizer.cls_token  # CLS token will work as BOS token
# encoder_tokenizer.eos_token = encoder_tokenizer.sep_token  # SEP token will work as EOS token

decoder_tokenizer = GPT2Tokenizer.from_pretrained("gpt2", cache_dir=model_cache_dir)
# set pad_token_id to unk_token_id -> be careful here as unk_token_id == eos_token_id == bos_token_id
decoder_tokenizer.pad_token = decoder_tokenizer.unk_token


model.config.decoder_start_token_id = encoder_tokenizer.cls_token_id
model.config.eos_token_id = encoder_tokenizer.sep_token_id
model.config.pad_token_id = encoder_tokenizer.pad_token_id
model.config.vocab_size = model.config.encoder.vocab_size


# set decoding params
model.config.decoder_start_token_id = decoder_tokenizer.bos_token_id
model.config.eos_token_id = decoder_tokenizer.eos_token_id
model.config.max_length = 142
model.config.min_length = 56
model.config.no_repeat_ngram_size = 3
model.early_stopping = True
model.length_penalty = 2.0
model.num_beams = 4

In [8]:
# Freeze decoder parameters
for param in model.decoder.parameters():
    param.requires_grad = False


### 2. Preparing Dataset

In [9]:
# map data correctly
def map_to_encoder_decoder_inputs(batch):    # Tokenizer will automatically set [BOS] <text> [EOS] 
    encoder_length, decoder_length = 512, 128
    inputs = encoder_tokenizer(batch["article"], 
                               padding="max_length", 
                               truncation=True, 
                               max_length=encoder_length)
    outputs = decoder_tokenizer(batch["highlights"], 
                                padding="max_length", 
                                truncation=True, 
                                max_length=decoder_length)
    
    batch["input_ids"] = inputs.input_ids
    batch["attention_mask"] = inputs.attention_mask
    batch["decoder_input_ids"] = outputs.input_ids
    batch["labels"] = outputs.input_ids.copy()
    batch["decoder_attention_mask"] = outputs.attention_mask

    # complicated list comprehension here because pad_token_id alone is not good enough to know whether label should be excluded or not
    batch["labels"] = -100 if batch["decoder_attention_mask"] == 0 else batch["labels"]

    assert len(inputs.input_ids) == encoder_length
    assert len(outputs.input_ids) == decoder_length

    return batch

In [10]:
if os.path.exists(os.path.join(cache_dir, 'preprocessed/train')):
    train_dataset = load_from_disk(os.path.join(cache_dir, 'preprocessed/train'))
else:
    train_dataset = load_dataset("ccdv/cnn_dailymail", "3.0.0", split="train", cache_dir=data_cache_dir)
    train_dataset = train_dataset.map(
        map_to_encoder_decoder_inputs, 
        # batched=True, 
        # batch_size=batch_size, 
        remove_columns=['id', 'article', 'highlights'],
    )
    train_dataset.set_format(
        type="torch", 
        columns=["input_ids", "attention_mask", "decoder_input_ids", "decoder_attention_mask", "labels"],
    )
    
    train_dataset.save_to_disk(os.path.join(cache_dir, 'preprocessed/train'))


if os.path.exists(os.path.join(cache_dir, 'preprocessed/val')):
    val_dataset = load_from_disk(os.path.join(cache_dir, 'preprocessed/val'))
else:
    val_dataset = load_dataset("ccdv/cnn_dailymail", "3.0.0", split="validation", cache_dir=data_cache_dir)
    val_dataset = val_dataset.map(
        map_to_encoder_decoder_inputs, 
        # batched=True, 
        # batch_size=batch_size, 
        remove_columns=['id', 'article', 'highlights'],
    )
    val_dataset.set_format(
        type="torch", 
        columns=["input_ids", "attention_mask", "decoder_input_ids", "decoder_attention_mask", "labels"],
    )
    val_dataset.save_to_disk(os.path.join(cache_dir, 'preprocessed/val'))

### Training Model

In [11]:
# load rouge for validation
rouge = load_metric("rouge")
# rouge = load_metric("rouge", experiment_id=1)

def compute_metrics(pred):
    labels_ids = pred.label_ids
    pred_ids = pred.predictions

    # all unnecessary tokens are removed
    pred_str = decoder_tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
    labels_ids[labels_ids == -100] = decoder_tokenizer.eos_token_id
    label_str = decoder_tokenizer.batch_decode(labels_ids, skip_special_tokens=True)

    rouge_output = rouge.compute(predictions=pred_str, references=label_str, rouge_types=["rouge2"])["rouge2"].mid

    return {
        "rouge2_precision": round(rouge_output.precision, 4),
        "rouge2_recall": round(rouge_output.recall, 4),
        "rouge2_fmeasure": round(rouge_output.fmeasure, 4),
    }

In [None]:
batch_size = 16

# set training arguments - these params are not really tuned, feel free to change
training_args = Seq2SeqTrainingArguments(
    predict_with_generate=True,
    output_dir=os.path.join(checkpoint_dir, "gptONLY"),
    do_train=True,
    do_eval=True,
    # do_predict=True,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    learning_rate=1e-4, 
#     weight_decay=0.0, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0,
    num_train_epochs=100,
    max_steps=-1,
    lr_scheduler_type='cosine',
    
    logging_strategy='steps',
    save_strategy='steps',
    evaluation_strategy='steps',
    logging_steps=1000,
    save_steps=1000,
    eval_steps=1000,
    warmup_steps=10000,
    save_total_limit=10,
    overwrite_output_dir=True,
)

# instantiate trainer
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# start training
trainer.train()

***** Running training *****
  Num examples = 28711
  Num Epochs = 100
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 179500
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


Step,Training Loss,Validation Loss,Rouge2 Precision,Rouge2 Recall,Rouge2 Fmeasure
1000,5.0233,3.589779,0.0,0.0,0.0
2000,4.4713,3.203995,0.0,0.0,0.0
3000,4.1948,3.085265,0.0,0.0,0.0
4000,4.0724,3.006499,0.0,0.0,0.0
5000,3.9709,2.895184,0.0002,0.0001,0.0002
6000,3.8102,2.781841,0.0001,0.0001,0.0001
7000,3.6678,2.682241,0.0,0.0001,0.0
8000,3.5773,2.575866,0.0008,0.0016,0.001
9000,3.4203,2.447604,0.0015,0.004,0.0021
10000,3.3019,2.36368,0.0015,0.0039,0.0021


***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-1000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-1000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-1000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-2000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-2000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-2000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-3000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-3000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-3000/pytorch_model.bin
***** Runn

Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-10000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-10000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-11000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-11000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-11000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-1000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-12000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-12000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-12000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache

***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-19000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-19000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-19000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-9000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-20000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-20000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-20000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-10000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/

***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-27000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-27000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-27000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-17000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-28000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-28000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-28000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-18000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache

Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-35000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-35000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-25000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-36000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-36000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-36000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-26000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-37000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-37000/config.json
Model weights saved in /da

Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-33000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-44000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-44000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-44000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-34000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-45000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-45000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-45000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-35000] due to args.save_total_limit
****

***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-52000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-52000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-52000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-42000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-53000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-53000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-53000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-43000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache

Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-60000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-60000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-50000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-61000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-61000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-61000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-51000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-62000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-62000/config.json
Model weights saved in /da

Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-58000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-69000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-69000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-69000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-59000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-70000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-70000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-70000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-60000] due to args.save_total_limit
****

***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-77000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-77000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-77000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-67000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-78000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-78000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-78000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-68000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache

Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-85000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-85000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-75000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-86000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-86000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-86000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-76000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-87000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-87000/config.json
Model weights saved in /da

Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-83000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-94000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-94000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-94000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-84000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-95000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-95000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-95000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-85000] due to args.save_total_limit
****

***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-102000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-102000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-102000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-92000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-103000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-103000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-103000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-93000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoo

***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-110000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-110000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-110000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-100000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-111000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-111000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-111000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-101000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/y

Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-118000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-118000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-108000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-119000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-119000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-119000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-109000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-120000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-120000/config.json
Model weights sav

Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-116000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-127000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-127000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-127000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-117000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-128000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-128000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-128000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-118000] due to args.save_total_l

***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-135000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-135000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-135000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-125000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-136000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-136000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-136000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-126000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/y

***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-143000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-143000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-143000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-133000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-144000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-144000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-144000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-134000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/y

Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-151000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-151000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-141000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-152000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-152000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-152000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-142000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-153000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-153000/config.json
Model weights sav

Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-149000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-160000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-160000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-160000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-150000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-161000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-161000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-161000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-151000] due to args.save_total_l

***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-168000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-168000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-168000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-158000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
Saving model checkpoint to /data4/yoomcache/checkpoint/gptONLY/checkpoint-169000
Configuration saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-169000/config.json
Model weights saved in /data4/yoomcache/checkpoint/gptONLY/checkpoint-169000/pytorch_model.bin
Deleting older checkpoint [/data4/yoomcache/checkpoint/gptONLY/checkpoint-159000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 1337
  Batch size = 16
