In [1]:
from transformers import AutoTokenizer
from transformers import TrainingArguments
from transformers import AdapterTrainer
from datasets import load_dataset, load_from_disk
from transformers import AutoModelForQuestionAnswering
from torch.utils.data import DataLoader
from transformers import default_data_collator
import evaluate
from tqdm import tqdm
from transformers import pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_checkpoint = "bert-base-multilingual-cased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

In [3]:
max_length = 512
stride = 128

def preprocess_training_examples(examples):
    questions = [q.strip() for q in examples["question"]]
    context = examples["context"]
    
    # Tokenize question and context together into one input,
    # they will seperable with a special token between them.
    # Tokenizer will also split context into multiple chuncks,
    # if the max_length is exceeded.
    inputs = tokenizer(
        questions,
        context,
        max_length=max_length,
        truncation="only_second",
        stride=stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    # offset_mapping is the result of the split into
    # multiple chunks
    offset_mapping = inputs.pop("offset_mapping")
    sample_map = inputs.pop("overflow_to_sample_mapping")
    answers = examples["answers"]
    start_positions = []
    end_positions = []

    # This step is required to see if an answer is inside
    # the context chunks or not. It labels the multiple chunks
    # generated by the tokenizer into either not having
    # the answer, or where the answer is located
    for i, offset in enumerate(offset_mapping):
        sample_idx = sample_map[i]
        answer = answers[sample_idx]
        start_char = answer["answer_start"][0]
        end_char = answer["answer_start"][0] + len(answer["text"][0])
        sequence_ids = inputs.sequence_ids(i)

        # Find the start and end of the context
        idx = 0
        while sequence_ids[idx] != 1:
            idx += 1
        context_start = idx
        while sequence_ids[idx] == 1:
            idx += 1
        context_end = idx - 1

        # If the answer is not fully inside the context, label is (0, 0)
        if offset[context_start][0] > start_char or offset[context_end][1] < end_char:
            start_positions.append(0)
            end_positions.append(0)
        else:
            # Otherwise it's the start and end token positions
            idx = context_start
            while idx <= context_end and offset[idx][0] <= start_char:
                idx += 1
            start_positions.append(idx - 1)

            idx = context_end
            while idx >= context_start and offset[idx][1] >= end_char:
                idx -= 1
            end_positions.append(idx + 1)

    inputs["start_positions"] = start_positions
    inputs["end_positions"] = end_positions
    return inputs

In [4]:
def preprocess_validation_examples(examples):
    questions = [q.strip() for q in examples["question"]]
    context = examples["context"]
    
    # It's the same thing as with the preprocess_training_examples
    # tokenizer, but here 
    inputs = tokenizer(
        questions,
        context,
        max_length=max_length,
        truncation="only_second",
        stride=stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    sample_map = inputs.pop("overflow_to_sample_mapping")
    example_ids = []

    for i in range(len(inputs["input_ids"])):
        sample_idx = sample_map[i]
        example_ids.append(examples["id"][sample_idx])

        sequence_ids = inputs.sequence_ids(i)
        offset = inputs["offset_mapping"][i]
        inputs["offset_mapping"][i] = [
            o if sequence_ids[k] == 1 else None for k, o in enumerate(offset)
        ]

    inputs["example_id"] = example_ids
    return inputs

In [3]:
langs = ["en", "es", "de", "el", "ru", "tr", "ar", "vi", "zh", "hi", "ro", "th"]

In [4]:
model = AutoModelForQuestionAnswering.from_pretrained(model_checkpoint)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForQuestionAnswering: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-bas

In [10]:
batch_size = 8
epochs = 20

In [11]:
for lang in langs:
    train = load_from_disk(f'../data/xquad_{lang}_train.hf/')
    val = load_from_disk(f'../data/xquad_{lang}_val.hf/')
    train_dataset = train.map(
        preprocess_training_examples,
        batched=True,
        remove_columns=train.column_names,
    )

    validation_dataset = val.map(
        preprocess_validation_examples,
        batched=True,
        remove_columns=val.column_names,
    )
    train_dataset.set_format("torch")
    validation_set = validation_dataset.remove_columns(["example_id", "offset_mapping"])
    validation_set.set_format("torch")

    train_dataloader = DataLoader(
        train_dataset,
        shuffle=True,
        collate_fn=default_data_collator,
        batch_size=8,
    )

    validation_dataloader = DataLoader(
        validation_dataset,
        shuffle=True,
        collate_fn=default_data_collator,
        batch_size=8,
    )
    adapter_name = f'xquad_adapter'
    model.add_adapter(adapter_name)
    # model.load_adapter("../adapter_weights/adapter_qa_en_mbert_4ep/")
    model.train_adapter(adapter_name)
    model.set_active_adapters(adapter_name)
    model_name = model_checkpoint.split("/")[-1]
    args = TrainingArguments(
        f"{model_name}-adapter-xquad",
        evaluation_strategy = "epoch",
        learning_rate=1e-4,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=epochs,
        weight_decay=0.01
    )
    trainer = AdapterTrainer(
        model,
        args,
        train_dataset=train_dataset,
        eval_dataset=validation_dataset,
        data_collator=default_data_collator,
        tokenizer=tokenizer
    )
    trainer.train()
    model.save_adapter(f"../adapter_qa_{lang}_mbert_{epochs}", adapter_name=adapter_name)
    model.set_active_adapters(None)
    model.delete_adapter(adapter_name)
    del train, val

Loading cached processed dataset at ../data/xquad_en_train.hf/cache-c1c761e0b7b0eb11.arrow
Loading cached processed dataset at ../data/xquad_en_val.hf/cache-3ca8755dd58ddcef.arrow
Adding adapter 'xquad_adapter'.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 727
  Num Epochs = 20
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 1820
  5%|▌         | 91/1820 [00:47<14:01,  2.05it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation

{'eval_runtime': 20.4812, 'eval_samples_per_second': 35.496, 'eval_steps_per_second': 4.443, 'epoch': 1.0}


 10%|█         | 182/1820 [01:54<13:36,  2.01it/s] The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 10%|█         | 182/1820 [02:15<13:36,  2.01it/s]

{'eval_runtime': 21.2761, 'eval_samples_per_second': 34.17, 'eval_steps_per_second': 4.277, 'epoch': 2.0}


 15%|█▌        | 273/1820 [03:03<12:51,  2.01it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 15%|█▌        | 273/1820 [03:23<12:51,  2.01it/s]

{'eval_runtime': 20.9088, 'eval_samples_per_second': 34.77, 'eval_steps_per_second': 4.352, 'epoch': 3.0}


 20%|██        | 364/1820 [04:10<11:54,  2.04it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 20%|██        | 364/1820 [04:31<11:54,  2.04it/s]

{'eval_runtime': 21.0355, 'eval_samples_per_second': 34.561, 'eval_steps_per_second': 4.326, 'epoch': 4.0}


 25%|██▌       | 455/1820 [05:18<11:12,  2.03it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 25%|██▌       | 455/1820 [05:39<11:12,  2.03it/s]

{'eval_runtime': 21.0017, 'eval_samples_per_second': 34.616, 'eval_steps_per_second': 4.333, 'epoch': 5.0}


 27%|██▋       | 500/1820 [06:03<11:25,  1.92it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapt

{'loss': 2.4944, 'learning_rate': 7.252747252747253e-05, 'epoch': 5.49}


 30%|███       | 546/1820 [06:27<10:26,  2.03it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 30%|███       | 546/1820 [06:48<10:26,  2.03it/s]

{'eval_runtime': 21.5247, 'eval_samples_per_second': 33.775, 'eval_steps_per_second': 4.228, 'epoch': 6.0}


 35%|███▌      | 637/1820 [07:36<09:47,  2.01it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 35%|███▌      | 637/1820 [07:57<09:47,  2.01it/s]

{'eval_runtime': 21.0269, 'eval_samples_per_second': 34.575, 'eval_steps_per_second': 4.328, 'epoch': 7.0}


 40%|████      | 728/1820 [08:44<08:56,  2.03it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 40%|████      | 728/1820 [09:05<08:56,  2.03it/s]

{'eval_runtime': 20.9718, 'eval_samples_per_second': 34.666, 'eval_steps_per_second': 4.339, 'epoch': 8.0}


 45%|████▌     | 819/1820 [09:53<08:18,  2.01it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 45%|████▌     | 819/1820 [10:14<08:18,  2.01it/s]

{'eval_runtime': 21.0443, 'eval_samples_per_second': 34.546, 'eval_steps_per_second': 4.324, 'epoch': 9.0}


 50%|█████     | 910/1820 [11:02<07:27,  2.03it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 50%|█████     | 910/1820 [11:23<07:27,  2.03it/s]

{'eval_runtime': 21.4546, 'eval_samples_per_second': 33.885, 'eval_steps_per_second': 4.242, 'epoch': 10.0}


 55%|█████▍    | 1000/1820 [12:11<07:19,  1.87it/s] Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cas

{'loss': 1.0786, 'learning_rate': 4.505494505494506e-05, 'epoch': 10.99}


 55%|█████▌    | 1001/1820 [12:12<08:26,  1.62it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 55%|█████▌    | 1001/1820 [12:34<08:26,  1.62it/s]

{'eval_runtime': 21.7383, 'eval_samples_per_second': 33.443, 'eval_steps_per_second': 4.186, 'epoch': 11.0}


 60%|██████    | 1092/1820 [13:21<05:59,  2.02it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 60%|██████    | 1092/1820 [13:42<05:59,  2.02it/s]

{'eval_runtime': 21.007, 'eval_samples_per_second': 34.607, 'eval_steps_per_second': 4.332, 'epoch': 12.0}


 65%|██████▌   | 1183/1820 [14:29<05:15,  2.02it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 65%|██████▌   | 1183/1820 [14:50<05:15,  2.02it/s]

{'eval_runtime': 20.9944, 'eval_samples_per_second': 34.628, 'eval_steps_per_second': 4.334, 'epoch': 13.0}


 70%|███████   | 1274/1820 [15:37<04:28,  2.04it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 70%|███████   | 1274/1820 [15:58<04:28,  2.04it/s]

{'eval_runtime': 21.0002, 'eval_samples_per_second': 34.619, 'eval_steps_per_second': 4.333, 'epoch': 14.0}


 75%|███████▌  | 1365/1820 [16:45<03:44,  2.03it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 75%|███████▌  | 1365/1820 [17:06<03:44,  2.03it/s]

{'eval_runtime': 21.1313, 'eval_samples_per_second': 34.404, 'eval_steps_per_second': 4.306, 'epoch': 15.0}


 80%|████████  | 1456/1820 [17:53<02:59,  2.03it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 80%|████████  | 1456/1820 [18:14<02:59,  2.03it/s]

{'eval_runtime': 21.204, 'eval_samples_per_second': 34.286, 'eval_steps_per_second': 4.292, 'epoch': 16.0}


 82%|████████▏ | 1500/1820 [18:37<02:44,  1.95it/s]Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-case

{'loss': 0.6487, 'learning_rate': 1.7582417582417584e-05, 'epoch': 16.48}


 85%|████████▌ | 1547/1820 [19:02<02:20,  1.95it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 85%|████████▌ | 1547/1820 [19:24<02:20,  1.95it/s]

{'eval_runtime': 21.6293, 'eval_samples_per_second': 33.612, 'eval_steps_per_second': 4.207, 'epoch': 17.0}


 90%|█████████ | 1638/1820 [20:11<01:29,  2.03it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 90%|█████████ | 1638/1820 [20:32<01:29,  2.03it/s]

{'eval_runtime': 21.0649, 'eval_samples_per_second': 34.512, 'eval_steps_per_second': 4.32, 'epoch': 18.0}


 95%|█████████▌| 1729/1820 [21:19<00:44,  2.03it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 95%|█████████▌| 1729/1820 [21:40<00:44,  2.03it/s]

{'eval_runtime': 20.9678, 'eval_samples_per_second': 34.672, 'eval_steps_per_second': 4.34, 'epoch': 19.0}


100%|██████████| 1820/1820 [22:27<00:00,  2.03it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

100%|██████████| 1820/1820 [22:48<00:00,  2.03it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 1820/1820 [22:49<00:00,  1.33it/s]
Configuration saved in ../adapter_qa_en_mbert_20/adapter_config.json
Module weights saved in ../adapter_qa_en_mbert_20/pytorch_adapter.bin


{'eval_runtime': 21.1304, 'eval_samples_per_second': 34.405, 'eval_steps_per_second': 4.307, 'epoch': 20.0}
{'train_runtime': 1369.0062, 'train_samples_per_second': 10.621, 'train_steps_per_second': 1.329, 'train_loss': 1.2483331617418227, 'epoch': 20.0}


Configuration saved in ../adapter_qa_en_mbert_20/head_config.json
Module weights saved in ../adapter_qa_en_mbert_20/pytorch_model_head.bin
Loading cached processed dataset at ../data/xquad_es_train.hf/cache-c25258d74683ab4e.arrow
Loading cached processed dataset at ../data/xquad_es_val.hf/cache-7f4c7e3071f544fb.arrow
Adding adapter 'xquad_adapter'.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 729
  Num Epochs = 20
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 1840
  5%|▍         | 91/1840 [00:46<15:00,  1.94it/s]The following columns in the evaluation set  don't have

{'eval_runtime': 21.2252, 'eval_samples_per_second': 34.346, 'eval_steps_per_second': 4.334, 'epoch': 1.0}


 10%|▉         | 183/1840 [01:55<14:13,  1.94it/s] The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 10%|█         | 184/1840 [02:16<14:13,  1.94it/s]

{'eval_runtime': 21.3739, 'eval_samples_per_second': 34.107, 'eval_steps_per_second': 4.304, 'epoch': 2.0}


 15%|█▍        | 275/1840 [03:04<13:23,  1.95it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 15%|█▌        | 276/1840 [03:26<13:22,  1.95it/s]

{'eval_runtime': 21.7965, 'eval_samples_per_second': 33.446, 'eval_steps_per_second': 4.221, 'epoch': 3.0}


 20%|█▉        | 367/1840 [04:13<12:47,  1.92it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 20%|██        | 368/1840 [04:35<12:47,  1.92it/s]

{'eval_runtime': 22.1614, 'eval_samples_per_second': 32.895, 'eval_steps_per_second': 4.151, 'epoch': 4.0}


 25%|██▍       | 459/1840 [05:22<11:51,  1.94it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 25%|██▌       | 460/1840 [05:44<11:50,  1.94it/s]

{'eval_runtime': 21.2836, 'eval_samples_per_second': 34.252, 'eval_steps_per_second': 4.323, 'epoch': 5.0}


 27%|██▋       | 500/1840 [06:04<11:27,  1.95it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapt

{'loss': 2.5303, 'learning_rate': 7.282608695652175e-05, 'epoch': 5.43}


 30%|██▉       | 551/1840 [06:31<11:04,  1.94it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 30%|███       | 552/1840 [06:53<11:04,  1.94it/s]

{'eval_runtime': 21.6238, 'eval_samples_per_second': 33.713, 'eval_steps_per_second': 4.255, 'epoch': 6.0}


 35%|███▍      | 643/1840 [07:40<10:28,  1.90it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 35%|███▌      | 644/1840 [08:02<10:28,  1.90it/s]

{'eval_runtime': 22.2013, 'eval_samples_per_second': 32.836, 'eval_steps_per_second': 4.144, 'epoch': 7.0}


 40%|███▉      | 735/1840 [08:51<09:59,  1.84it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 40%|████      | 736/1840 [09:14<09:58,  1.84it/s]

{'eval_runtime': 22.3321, 'eval_samples_per_second': 32.644, 'eval_steps_per_second': 4.12, 'epoch': 8.0}


 45%|████▍     | 827/1840 [10:02<09:18,  1.81it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 45%|████▌     | 828/1840 [10:24<09:17,  1.81it/s]

{'eval_runtime': 21.7696, 'eval_samples_per_second': 33.487, 'eval_steps_per_second': 4.226, 'epoch': 9.0}


 50%|████▉     | 919/1840 [11:12<07:59,  1.92it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 50%|█████     | 920/1840 [11:34<07:59,  1.92it/s]

{'eval_runtime': 22.2101, 'eval_samples_per_second': 32.823, 'eval_steps_per_second': 4.142, 'epoch': 10.0}


 54%|█████▍    | 1000/1840 [12:17<07:21,  1.90it/s] Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/adapter_config.json


{'loss': 1.2245, 'learning_rate': 4.565217391304348e-05, 'epoch': 10.87}


Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-xquad/

{'eval_runtime': 22.0453, 'eval_samples_per_second': 33.068, 'eval_steps_per_second': 4.173, 'epoch': 11.0}


 60%|█████▉    | 1103/1840 [13:34<06:33,  1.87it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 60%|██████    | 1104/1840 [13:56<06:33,  1.87it/s]

{'eval_runtime': 21.7033, 'eval_samples_per_second': 33.589, 'eval_steps_per_second': 4.239, 'epoch': 12.0}


 65%|██████▍   | 1195/1840 [14:43<05:34,  1.93it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 65%|██████▌   | 1196/1840 [15:05<05:34,  1.93it/s]

{'eval_runtime': 21.7095, 'eval_samples_per_second': 33.58, 'eval_steps_per_second': 4.238, 'epoch': 13.0}


 70%|██████▉   | 1287/1840 [15:53<04:56,  1.86it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 70%|███████   | 1288/1840 [16:15<04:56,  1.86it/s]

{'eval_runtime': 21.7754, 'eval_samples_per_second': 33.478, 'eval_steps_per_second': 4.225, 'epoch': 14.0}


 75%|███████▍  | 1379/1840 [17:04<04:00,  1.92it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 75%|███████▌  | 1380/1840 [17:26<03:59,  1.92it/s]

{'eval_runtime': 21.6242, 'eval_samples_per_second': 33.712, 'eval_steps_per_second': 4.254, 'epoch': 15.0}


 80%|███████▉  | 1471/1840 [18:14<03:14,  1.89it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 80%|████████  | 1472/1840 [18:36<03:14,  1.89it/s]

{'eval_runtime': 22.2083, 'eval_samples_per_second': 32.825, 'eval_steps_per_second': 4.143, 'epoch': 16.0}


 82%|████████▏ | 1500/1840 [18:51<02:58,  1.90it/s]Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-case

{'loss': 0.7933, 'learning_rate': 1.8478260869565216e-05, 'epoch': 16.3}


 85%|████████▍ | 1563/1840 [19:24<02:24,  1.91it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 85%|████████▌ | 1564/1840 [19:46<02:24,  1.91it/s]

{'eval_runtime': 21.6817, 'eval_samples_per_second': 33.623, 'eval_steps_per_second': 4.243, 'epoch': 17.0}


 90%|████████▉ | 1655/1840 [20:34<01:36,  1.92it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 90%|█████████ | 1656/1840 [20:55<01:35,  1.92it/s]

{'eval_runtime': 21.6, 'eval_samples_per_second': 33.75, 'eval_steps_per_second': 4.259, 'epoch': 18.0}


 95%|█████████▍| 1747/1840 [21:43<00:49,  1.88it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 95%|█████████▌| 1748/1840 [22:05<00:48,  1.88it/s]

{'eval_runtime': 21.6495, 'eval_samples_per_second': 33.673, 'eval_steps_per_second': 4.25, 'epoch': 19.0}


100%|█████████▉| 1839/1840 [22:53<00:00,  1.93it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

100%|██████████| 1840/1840 [23:15<00:00,  1.93it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 1840/1840 [23:15<00:00,  1.32it/s]
Configuration saved in ../adapter_qa_es_mbert_20/adapter_config.json
Module weights saved in ../adapter_qa_es_mbert_20/pytorch_adapter.bin
Configuration saved in ../adapter_qa_es_mbert_20/head_config.json
Module weights saved in ../adapter_qa_es_mbert_20/pytorch_model_head.bin
Loading cached processed dataset at ../data/xquad_de_train.hf/cache-97d89ce126cdf80b.arrow
Loading cached processed dataset at ../data/xquad_de_val.hf/cache-3a8eced574c15597.arrow
Adding adapter 'xquad_adapter'.


{'eval_runtime': 21.9732, 'eval_samples_per_second': 33.177, 'eval_steps_per_second': 4.187, 'epoch': 20.0}
{'train_runtime': 1395.4391, 'train_samples_per_second': 10.448, 'train_steps_per_second': 1.319, 'train_loss': 1.356510759436566, 'epoch': 20.0}


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 731
  Num Epochs = 20
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 1840
  5%|▌         | 92/1840 [00:47<12:30,  2.33it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

  5%|▌         | 92/1840 [01:09<12:30,  2.33it/s]

{'eval_runtime': 21.5071, 'eval_samples_per_second': 33.989, 'eval_steps_per_second': 4.278, 'epoch': 1.0}


 10%|█         | 184/1840 [01:57<12:04,  2.29it/s] The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 10%|█         | 184/1840 [02:18<12:04,  2.29it/s]

{'eval_runtime': 21.5859, 'eval_samples_per_second': 33.865, 'eval_steps_per_second': 4.262, 'epoch': 2.0}


 15%|█▌        | 276/1840 [03:06<11:04,  2.35it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 15%|█▌        | 276/1840 [03:28<11:04,  2.35it/s]

{'eval_runtime': 21.5514, 'eval_samples_per_second': 33.919, 'eval_steps_per_second': 4.269, 'epoch': 3.0}


 20%|██        | 368/1840 [04:16<10:34,  2.32it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 20%|██        | 368/1840 [04:38<10:34,  2.32it/s]

{'eval_runtime': 21.9424, 'eval_samples_per_second': 33.315, 'eval_steps_per_second': 4.193, 'epoch': 4.0}


 25%|██▌       | 460/1840 [05:25<10:01,  2.29it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 25%|██▌       | 460/1840 [05:47<10:01,  2.29it/s]

{'eval_runtime': 21.4859, 'eval_samples_per_second': 34.022, 'eval_steps_per_second': 4.282, 'epoch': 5.0}


 27%|██▋       | 500/1840 [06:08<11:29,  1.94it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapt

{'loss': 2.5567, 'learning_rate': 7.282608695652175e-05, 'epoch': 5.43}


 30%|███       | 552/1840 [06:35<09:09,  2.35it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 30%|███       | 552/1840 [06:56<09:09,  2.35it/s]

{'eval_runtime': 21.4432, 'eval_samples_per_second': 34.09, 'eval_steps_per_second': 4.29, 'epoch': 6.0}


 35%|███▌      | 644/1840 [07:44<08:26,  2.36it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 35%|███▌      | 644/1840 [08:06<08:26,  2.36it/s]

{'eval_runtime': 21.9605, 'eval_samples_per_second': 33.287, 'eval_steps_per_second': 4.189, 'epoch': 7.0}


 40%|████      | 736/1840 [08:54<08:04,  2.28it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 40%|████      | 736/1840 [09:16<08:04,  2.28it/s]

{'eval_runtime': 21.6429, 'eval_samples_per_second': 33.776, 'eval_steps_per_second': 4.251, 'epoch': 8.0}


 45%|████▌     | 828/1840 [10:04<07:25,  2.27it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 45%|████▌     | 828/1840 [10:25<07:25,  2.27it/s]

{'eval_runtime': 21.6577, 'eval_samples_per_second': 33.752, 'eval_steps_per_second': 4.248, 'epoch': 9.0}


 50%|█████     | 920/1840 [11:14<06:45,  2.27it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 50%|█████     | 920/1840 [11:36<06:45,  2.27it/s]

{'eval_runtime': 22.19, 'eval_samples_per_second': 32.943, 'eval_steps_per_second': 4.146, 'epoch': 10.0}


 54%|█████▍    | 1000/1840 [12:18<07:33,  1.85it/s] Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cas

{'loss': 1.3108, 'learning_rate': 4.565217391304348e-05, 'epoch': 10.87}


 55%|█████▌    | 1012/1840 [12:25<06:10,  2.23it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 55%|█████▌    | 1012/1840 [12:47<06:10,  2.23it/s]

{'eval_runtime': 22.2085, 'eval_samples_per_second': 32.915, 'eval_steps_per_second': 4.143, 'epoch': 11.0}


 60%|██████    | 1104/1840 [13:35<05:23,  2.28it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 60%|██████    | 1104/1840 [13:57<05:23,  2.28it/s]

{'eval_runtime': 21.8416, 'eval_samples_per_second': 33.468, 'eval_steps_per_second': 4.212, 'epoch': 12.0}


 65%|██████▌   | 1196/1840 [14:45<04:45,  2.25it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 65%|██████▌   | 1196/1840 [15:07<04:45,  2.25it/s]

{'eval_runtime': 21.7846, 'eval_samples_per_second': 33.556, 'eval_steps_per_second': 4.223, 'epoch': 13.0}


 70%|███████   | 1288/1840 [15:56<04:05,  2.24it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 70%|███████   | 1288/1840 [16:18<04:05,  2.24it/s]

{'eval_runtime': 21.9628, 'eval_samples_per_second': 33.283, 'eval_steps_per_second': 4.189, 'epoch': 14.0}


 75%|███████▌  | 1380/1840 [17:07<03:17,  2.33it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 75%|███████▌  | 1380/1840 [17:29<03:17,  2.33it/s]

{'eval_runtime': 21.8086, 'eval_samples_per_second': 33.519, 'eval_steps_per_second': 4.219, 'epoch': 15.0}


 80%|████████  | 1472/1840 [18:17<02:36,  2.34it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 80%|████████  | 1472/1840 [18:39<02:36,  2.34it/s]

{'eval_runtime': 21.8371, 'eval_samples_per_second': 33.475, 'eval_steps_per_second': 4.213, 'epoch': 16.0}


 82%|████████▏ | 1500/1840 [18:54<03:01,  1.87it/s]Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-case

{'loss': 0.8337, 'learning_rate': 1.8478260869565216e-05, 'epoch': 16.3}


 85%|████████▌ | 1564/1840 [19:27<01:57,  2.35it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 85%|████████▌ | 1564/1840 [19:49<01:57,  2.35it/s]

{'eval_runtime': 21.6974, 'eval_samples_per_second': 33.691, 'eval_steps_per_second': 4.24, 'epoch': 17.0}


 90%|█████████ | 1656/1840 [20:38<01:18,  2.34it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 90%|█████████ | 1656/1840 [21:00<01:18,  2.34it/s]

{'eval_runtime': 21.9958, 'eval_samples_per_second': 33.234, 'eval_steps_per_second': 4.183, 'epoch': 18.0}


 95%|█████████▌| 1748/1840 [21:48<00:38,  2.36it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 95%|█████████▌| 1748/1840 [22:09<00:38,  2.36it/s]

{'eval_runtime': 21.2721, 'eval_samples_per_second': 34.364, 'eval_steps_per_second': 4.325, 'epoch': 19.0}


100%|██████████| 1840/1840 [22:56<00:00,  2.36it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

100%|██████████| 1840/1840 [23:17<00:00,  2.36it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 1840/1840 [23:17<00:00,  1.32it/s]
Configuration saved in ../adapter_qa_de_mbert_20/adapter_config.json
Module weights saved in ../adapter_qa_de_mbert_20/pytorch_adapter.bin
Configuration saved in ../adapter_qa_de_mbert_20/head_config.json
Module weights saved in ../adapter_qa_de_mbert_20/pytorch_model_head.bin
Loading cached processed dataset at ../data/xquad_el_train.hf/cache-f51e0063d7391f8c.arrow


{'eval_runtime': 21.2535, 'eval_samples_per_second': 34.394, 'eval_steps_per_second': 4.329, 'epoch': 20.0}
{'train_runtime': 1397.9125, 'train_samples_per_second': 10.458, 'train_steps_per_second': 1.316, 'train_loss': 1.4007206875344982, 'epoch': 20.0}


Loading cached processed dataset at ../data/xquad_el_val.hf/cache-2b9876c108889bbb.arrow
Adding adapter 'xquad_adapter'.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 858
  Num Epochs = 20
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 2160
  5%|▌         | 108/2160 [00:55<14:33,  2.35it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

  5%|▌         | 108/2160 [01:21<14:33,  2.3

{'eval_runtime': 25.8493, 'eval_samples_per_second': 33.192, 'eval_steps_per_second': 4.178, 'epoch': 1.0}


 10%|█         | 216/2160 [02:16<13:17,  2.44it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 10%|█         | 216/2160 [02:41<13:17,  2.44it/s]

{'eval_runtime': 25.0229, 'eval_samples_per_second': 34.289, 'eval_steps_per_second': 4.316, 'epoch': 2.0}


 15%|█▌        | 324/2160 [03:38<12:34,  2.43it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 15%|█▌        | 324/2160 [04:04<12:34,  2.43it/s]

{'eval_runtime': 25.3975, 'eval_samples_per_second': 33.783, 'eval_steps_per_second': 4.252, 'epoch': 3.0}


 20%|██        | 432/2160 [05:01<12:13,  2.36it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 20%|██        | 432/2160 [05:27<12:13,  2.36it/s]

{'eval_runtime': 25.7177, 'eval_samples_per_second': 33.362, 'eval_steps_per_second': 4.199, 'epoch': 4.0}


 23%|██▎       | 500/2160 [06:03<14:43,  1.88it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapt

{'loss': 2.5543, 'learning_rate': 7.685185185185185e-05, 'epoch': 4.63}


 25%|██▌       | 540/2160 [06:24<11:24,  2.37it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 25%|██▌       | 540/2160 [06:50<11:24,  2.37it/s]

{'eval_runtime': 25.721, 'eval_samples_per_second': 33.358, 'eval_steps_per_second': 4.199, 'epoch': 5.0}


 30%|███       | 648/2160 [07:46<10:35,  2.38it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 30%|███       | 648/2160 [08:12<10:35,  2.38it/s]

{'eval_runtime': 25.7013, 'eval_samples_per_second': 33.384, 'eval_steps_per_second': 4.202, 'epoch': 6.0}


 35%|███▌      | 756/2160 [09:09<09:55,  2.36it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 35%|███▌      | 756/2160 [09:34<09:55,  2.36it/s]

{'eval_runtime': 25.542, 'eval_samples_per_second': 33.592, 'eval_steps_per_second': 4.228, 'epoch': 7.0}


 40%|████      | 864/2160 [10:31<09:01,  2.39it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 40%|████      | 864/2160 [10:56<09:01,  2.39it/s]

{'eval_runtime': 25.5721, 'eval_samples_per_second': 33.552, 'eval_steps_per_second': 4.223, 'epoch': 8.0}


 45%|████▌     | 972/2160 [11:53<08:27,  2.34it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 45%|████▌     | 972/2160 [12:19<08:27,  2.34it/s]

{'eval_runtime': 25.9595, 'eval_samples_per_second': 33.051, 'eval_steps_per_second': 4.16, 'epoch': 9.0}


 46%|████▋     | 1000/2160 [12:34<10:29,  1.84it/s] Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cas

{'loss': 1.3939, 'learning_rate': 5.370370370370371e-05, 'epoch': 9.26}


 50%|█████     | 1080/2160 [13:18<07:38,  2.36it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 50%|█████     | 1080/2160 [13:44<07:38,  2.36it/s]

{'eval_runtime': 26.0177, 'eval_samples_per_second': 32.978, 'eval_steps_per_second': 4.151, 'epoch': 10.0}


 55%|█████▌    | 1188/2160 [14:42<06:56,  2.33it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 55%|█████▌    | 1188/2160 [15:08<06:56,  2.33it/s]

{'eval_runtime': 26.3246, 'eval_samples_per_second': 32.593, 'eval_steps_per_second': 4.103, 'epoch': 11.0}


 60%|██████    | 1296/2160 [16:04<05:57,  2.42it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 60%|██████    | 1296/2160 [16:30<05:57,  2.42it/s]

{'eval_runtime': 25.6717, 'eval_samples_per_second': 33.422, 'eval_steps_per_second': 4.207, 'epoch': 12.0}


 65%|██████▌   | 1404/2160 [17:28<05:28,  2.30it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 65%|██████▌   | 1404/2160 [17:54<05:28,  2.30it/s]

{'eval_runtime': 26.2437, 'eval_samples_per_second': 32.694, 'eval_steps_per_second': 4.115, 'epoch': 13.0}


 69%|██████▉   | 1500/2160 [18:45<05:40,  1.94it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-ca

{'loss': 0.9669, 'learning_rate': 3.055555555555556e-05, 'epoch': 13.89}


 70%|███████   | 1512/2160 [18:51<04:29,  2.40it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 70%|███████   | 1512/2160 [19:16<04:29,  2.40it/s]

{'eval_runtime': 25.4564, 'eval_samples_per_second': 33.705, 'eval_steps_per_second': 4.243, 'epoch': 14.0}


 75%|███████▌  | 1620/2160 [20:12<03:45,  2.39it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 75%|███████▌  | 1620/2160 [20:38<03:45,  2.39it/s]

{'eval_runtime': 25.2269, 'eval_samples_per_second': 34.011, 'eval_steps_per_second': 4.281, 'epoch': 15.0}


 80%|████████  | 1728/2160 [21:33<02:58,  2.42it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 80%|████████  | 1728/2160 [21:59<02:58,  2.42it/s]

{'eval_runtime': 25.5012, 'eval_samples_per_second': 33.645, 'eval_steps_per_second': 4.235, 'epoch': 16.0}


 85%|████████▌ | 1836/2160 [22:54<02:13,  2.43it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 85%|████████▌ | 1836/2160 [23:20<02:13,  2.43it/s]

{'eval_runtime': 25.0899, 'eval_samples_per_second': 34.197, 'eval_steps_per_second': 4.305, 'epoch': 17.0}


 90%|█████████ | 1944/2160 [24:15<01:29,  2.42it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 90%|█████████ | 1944/2160 [24:40<01:29,  2.42it/s]

{'eval_runtime': 25.2053, 'eval_samples_per_second': 34.04, 'eval_steps_per_second': 4.285, 'epoch': 18.0}


 93%|█████████▎| 2000/2160 [25:09<01:24,  1.89it/s]Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-2000
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-2000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-2000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-2000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-2000/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-2000/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-2000/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-2000/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-case

{'loss': 0.7411, 'learning_rate': 7.4074074074074075e-06, 'epoch': 18.52}


 95%|█████████▌| 2052/2160 [25:37<00:44,  2.44it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 95%|█████████▌| 2052/2160 [26:03<00:44,  2.44it/s]

{'eval_runtime': 25.6424, 'eval_samples_per_second': 33.46, 'eval_steps_per_second': 4.212, 'epoch': 19.0}


100%|██████████| 2160/2160 [26:58<00:00,  2.45it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

100%|██████████| 2160/2160 [27:23<00:00,  2.45it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 2160/2160 [27:23<00:00,  1.31it/s]
Configuration saved in ../adapter_qa_el_mbert_20/adapter_config.json
Module weights saved in ../adapter_qa_el_mbert_20/pytorch_adapter.bin
Configuration saved in ../adapter_qa_el_mbert_20/head_config.json
Module weights saved in ../adapter_qa_el_mbert_20/pytorch_model_head.bin
Loading cached processed dataset at ../data/xquad_ru_train.hf/cache-6d4c583527dd1084.arrow
Loading cached processed dataset at ../data/xquad_ru_val.hf/cache-0ba3109efe29e996.arrow
Adding adapter 'xquad_adapter'.


{'eval_runtime': 25.0336, 'eval_samples_per_second': 34.274, 'eval_steps_per_second': 4.314, 'epoch': 20.0}
{'train_runtime': 1643.63, 'train_samples_per_second': 10.44, 'train_steps_per_second': 1.314, 'train_loss': 1.3595154726946796, 'epoch': 20.0}


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 750
  Num Epochs = 20
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 1880
  5%|▌         | 94/1880 [00:48<14:14,  2.09it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

  5%|▌         | 94/1880 [01:09<14:14,  2.09it/s]

{'eval_runtime': 21.7546, 'eval_samples_per_second': 34.475, 'eval_steps_per_second': 4.321, 'epoch': 1.0}


 10%|█         | 188/1880 [01:58<13:27,  2.09it/s] The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 10%|█         | 188/1880 [02:20<13:27,  2.09it/s]

{'eval_runtime': 21.9011, 'eval_samples_per_second': 34.245, 'eval_steps_per_second': 4.292, 'epoch': 2.0}


 15%|█▌        | 282/1880 [03:08<12:46,  2.08it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 15%|█▌        | 282/1880 [03:30<12:46,  2.08it/s]

{'eval_runtime': 21.9665, 'eval_samples_per_second': 34.143, 'eval_steps_per_second': 4.279, 'epoch': 3.0}


 20%|██        | 376/1880 [04:18<12:00,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 20%|██        | 376/1880 [04:40<12:00,  2.09it/s]

{'eval_runtime': 21.7691, 'eval_samples_per_second': 34.453, 'eval_steps_per_second': 4.318, 'epoch': 4.0}


 25%|██▌       | 470/1880 [05:29<11:12,  2.10it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 25%|██▌       | 470/1880 [05:50<11:12,  2.10it/s]

{'eval_runtime': 21.7459, 'eval_samples_per_second': 34.489, 'eval_steps_per_second': 4.323, 'epoch': 5.0}


 27%|██▋       | 500/1880 [06:06<11:49,  1.95it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapt

{'loss': 2.5432, 'learning_rate': 7.340425531914894e-05, 'epoch': 5.32}


 30%|███       | 564/1880 [06:39<10:47,  2.03it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 30%|███       | 564/1880 [07:01<10:47,  2.03it/s]

{'eval_runtime': 21.8122, 'eval_samples_per_second': 34.384, 'eval_steps_per_second': 4.31, 'epoch': 6.0}


 35%|███▌      | 658/1880 [07:50<09:44,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 35%|███▌      | 658/1880 [08:11<09:44,  2.09it/s]

{'eval_runtime': 21.8367, 'eval_samples_per_second': 34.346, 'eval_steps_per_second': 4.305, 'epoch': 7.0}


 40%|████      | 752/1880 [09:00<08:59,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 40%|████      | 752/1880 [09:22<08:59,  2.09it/s]

{'eval_runtime': 21.826, 'eval_samples_per_second': 34.363, 'eval_steps_per_second': 4.307, 'epoch': 8.0}


 45%|████▌     | 846/1880 [10:10<08:17,  2.08it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 45%|████▌     | 846/1880 [10:32<08:17,  2.08it/s]

{'eval_runtime': 21.9267, 'eval_samples_per_second': 34.205, 'eval_steps_per_second': 4.287, 'epoch': 9.0}


 50%|█████     | 940/1880 [11:21<07:29,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 50%|█████     | 940/1880 [11:42<07:29,  2.09it/s]

{'eval_runtime': 21.7894, 'eval_samples_per_second': 34.42, 'eval_steps_per_second': 4.314, 'epoch': 10.0}


 53%|█████▎    | 1000/1880 [12:13<07:32,  1.94it/s] Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cas

{'loss': 1.2566, 'learning_rate': 4.680851063829788e-05, 'epoch': 10.64}


 55%|█████▌    | 1034/1880 [12:31<06:43,  2.10it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 55%|█████▌    | 1034/1880 [12:53<06:43,  2.10it/s]

{'eval_runtime': 21.7147, 'eval_samples_per_second': 34.539, 'eval_steps_per_second': 4.329, 'epoch': 11.0}


 60%|██████    | 1128/1880 [13:41<05:59,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 60%|██████    | 1128/1880 [14:03<05:59,  2.09it/s]

{'eval_runtime': 21.7412, 'eval_samples_per_second': 34.497, 'eval_steps_per_second': 4.324, 'epoch': 12.0}


 65%|██████▌   | 1222/1880 [14:51<05:13,  2.10it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 65%|██████▌   | 1222/1880 [15:12<05:13,  2.10it/s]

{'eval_runtime': 21.6831, 'eval_samples_per_second': 34.589, 'eval_steps_per_second': 4.335, 'epoch': 13.0}


 70%|███████   | 1316/1880 [16:00<04:27,  2.11it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 70%|███████   | 1316/1880 [16:22<04:27,  2.11it/s]

{'eval_runtime': 21.6338, 'eval_samples_per_second': 34.668, 'eval_steps_per_second': 4.345, 'epoch': 14.0}


 75%|███████▌  | 1410/1880 [17:10<03:42,  2.11it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 75%|███████▌  | 1410/1880 [17:32<03:42,  2.11it/s]

{'eval_runtime': 21.618, 'eval_samples_per_second': 34.693, 'eval_steps_per_second': 4.348, 'epoch': 15.0}


 80%|███████▉  | 1500/1880 [18:18<03:13,  1.96it/s]Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-case

{'loss': 0.7804, 'learning_rate': 2.0212765957446807e-05, 'epoch': 15.96}


 80%|████████  | 1504/1880 [18:20<03:12,  1.96it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 80%|████████  | 1504/1880 [18:42<03:12,  1.96it/s]

{'eval_runtime': 21.6736, 'eval_samples_per_second': 34.604, 'eval_steps_per_second': 4.337, 'epoch': 16.0}


 85%|████████▌ | 1598/1880 [19:28<02:06,  2.22it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 85%|████████▌ | 1598/1880 [19:49<02:06,  2.22it/s]

{'eval_runtime': 20.4979, 'eval_samples_per_second': 36.589, 'eval_steps_per_second': 4.586, 'epoch': 17.0}


 90%|█████████ | 1692/1880 [20:34<01:24,  2.22it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 90%|█████████ | 1692/1880 [20:55<01:24,  2.22it/s]

{'eval_runtime': 20.5009, 'eval_samples_per_second': 36.584, 'eval_steps_per_second': 4.585, 'epoch': 18.0}


 95%|█████████▌| 1786/1880 [21:40<00:42,  2.22it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 95%|█████████▌| 1786/1880 [22:01<00:42,  2.22it/s]

{'eval_runtime': 20.4981, 'eval_samples_per_second': 36.589, 'eval_steps_per_second': 4.586, 'epoch': 19.0}


100%|██████████| 1880/1880 [22:46<00:00,  2.22it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

100%|██████████| 1880/1880 [23:07<00:00,  2.22it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 1880/1880 [23:07<00:00,  1.36it/s]
Configuration saved in ../adapter_qa_ru_mbert_20/adapter_config.json
Module weights saved in ../adapter_qa_ru_mbert_20/pytorch_adapter.bin
Configuration saved in ../adapter_qa_ru_mbert_20/head_config.json
Module weights saved in ../adapter_qa_ru_mbert_20/pytorch_model_head.bin


{'eval_runtime': 20.5055, 'eval_samples_per_second': 36.576, 'eval_steps_per_second': 4.584, 'epoch': 20.0}
{'train_runtime': 1387.3799, 'train_samples_per_second': 10.812, 'train_steps_per_second': 1.355, 'train_loss': 1.3391674853385762, 'epoch': 20.0}


Loading cached processed dataset at ../data/xquad_tr_train.hf/cache-5f2555f4d4218890.arrow
Loading cached processed dataset at ../data/xquad_tr_val.hf/cache-4af96b4570c52d2b.arrow
Adding adapter 'xquad_adapter'.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 746
  Num Epochs = 20
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 1880
  5%|▌         | 94/1880 [00:45<11:27,  2.60it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation

{'eval_runtime': 20.5089, 'eval_samples_per_second': 36.374, 'eval_steps_per_second': 4.583, 'epoch': 1.0}


 10%|█         | 188/1880 [01:50<10:51,  2.60it/s] The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 10%|█         | 188/1880 [02:11<10:51,  2.60it/s]

{'eval_runtime': 20.5043, 'eval_samples_per_second': 36.383, 'eval_steps_per_second': 4.584, 'epoch': 2.0}


 15%|█▌        | 282/1880 [02:56<10:14,  2.60it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 15%|█▌        | 282/1880 [03:16<10:14,  2.60it/s]

{'eval_runtime': 20.5061, 'eval_samples_per_second': 36.379, 'eval_steps_per_second': 4.584, 'epoch': 3.0}


 20%|██        | 376/1880 [04:02<09:38,  2.60it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 20%|██        | 376/1880 [04:22<09:38,  2.60it/s]

{'eval_runtime': 20.5052, 'eval_samples_per_second': 36.381, 'eval_steps_per_second': 4.584, 'epoch': 4.0}


 25%|██▌       | 470/1880 [05:07<09:03,  2.60it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 25%|██▌       | 470/1880 [05:28<09:03,  2.60it/s]

{'eval_runtime': 20.5033, 'eval_samples_per_second': 36.384, 'eval_steps_per_second': 4.585, 'epoch': 5.0}


 27%|██▋       | 500/1880 [05:42<11:07,  2.07it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapt

{'loss': 2.6297, 'learning_rate': 7.340425531914894e-05, 'epoch': 5.32}


 30%|███       | 564/1880 [06:13<08:26,  2.60it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 30%|███       | 564/1880 [06:34<08:26,  2.60it/s]

{'eval_runtime': 20.506, 'eval_samples_per_second': 36.38, 'eval_steps_per_second': 4.584, 'epoch': 6.0}


 35%|███▌      | 658/1880 [07:19<07:49,  2.60it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 35%|███▌      | 658/1880 [07:39<07:49,  2.60it/s]

{'eval_runtime': 20.5079, 'eval_samples_per_second': 36.376, 'eval_steps_per_second': 4.584, 'epoch': 7.0}


 40%|████      | 752/1880 [08:24<07:13,  2.60it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 40%|████      | 752/1880 [08:45<07:13,  2.60it/s]

{'eval_runtime': 20.5029, 'eval_samples_per_second': 36.385, 'eval_steps_per_second': 4.585, 'epoch': 8.0}


 45%|████▌     | 846/1880 [09:30<06:40,  2.58it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 45%|████▌     | 846/1880 [09:51<06:40,  2.58it/s]

{'eval_runtime': 20.5071, 'eval_samples_per_second': 36.378, 'eval_steps_per_second': 4.584, 'epoch': 9.0}


 50%|█████     | 940/1880 [10:36<06:01,  2.60it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 50%|█████     | 940/1880 [10:56<06:01,  2.60it/s]

{'eval_runtime': 20.5034, 'eval_samples_per_second': 36.384, 'eval_steps_per_second': 4.585, 'epoch': 10.0}


 53%|█████▎    | 1000/1880 [11:25<07:05,  2.07it/s] Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cas

{'loss': 1.4677, 'learning_rate': 4.680851063829788e-05, 'epoch': 10.64}


 55%|█████▌    | 1034/1880 [11:42<05:24,  2.61it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 55%|█████▌    | 1034/1880 [12:02<05:24,  2.61it/s]

{'eval_runtime': 20.5015, 'eval_samples_per_second': 36.388, 'eval_steps_per_second': 4.585, 'epoch': 11.0}


 60%|██████    | 1128/1880 [12:47<04:49,  2.60it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 60%|██████    | 1128/1880 [13:08<04:49,  2.60it/s]

{'eval_runtime': 20.5036, 'eval_samples_per_second': 36.384, 'eval_steps_per_second': 4.585, 'epoch': 12.0}


 65%|██████▌   | 1222/1880 [13:53<04:12,  2.60it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 65%|██████▌   | 1222/1880 [14:14<04:12,  2.60it/s]

{'eval_runtime': 20.506, 'eval_samples_per_second': 36.38, 'eval_steps_per_second': 4.584, 'epoch': 13.0}


 70%|███████   | 1316/1880 [14:59<03:36,  2.60it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 70%|███████   | 1316/1880 [15:19<03:36,  2.60it/s]

{'eval_runtime': 20.5028, 'eval_samples_per_second': 36.385, 'eval_steps_per_second': 4.585, 'epoch': 14.0}


 75%|███████▌  | 1410/1880 [16:04<03:00,  2.60it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 75%|███████▌  | 1410/1880 [16:25<03:00,  2.60it/s]

{'eval_runtime': 20.501, 'eval_samples_per_second': 36.388, 'eval_steps_per_second': 4.585, 'epoch': 15.0}


 80%|███████▉  | 1500/1880 [17:08<03:03,  2.07it/s]Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-case

{'loss': 0.9836, 'learning_rate': 2.0212765957446807e-05, 'epoch': 15.96}


 80%|████████  | 1504/1880 [17:10<02:37,  2.39it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 80%|████████  | 1504/1880 [17:31<02:37,  2.39it/s]

{'eval_runtime': 20.5058, 'eval_samples_per_second': 36.38, 'eval_steps_per_second': 4.584, 'epoch': 16.0}


 85%|████████▌ | 1598/1880 [18:16<01:48,  2.60it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 85%|████████▌ | 1598/1880 [18:36<01:48,  2.60it/s]

{'eval_runtime': 20.5074, 'eval_samples_per_second': 36.377, 'eval_steps_per_second': 4.584, 'epoch': 17.0}


 90%|█████████ | 1692/1880 [19:21<01:12,  2.60it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 90%|█████████ | 1692/1880 [19:42<01:12,  2.60it/s]

{'eval_runtime': 20.5037, 'eval_samples_per_second': 36.384, 'eval_steps_per_second': 4.585, 'epoch': 18.0}


 95%|█████████▌| 1786/1880 [20:27<00:36,  2.61it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 95%|█████████▌| 1786/1880 [20:48<00:36,  2.61it/s]

{'eval_runtime': 20.508, 'eval_samples_per_second': 36.376, 'eval_steps_per_second': 4.584, 'epoch': 19.0}


100%|██████████| 1880/1880 [21:33<00:00,  2.60it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

100%|██████████| 1880/1880 [21:53<00:00,  2.60it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 1880/1880 [21:53<00:00,  1.43it/s]
Configuration saved in ../adapter_qa_tr_mbert_20/adapter_config.json
Module weights saved in ../adapter_qa_tr_mbert_20/pytorch_adapter.bin
Configuration saved in ../adapter_qa_tr_mbert_20/head_config.json
Module weights saved in ../adapter_qa_tr_mbert_20/pytorch_model_head.bin
Loading cached processed dataset at ../data/xquad_ar_train.hf/cache-404bffb4e4a23ec2.arrow
Loading cached processed dataset at ../data/xquad_ar_val.hf/cache-e02dfa7d0e83fa28.arrow
Adding adapter 'xquad_adapter'.
PyTorch: setting up devices
Th

{'eval_runtime': 20.5037, 'eval_samples_per_second': 36.384, 'eval_steps_per_second': 4.585, 'epoch': 20.0}
{'train_runtime': 1313.8027, 'train_samples_per_second': 11.356, 'train_steps_per_second': 1.431, 'train_loss': 1.5112942959399933, 'epoch': 20.0}


***** Running training *****
  Num examples = 758
  Num Epochs = 20
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 1900
  5%|▌         | 95/1900 [00:45<13:32,  2.22it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

  5%|▌         | 95/1900 [01:06<13:32,  2.22it/s]

{'eval_runtime': 20.7242, 'eval_samples_per_second': 36.576, 'eval_steps_per_second': 4.584, 'epoch': 1.0}


 10%|█         | 190/1900 [01:52<12:49,  2.22it/s] The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 10%|█         | 190/1900 [02:13<12:49,  2.22it/s]

{'eval_runtime': 20.724, 'eval_samples_per_second': 36.576, 'eval_steps_per_second': 4.584, 'epoch': 2.0}


 15%|█▌        | 285/1900 [02:59<12:07,  2.22it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 15%|█▌        | 285/1900 [03:19<12:07,  2.22it/s]

{'eval_runtime': 20.7261, 'eval_samples_per_second': 36.572, 'eval_steps_per_second': 4.584, 'epoch': 3.0}


 20%|██        | 380/1900 [04:05<11:24,  2.22it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 20%|██        | 380/1900 [04:26<11:24,  2.22it/s]

{'eval_runtime': 20.7231, 'eval_samples_per_second': 36.577, 'eval_steps_per_second': 4.584, 'epoch': 4.0}


 25%|██▌       | 475/1900 [05:12<10:41,  2.22it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 25%|██▌       | 475/1900 [05:33<10:41,  2.22it/s]

{'eval_runtime': 20.7249, 'eval_samples_per_second': 36.574, 'eval_steps_per_second': 4.584, 'epoch': 5.0}


 26%|██▋       | 500/1900 [05:45<11:18,  2.06it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapt

{'loss': 2.6542, 'learning_rate': 7.368421052631579e-05, 'epoch': 5.26}


 30%|███       | 570/1900 [06:19<09:58,  2.22it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 30%|███       | 570/1900 [06:40<09:58,  2.22it/s]

{'eval_runtime': 20.7274, 'eval_samples_per_second': 36.57, 'eval_steps_per_second': 4.583, 'epoch': 6.0}


 35%|███▌      | 665/1900 [07:26<09:15,  2.22it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 35%|███▌      | 665/1900 [07:46<09:15,  2.22it/s]

{'eval_runtime': 20.7232, 'eval_samples_per_second': 36.577, 'eval_steps_per_second': 4.584, 'epoch': 7.0}


 40%|████      | 760/1900 [08:32<08:32,  2.22it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 40%|████      | 760/1900 [08:53<08:32,  2.22it/s]

{'eval_runtime': 20.7232, 'eval_samples_per_second': 36.577, 'eval_steps_per_second': 4.584, 'epoch': 8.0}


 45%|████▌     | 855/1900 [09:39<07:50,  2.22it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 45%|████▌     | 855/1900 [10:00<07:50,  2.22it/s]

{'eval_runtime': 20.7222, 'eval_samples_per_second': 36.579, 'eval_steps_per_second': 4.584, 'epoch': 9.0}


 50%|█████     | 950/1900 [10:46<07:07,  2.22it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 50%|█████     | 950/1900 [11:06<07:07,  2.22it/s]

{'eval_runtime': 20.7223, 'eval_samples_per_second': 36.579, 'eval_steps_per_second': 4.584, 'epoch': 10.0}


 53%|█████▎    | 1000/1900 [11:31<07:14,  2.07it/s] Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cas

{'loss': 1.4944, 'learning_rate': 4.736842105263158e-05, 'epoch': 10.53}


 55%|█████▌    | 1045/1900 [11:53<06:25,  2.22it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 55%|█████▌    | 1045/1900 [12:13<06:25,  2.22it/s]

{'eval_runtime': 20.7185, 'eval_samples_per_second': 36.586, 'eval_steps_per_second': 4.585, 'epoch': 11.0}


 60%|██████    | 1140/1900 [12:59<05:41,  2.22it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 60%|██████    | 1140/1900 [13:20<05:41,  2.22it/s]

{'eval_runtime': 20.7218, 'eval_samples_per_second': 36.58, 'eval_steps_per_second': 4.585, 'epoch': 12.0}


 65%|██████▌   | 1235/1900 [14:06<04:59,  2.22it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 65%|██████▌   | 1235/1900 [14:27<04:59,  2.22it/s]

{'eval_runtime': 20.7234, 'eval_samples_per_second': 36.577, 'eval_steps_per_second': 4.584, 'epoch': 13.0}


 70%|███████   | 1330/1900 [15:13<04:16,  2.22it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 70%|███████   | 1330/1900 [15:33<04:16,  2.22it/s]

{'eval_runtime': 20.7231, 'eval_samples_per_second': 36.578, 'eval_steps_per_second': 4.584, 'epoch': 14.0}


 75%|███████▌  | 1425/1900 [16:19<03:33,  2.22it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 75%|███████▌  | 1425/1900 [16:40<03:33,  2.22it/s]

{'eval_runtime': 20.723, 'eval_samples_per_second': 36.578, 'eval_steps_per_second': 4.584, 'epoch': 15.0}


 79%|███████▉  | 1500/1900 [17:16<03:13,  2.07it/s]Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-case

{'loss': 1.0201, 'learning_rate': 2.105263157894737e-05, 'epoch': 15.79}


 80%|████████  | 1520/1900 [17:26<02:51,  2.22it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 80%|████████  | 1520/1900 [17:47<02:51,  2.22it/s]

{'eval_runtime': 20.7243, 'eval_samples_per_second': 36.575, 'eval_steps_per_second': 4.584, 'epoch': 16.0}


 85%|████████▌ | 1615/1900 [18:33<02:08,  2.22it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 85%|████████▌ | 1615/1900 [18:54<02:08,  2.22it/s]

{'eval_runtime': 20.7273, 'eval_samples_per_second': 36.57, 'eval_steps_per_second': 4.583, 'epoch': 17.0}


 90%|█████████ | 1710/1900 [19:40<01:27,  2.18it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 90%|█████████ | 1710/1900 [20:00<01:27,  2.18it/s]

{'eval_runtime': 20.7231, 'eval_samples_per_second': 36.578, 'eval_steps_per_second': 4.584, 'epoch': 18.0}


 95%|█████████▌| 1805/1900 [20:46<00:42,  2.22it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 95%|█████████▌| 1805/1900 [21:07<00:42,  2.22it/s]

{'eval_runtime': 20.727, 'eval_samples_per_second': 36.571, 'eval_steps_per_second': 4.583, 'epoch': 19.0}


100%|██████████| 1900/1900 [21:53<00:00,  2.22it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

100%|██████████| 1900/1900 [22:14<00:00,  2.22it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 1900/1900 [22:14<00:00,  1.42it/s]
Configuration saved in ../adapter_qa_ar_mbert_20/adapter_config.json
Module weights saved in ../adapter_qa_ar_mbert_20/pytorch_adapter.bin
Configuration saved in ../adapter_qa_ar_mbert_20/head_config.json
Module weights saved in ../adapter_qa_ar_mbert_20/pytorch_model_head.bin


{'eval_runtime': 20.7259, 'eval_samples_per_second': 36.573, 'eval_steps_per_second': 4.584, 'epoch': 20.0}
{'train_runtime': 1334.3493, 'train_samples_per_second': 11.361, 'train_steps_per_second': 1.424, 'train_loss': 1.5257871928967928, 'epoch': 20.0}


Loading cached processed dataset at ../data/xquad_vi_train.hf/cache-91b0d50f2e3e5b09.arrow
Loading cached processed dataset at ../data/xquad_vi_val.hf/cache-71583ba368a81bd1.arrow
Adding adapter 'xquad_adapter'.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 736
  Num Epochs = 20
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 1840
  5%|▌         | 92/1840 [00:44<14:04,  2.07it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation

{'eval_runtime': 20.0648, 'eval_samples_per_second': 36.681, 'eval_steps_per_second': 4.585, 'epoch': 1.0}


 10%|█         | 184/1840 [01:49<13:21,  2.07it/s] The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 10%|█         | 184/1840 [02:09<13:21,  2.07it/s]

{'eval_runtime': 20.0671, 'eval_samples_per_second': 36.677, 'eval_steps_per_second': 4.585, 'epoch': 2.0}


 15%|█▌        | 276/1840 [02:53<12:35,  2.07it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 15%|█▌        | 276/1840 [03:14<12:35,  2.07it/s]

{'eval_runtime': 20.0617, 'eval_samples_per_second': 36.687, 'eval_steps_per_second': 4.586, 'epoch': 3.0}


 20%|██        | 368/1840 [03:58<11:51,  2.07it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 20%|██        | 368/1840 [04:18<11:51,  2.07it/s]

{'eval_runtime': 20.063, 'eval_samples_per_second': 36.684, 'eval_steps_per_second': 4.586, 'epoch': 4.0}


 25%|██▌       | 460/1840 [05:03<11:08,  2.07it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 25%|██▌       | 460/1840 [05:23<11:08,  2.07it/s]

{'eval_runtime': 20.0652, 'eval_samples_per_second': 36.68, 'eval_steps_per_second': 4.585, 'epoch': 5.0}


 27%|██▋       | 500/1840 [05:42<10:46,  2.07it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapt

{'loss': 2.3835, 'learning_rate': 7.282608695652175e-05, 'epoch': 5.43}


 30%|███       | 552/1840 [06:08<10:22,  2.07it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 30%|███       | 552/1840 [06:28<10:22,  2.07it/s]

{'eval_runtime': 20.0643, 'eval_samples_per_second': 36.682, 'eval_steps_per_second': 4.585, 'epoch': 6.0}


 35%|███▌      | 644/1840 [07:13<09:36,  2.07it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 35%|███▌      | 644/1840 [07:33<09:36,  2.07it/s]

{'eval_runtime': 20.0641, 'eval_samples_per_second': 36.682, 'eval_steps_per_second': 4.585, 'epoch': 7.0}


 40%|████      | 736/1840 [08:17<08:52,  2.07it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 40%|████      | 736/1840 [08:37<08:52,  2.07it/s]

{'eval_runtime': 20.0627, 'eval_samples_per_second': 36.685, 'eval_steps_per_second': 4.586, 'epoch': 8.0}


 45%|████▌     | 828/1840 [09:22<08:09,  2.07it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 45%|████▌     | 828/1840 [09:42<08:09,  2.07it/s]

{'eval_runtime': 20.0632, 'eval_samples_per_second': 36.684, 'eval_steps_per_second': 4.586, 'epoch': 9.0}


 50%|█████     | 920/1840 [10:27<07:24,  2.07it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 50%|█████     | 920/1840 [10:47<07:24,  2.07it/s]

{'eval_runtime': 20.0616, 'eval_samples_per_second': 36.687, 'eval_steps_per_second': 4.586, 'epoch': 10.0}


 54%|█████▍    | 1000/1840 [11:26<06:46,  2.07it/s] Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cas

{'loss': 1.2183, 'learning_rate': 4.565217391304348e-05, 'epoch': 10.87}


 55%|█████▌    | 1012/1840 [11:32<06:41,  2.06it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 55%|█████▌    | 1012/1840 [11:52<06:41,  2.06it/s]

{'eval_runtime': 20.0663, 'eval_samples_per_second': 36.678, 'eval_steps_per_second': 4.585, 'epoch': 11.0}


 60%|██████    | 1104/1840 [12:37<05:55,  2.07it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 60%|██████    | 1104/1840 [12:57<05:55,  2.07it/s]

{'eval_runtime': 20.0651, 'eval_samples_per_second': 36.681, 'eval_steps_per_second': 4.585, 'epoch': 12.0}


 65%|██████▌   | 1196/1840 [13:41<05:11,  2.07it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 65%|██████▌   | 1196/1840 [14:01<05:11,  2.07it/s]

{'eval_runtime': 20.0631, 'eval_samples_per_second': 36.684, 'eval_steps_per_second': 4.586, 'epoch': 13.0}


 70%|███████   | 1288/1840 [14:46<04:26,  2.07it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 70%|███████   | 1288/1840 [15:06<04:26,  2.07it/s]

{'eval_runtime': 20.0781, 'eval_samples_per_second': 36.657, 'eval_steps_per_second': 4.582, 'epoch': 14.0}


 75%|███████▌  | 1380/1840 [15:51<03:42,  2.07it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 75%|███████▌  | 1380/1840 [16:11<03:42,  2.07it/s]

{'eval_runtime': 20.0657, 'eval_samples_per_second': 36.679, 'eval_steps_per_second': 4.585, 'epoch': 15.0}


 80%|████████  | 1472/1840 [16:56<02:57,  2.07it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 80%|████████  | 1472/1840 [17:16<02:57,  2.07it/s]

{'eval_runtime': 20.0623, 'eval_samples_per_second': 36.686, 'eval_steps_per_second': 4.586, 'epoch': 16.0}


 82%|████████▏ | 1500/1840 [17:29<02:44,  2.07it/s]Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-case

{'loss': 0.8099, 'learning_rate': 1.8478260869565216e-05, 'epoch': 16.3}


 85%|████████▌ | 1564/1840 [18:01<02:13,  2.07it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 85%|████████▌ | 1564/1840 [18:21<02:13,  2.07it/s]

{'eval_runtime': 20.0609, 'eval_samples_per_second': 36.688, 'eval_steps_per_second': 4.586, 'epoch': 17.0}


 90%|█████████ | 1656/1840 [19:05<01:28,  2.07it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 90%|█████████ | 1656/1840 [19:25<01:28,  2.07it/s]

{'eval_runtime': 20.0627, 'eval_samples_per_second': 36.685, 'eval_steps_per_second': 4.586, 'epoch': 18.0}


 95%|█████████▌| 1748/1840 [20:10<00:44,  2.07it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 95%|█████████▌| 1748/1840 [20:30<00:44,  2.07it/s]

{'eval_runtime': 20.0611, 'eval_samples_per_second': 36.688, 'eval_steps_per_second': 4.586, 'epoch': 19.0}


100%|██████████| 1840/1840 [21:15<00:00,  2.07it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

100%|██████████| 1840/1840 [21:35<00:00,  2.07it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)




{'eval_runtime': 20.0646, 'eval_samples_per_second': 36.681, 'eval_steps_per_second': 4.585, 'epoch': 20.0}


100%|██████████| 1840/1840 [21:35<00:00,  1.42it/s]
Configuration saved in ../adapter_qa_vi_mbert_20/adapter_config.json
Module weights saved in ../adapter_qa_vi_mbert_20/pytorch_adapter.bin
Configuration saved in ../adapter_qa_vi_mbert_20/head_config.json
Module weights saved in ../adapter_qa_vi_mbert_20/pytorch_model_head.bin
Loading cached processed dataset at ../data/xquad_zh_train.hf/cache-9357be135a4d3f4b.arrow
Loading cached processed dataset at ../data/xquad_zh_val.hf/cache-2b6858b860327cd3.arrow
Adding adapter 'xquad_adapter'.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 729
  Num Epochs = 20
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed &

{'train_runtime': 1295.4679, 'train_samples_per_second': 11.363, 'train_steps_per_second': 1.42, 'train_loss': 1.3156472828077233, 'epoch': 20.0}


  5%|▍         | 91/1840 [00:44<14:05,  2.07it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

  5%|▌         | 92/1840 [01:04<14:05,  2.07it/s]

{'eval_runtime': 20.0655, 'eval_samples_per_second': 36.331, 'eval_steps_per_second': 4.585, 'epoch': 1.0}


 10%|▉         | 183/1840 [01:48<13:20,  2.07it/s] The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 10%|█         | 184/1840 [02:08<13:20,  2.07it/s]

{'eval_runtime': 20.0632, 'eval_samples_per_second': 36.335, 'eval_steps_per_second': 4.586, 'epoch': 2.0}


 15%|█▍        | 275/1840 [02:52<12:35,  2.07it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 15%|█▌        | 276/1840 [03:12<12:34,  2.07it/s]

{'eval_runtime': 20.0606, 'eval_samples_per_second': 36.34, 'eval_steps_per_second': 4.586, 'epoch': 3.0}


 20%|█▉        | 367/1840 [03:56<11:51,  2.07it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 20%|██        | 368/1840 [04:16<11:50,  2.07it/s]

{'eval_runtime': 20.0625, 'eval_samples_per_second': 36.336, 'eval_steps_per_second': 4.586, 'epoch': 4.0}


 25%|██▍       | 459/1840 [05:00<11:07,  2.07it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 25%|██▌       | 460/1840 [05:20<11:07,  2.07it/s]

{'eval_runtime': 20.0666, 'eval_samples_per_second': 36.329, 'eval_steps_per_second': 4.585, 'epoch': 5.0}


 27%|██▋       | 500/1840 [05:40<10:47,  2.07it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapt

{'loss': 2.5592, 'learning_rate': 7.282608695652175e-05, 'epoch': 5.43}


 30%|██▉       | 551/1840 [06:04<10:23,  2.07it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 30%|███       | 552/1840 [06:25<10:22,  2.07it/s]

{'eval_runtime': 20.0637, 'eval_samples_per_second': 36.334, 'eval_steps_per_second': 4.585, 'epoch': 6.0}


 35%|███▍      | 643/1840 [07:09<09:38,  2.07it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 35%|███▌      | 644/1840 [07:29<09:38,  2.07it/s]

{'eval_runtime': 20.0663, 'eval_samples_per_second': 36.33, 'eval_steps_per_second': 4.585, 'epoch': 7.0}


 40%|███▉      | 735/1840 [08:13<08:53,  2.07it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 40%|████      | 736/1840 [08:33<08:52,  2.07it/s]

{'eval_runtime': 20.0616, 'eval_samples_per_second': 36.338, 'eval_steps_per_second': 4.586, 'epoch': 8.0}


 45%|████▍     | 827/1840 [09:17<08:08,  2.07it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 45%|████▌     | 828/1840 [09:37<08:08,  2.07it/s]

{'eval_runtime': 20.0639, 'eval_samples_per_second': 36.334, 'eval_steps_per_second': 4.585, 'epoch': 9.0}


 50%|████▉     | 919/1840 [10:21<07:25,  2.07it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 50%|█████     | 920/1840 [10:41<07:24,  2.07it/s]

{'eval_runtime': 20.0651, 'eval_samples_per_second': 36.332, 'eval_steps_per_second': 4.585, 'epoch': 10.0}


 54%|█████▍    | 1000/1840 [11:20<06:45,  2.07it/s] Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cas

{'loss': 1.3565, 'learning_rate': 4.565217391304348e-05, 'epoch': 10.87}


 55%|█████▍    | 1011/1840 [11:26<06:43,  2.05it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 55%|█████▌    | 1012/1840 [11:46<06:43,  2.05it/s]

{'eval_runtime': 20.0668, 'eval_samples_per_second': 36.329, 'eval_steps_per_second': 4.585, 'epoch': 11.0}


 60%|█████▉    | 1103/1840 [12:30<05:56,  2.07it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 60%|██████    | 1104/1840 [12:50<05:55,  2.07it/s]

{'eval_runtime': 20.064, 'eval_samples_per_second': 36.334, 'eval_steps_per_second': 4.585, 'epoch': 12.0}


 65%|██████▍   | 1195/1840 [13:34<05:11,  2.07it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 65%|██████▌   | 1196/1840 [13:54<05:10,  2.07it/s]

{'eval_runtime': 20.0656, 'eval_samples_per_second': 36.331, 'eval_steps_per_second': 4.585, 'epoch': 13.0}


 70%|██████▉   | 1287/1840 [14:38<04:27,  2.07it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 70%|███████   | 1288/1840 [14:58<04:26,  2.07it/s]

{'eval_runtime': 20.0642, 'eval_samples_per_second': 36.333, 'eval_steps_per_second': 4.585, 'epoch': 14.0}


 75%|███████▍  | 1379/1840 [15:42<03:42,  2.07it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 75%|███████▌  | 1380/1840 [16:02<03:42,  2.07it/s]

{'eval_runtime': 20.0625, 'eval_samples_per_second': 36.336, 'eval_steps_per_second': 4.586, 'epoch': 15.0}


 80%|███████▉  | 1471/1840 [16:46<02:58,  2.07it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 80%|████████  | 1472/1840 [17:06<02:57,  2.07it/s]

{'eval_runtime': 20.0629, 'eval_samples_per_second': 36.336, 'eval_steps_per_second': 4.586, 'epoch': 16.0}


 82%|████████▏ | 1500/1840 [17:20<02:44,  2.07it/s]Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-case

{'loss': 0.9005, 'learning_rate': 1.8478260869565216e-05, 'epoch': 16.3}


 85%|████████▍ | 1563/1840 [17:51<02:13,  2.07it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 85%|████████▌ | 1564/1840 [18:11<02:13,  2.07it/s]

{'eval_runtime': 20.0646, 'eval_samples_per_second': 36.333, 'eval_steps_per_second': 4.585, 'epoch': 17.0}


 90%|████████▉ | 1655/1840 [18:55<01:29,  2.07it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 90%|█████████ | 1656/1840 [19:15<01:28,  2.07it/s]

{'eval_runtime': 20.0645, 'eval_samples_per_second': 36.333, 'eval_steps_per_second': 4.585, 'epoch': 18.0}


 95%|█████████▍| 1747/1840 [19:59<00:44,  2.07it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 95%|█████████▌| 1748/1840 [20:19<00:44,  2.07it/s]

{'eval_runtime': 20.0682, 'eval_samples_per_second': 36.326, 'eval_steps_per_second': 4.584, 'epoch': 19.0}


100%|██████████| 1840/1840 [21:10<00:00,  1.85it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

100%|██████████| 1840/1840 [21:40<00:00,  1.85it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 1840/1840 [21:40<00:00,  1.42it/s]
Configuration saved in ../adapter_qa_zh_mbert_20/adapter_config.json
Module weights saved in ../adapter_qa_zh_mbert_20/pytorch_adapter.bin
Configuration saved in ../adapter_qa_zh_mbert_20/head_config.json
Module weights saved in ../adapter_qa_zh_mbert_20/pytorch_model_head.bin
Loading cached processed dataset at ../data/xquad_hi_train.hf/cache-ec2f6bd4903280d0.arrow
Loading cached processed dataset at ../data/xquad_hi_val.hf/cache-2bbb8849ec12d10a.arrow
Adding adapter 'xquad_adapter'.


{'eval_runtime': 29.6963, 'eval_samples_per_second': 24.549, 'eval_steps_per_second': 3.098, 'epoch': 20.0}
{'train_runtime': 1300.2958, 'train_samples_per_second': 11.213, 'train_steps_per_second': 1.415, 'train_loss': 1.441805881002675, 'epoch': 20.0}


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 765
  Num Epochs = 20
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 1920
  5%|▌         | 96/1920 [01:07<19:05,  1.59it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

  5%|▌         | 96/1920 [01:38<19:05,  1.59it/s]

{'eval_runtime': 31.0561, 'eval_samples_per_second': 24.633, 'eval_steps_per_second': 3.091, 'epoch': 1.0}


 10%|█         | 192/1920 [02:46<18:11,  1.58it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 10%|█         | 192/1920 [03:17<18:11,  1.58it/s]

{'eval_runtime': 31.0842, 'eval_samples_per_second': 24.611, 'eval_steps_per_second': 3.088, 'epoch': 2.0}


 15%|█▌        | 288/1920 [04:25<17:06,  1.59it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 15%|█▌        | 288/1920 [04:55<17:06,  1.59it/s]

{'eval_runtime': 29.657, 'eval_samples_per_second': 25.795, 'eval_steps_per_second': 3.237, 'epoch': 3.0}


 20%|██        | 384/1920 [05:41<11:02,  2.32it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 20%|██        | 384/1920 [06:02<11:02,  2.32it/s]

{'eval_runtime': 20.9616, 'eval_samples_per_second': 36.495, 'eval_steps_per_second': 4.58, 'epoch': 4.0}


 25%|██▌       | 480/1920 [06:49<10:20,  2.32it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 25%|██▌       | 480/1920 [07:10<10:20,  2.32it/s]

{'eval_runtime': 20.9656, 'eval_samples_per_second': 36.488, 'eval_steps_per_second': 4.579, 'epoch': 5.0}


 26%|██▌       | 500/1920 [07:19<11:37,  2.04it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapt

{'loss': 2.5925, 'learning_rate': 7.395833333333335e-05, 'epoch': 5.21}


 30%|███       | 576/1920 [07:56<09:39,  2.32it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 30%|███       | 576/1920 [08:17<09:39,  2.32it/s]

{'eval_runtime': 20.9701, 'eval_samples_per_second': 36.481, 'eval_steps_per_second': 4.578, 'epoch': 6.0}


 35%|███▌      | 672/1920 [09:04<08:57,  2.32it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 35%|███▌      | 672/1920 [09:25<08:57,  2.32it/s]

{'eval_runtime': 20.9715, 'eval_samples_per_second': 36.478, 'eval_steps_per_second': 4.578, 'epoch': 7.0}


 40%|████      | 768/1920 [10:11<08:15,  2.32it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 40%|████      | 768/1920 [10:32<08:15,  2.32it/s]

{'eval_runtime': 20.9715, 'eval_samples_per_second': 36.478, 'eval_steps_per_second': 4.578, 'epoch': 8.0}


 45%|████▌     | 864/1920 [11:18<07:34,  2.32it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 45%|████▌     | 864/1920 [11:39<07:34,  2.32it/s]

{'eval_runtime': 20.9706, 'eval_samples_per_second': 36.48, 'eval_steps_per_second': 4.578, 'epoch': 9.0}


 50%|█████     | 960/1920 [12:26<06:53,  2.32it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 50%|█████     | 960/1920 [12:47<06:53,  2.32it/s]

{'eval_runtime': 20.9744, 'eval_samples_per_second': 36.473, 'eval_steps_per_second': 4.577, 'epoch': 10.0}


 52%|█████▏    | 1000/1920 [13:06<07:25,  2.07it/s] Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cas

{'loss': 1.5501, 'learning_rate': 4.791666666666667e-05, 'epoch': 10.42}


 55%|█████▌    | 1056/1920 [13:33<06:12,  2.32it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 55%|█████▌    | 1056/1920 [13:54<06:12,  2.32it/s]

{'eval_runtime': 20.975, 'eval_samples_per_second': 36.472, 'eval_steps_per_second': 4.577, 'epoch': 11.0}


 60%|██████    | 1152/1920 [14:41<05:31,  2.32it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 60%|██████    | 1152/1920 [15:02<05:31,  2.32it/s]

{'eval_runtime': 20.9746, 'eval_samples_per_second': 36.473, 'eval_steps_per_second': 4.577, 'epoch': 12.0}


 65%|██████▌   | 1248/1920 [15:48<04:49,  2.32it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 65%|██████▌   | 1248/1920 [16:09<04:49,  2.32it/s]

{'eval_runtime': 20.9748, 'eval_samples_per_second': 36.472, 'eval_steps_per_second': 4.577, 'epoch': 13.0}


 70%|███████   | 1344/1920 [16:56<04:07,  2.32it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 70%|███████   | 1344/1920 [17:17<04:07,  2.32it/s]

{'eval_runtime': 20.9742, 'eval_samples_per_second': 36.473, 'eval_steps_per_second': 4.577, 'epoch': 14.0}


 75%|███████▌  | 1440/1920 [18:03<03:27,  2.32it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 75%|███████▌  | 1440/1920 [18:24<03:27,  2.32it/s]

{'eval_runtime': 20.9719, 'eval_samples_per_second': 36.477, 'eval_steps_per_second': 4.578, 'epoch': 15.0}


 78%|███████▊  | 1500/1920 [18:53<03:23,  2.06it/s]Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-case

{'loss': 1.0907, 'learning_rate': 2.1875e-05, 'epoch': 15.62}


 80%|████████  | 1536/1920 [19:11<02:45,  2.32it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 80%|████████  | 1536/1920 [19:32<02:45,  2.32it/s]

{'eval_runtime': 20.9742, 'eval_samples_per_second': 36.473, 'eval_steps_per_second': 4.577, 'epoch': 16.0}


 85%|████████▌ | 1632/1920 [20:18<02:04,  2.32it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 85%|████████▌ | 1632/1920 [20:39<02:04,  2.32it/s]

{'eval_runtime': 20.9746, 'eval_samples_per_second': 36.473, 'eval_steps_per_second': 4.577, 'epoch': 17.0}


 90%|█████████ | 1728/1920 [21:25<01:22,  2.32it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 90%|█████████ | 1728/1920 [21:46<01:22,  2.32it/s]

{'eval_runtime': 20.9756, 'eval_samples_per_second': 36.471, 'eval_steps_per_second': 4.577, 'epoch': 18.0}


 95%|█████████▌| 1824/1920 [22:33<00:41,  2.32it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 95%|█████████▌| 1824/1920 [22:54<00:41,  2.32it/s]

{'eval_runtime': 20.973, 'eval_samples_per_second': 36.475, 'eval_steps_per_second': 4.577, 'epoch': 19.0}


100%|██████████| 1920/1920 [23:40<00:00,  2.31it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

100%|██████████| 1920/1920 [24:01<00:00,  2.31it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 1920/1920 [24:01<00:00,  1.33it/s]
Configuration saved in ../adapter_qa_hi_mbert_20/adapter_config.json
Module weights saved in ../adapter_qa_hi_mbert_20/pytorch_adapter.bin
Configuration saved in ../adapter_qa_hi_mbert_20/head_config.json
Module weights saved in ../adapter_qa_hi_mbert_20/pytorch_model_head.bin


{'eval_runtime': 20.9754, 'eval_samples_per_second': 36.471, 'eval_steps_per_second': 4.577, 'epoch': 20.0}
{'train_runtime': 1441.6599, 'train_samples_per_second': 10.613, 'train_steps_per_second': 1.332, 'train_loss': 1.5515939235687255, 'epoch': 20.0}


Loading cached processed dataset at ../data/xquad_ro_train.hf/cache-cf2ac577b2081573.arrow
Loading cached processed dataset at ../data/xquad_ro_val.hf/cache-604feaa8aa1cac04.arrow
Adding adapter 'xquad_adapter'.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 750
  Num Epochs = 20
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 1880
  5%|▌         | 94/1880 [00:45<13:23,  2.22it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation

{'eval_runtime': 20.5442, 'eval_samples_per_second': 36.507, 'eval_steps_per_second': 4.576, 'epoch': 1.0}


 10%|█         | 188/1880 [01:51<12:43,  2.22it/s] The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 10%|█         | 188/1880 [02:12<12:43,  2.22it/s]

{'eval_runtime': 20.5328, 'eval_samples_per_second': 36.527, 'eval_steps_per_second': 4.578, 'epoch': 2.0}


 15%|█▌        | 282/1880 [03:02<17:01,  1.56it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 15%|█▌        | 282/1880 [03:33<17:01,  1.56it/s]

{'eval_runtime': 30.2834, 'eval_samples_per_second': 24.766, 'eval_steps_per_second': 3.104, 'epoch': 3.0}


 20%|██        | 376/1880 [04:43<15:52,  1.58it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 20%|██        | 376/1880 [05:13<15:52,  1.58it/s]

{'eval_runtime': 30.4645, 'eval_samples_per_second': 24.619, 'eval_steps_per_second': 3.086, 'epoch': 4.0}


 25%|██▌       | 470/1880 [06:22<15:53,  1.48it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 25%|██▌       | 470/1880 [06:54<15:53,  1.48it/s]

{'eval_runtime': 31.9961, 'eval_samples_per_second': 23.44, 'eval_steps_per_second': 2.938, 'epoch': 5.0}


 27%|██▋       | 500/1880 [07:19<17:42,  1.30it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapt

{'loss': 2.3476, 'learning_rate': 7.340425531914894e-05, 'epoch': 5.32}


 30%|███       | 564/1880 [08:03<13:12,  1.66it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 30%|███       | 564/1880 [08:31<13:12,  1.66it/s]

{'eval_runtime': 28.2845, 'eval_samples_per_second': 26.516, 'eval_steps_per_second': 3.323, 'epoch': 6.0}


 35%|███▌      | 658/1880 [09:35<12:44,  1.60it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 35%|███▌      | 658/1880 [10:04<12:44,  1.60it/s]

{'eval_runtime': 29.4171, 'eval_samples_per_second': 25.495, 'eval_steps_per_second': 3.195, 'epoch': 7.0}


 40%|████      | 752/1880 [11:09<11:49,  1.59it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 40%|████      | 752/1880 [11:38<11:49,  1.59it/s]

{'eval_runtime': 29.0294, 'eval_samples_per_second': 25.836, 'eval_steps_per_second': 3.238, 'epoch': 8.0}


 45%|████▌     | 846/1880 [12:41<10:30,  1.64it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 45%|████▌     | 846/1880 [13:11<10:30,  1.64it/s]

{'eval_runtime': 29.5668, 'eval_samples_per_second': 25.366, 'eval_steps_per_second': 3.179, 'epoch': 9.0}


 50%|█████     | 940/1880 [14:12<09:23,  1.67it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 50%|█████     | 940/1880 [14:45<09:23,  1.67it/s]

{'eval_runtime': 33.0794, 'eval_samples_per_second': 22.673, 'eval_steps_per_second': 2.842, 'epoch': 10.0}


 53%|█████▎    | 1000/1880 [15:28<09:33,  1.53it/s] Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cas

{'loss': 1.1471, 'learning_rate': 4.680851063829788e-05, 'epoch': 10.64}


 55%|█████▌    | 1034/1880 [15:51<09:07,  1.55it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 55%|█████▌    | 1034/1880 [16:21<09:07,  1.55it/s]

{'eval_runtime': 29.9135, 'eval_samples_per_second': 25.072, 'eval_steps_per_second': 3.142, 'epoch': 11.0}


 60%|██████    | 1128/1880 [17:30<09:35,  1.31it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 60%|██████    | 1128/1880 [18:02<09:35,  1.31it/s]

{'eval_runtime': 31.9677, 'eval_samples_per_second': 23.461, 'eval_steps_per_second': 2.94, 'epoch': 12.0}


 65%|██████▌   | 1222/1880 [19:03<06:39,  1.65it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 65%|██████▌   | 1222/1880 [19:31<06:39,  1.65it/s]

{'eval_runtime': 28.0627, 'eval_samples_per_second': 26.726, 'eval_steps_per_second': 3.35, 'epoch': 13.0}


 70%|███████   | 1316/1880 [20:33<06:08,  1.53it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 70%|███████   | 1316/1880 [21:02<06:08,  1.53it/s]

{'eval_runtime': 28.6834, 'eval_samples_per_second': 26.148, 'eval_steps_per_second': 3.277, 'epoch': 14.0}


 75%|███████▌  | 1410/1880 [22:12<05:18,  1.47it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 75%|███████▌  | 1410/1880 [22:46<05:18,  1.47it/s]

{'eval_runtime': 33.4319, 'eval_samples_per_second': 22.434, 'eval_steps_per_second': 2.812, 'epoch': 15.0}


 80%|███████▉  | 1500/1880 [24:02<05:22,  1.18it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-ca

{'loss': 0.7094, 'learning_rate': 2.0212765957446807e-05, 'epoch': 15.96}


 80%|████████  | 1504/1880 [24:05<05:10,  1.21it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 80%|████████  | 1504/1880 [24:42<05:10,  1.21it/s]

{'eval_runtime': 36.5787, 'eval_samples_per_second': 20.504, 'eval_steps_per_second': 2.57, 'epoch': 16.0}


 85%|████████▌ | 1598/1880 [26:04<03:50,  1.23it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 85%|████████▌ | 1598/1880 [26:42<03:50,  1.23it/s]

{'eval_runtime': 37.5961, 'eval_samples_per_second': 19.949, 'eval_steps_per_second': 2.5, 'epoch': 17.0}


 90%|█████████ | 1692/1880 [28:04<02:32,  1.23it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 90%|█████████ | 1692/1880 [28:42<02:32,  1.23it/s]

{'eval_runtime': 37.2897, 'eval_samples_per_second': 20.113, 'eval_steps_per_second': 2.521, 'epoch': 18.0}


 95%|█████████▌| 1786/1880 [30:03<01:17,  1.22it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 95%|█████████▌| 1786/1880 [30:40<01:17,  1.22it/s]

{'eval_runtime': 36.8753, 'eval_samples_per_second': 20.339, 'eval_steps_per_second': 2.549, 'epoch': 19.0}


100%|██████████| 1880/1880 [32:02<00:00,  1.24it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

100%|██████████| 1880/1880 [32:39<00:00,  1.24it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)




{'eval_runtime': 37.2428, 'eval_samples_per_second': 20.138, 'eval_steps_per_second': 2.524, 'epoch': 20.0}


100%|██████████| 1880/1880 [32:40<00:00,  1.04s/it]
Configuration saved in ../adapter_qa_ro_mbert_20/adapter_config.json
Module weights saved in ../adapter_qa_ro_mbert_20/pytorch_adapter.bin
Configuration saved in ../adapter_qa_ro_mbert_20/head_config.json
Module weights saved in ../adapter_qa_ro_mbert_20/pytorch_model_head.bin
Loading cached processed dataset at ../data/xquad_th_train.hf/cache-25032f34083e8756.arrow
Loading cached processed dataset at ../data/xquad_th_val.hf/cache-20051bdfb6bc48be.arrow
Adding adapter 'xquad_adapter'.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 1057
  Num Epochs = 20
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed 

{'train_runtime': 1960.0653, 'train_samples_per_second': 7.653, 'train_steps_per_second': 0.959, 'train_loss': 1.2293481867364113, 'epoch': 20.0}


  5%|▌         | 133/2660 [01:53<25:32,  1.65it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

  5%|▌         | 133/2660 [02:42<25:32,  1.65it/s]

{'eval_runtime': 49.3602, 'eval_samples_per_second': 21.414, 'eval_steps_per_second': 2.694, 'epoch': 1.0}


 10%|▉         | 265/2660 [04:15<21:48,  1.83it/s]   The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 10%|█         | 266/2660 [04:48<21:47,  1.83it/s]

{'eval_runtime': 32.9456, 'eval_samples_per_second': 32.083, 'eval_steps_per_second': 4.037, 'epoch': 2.0}


 15%|█▌        | 399/2660 [06:04<16:49,  2.24it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 15%|█▌        | 399/2660 [06:39<16:49,  2.24it/s]

{'eval_runtime': 35.7247, 'eval_samples_per_second': 29.587, 'eval_steps_per_second': 3.723, 'epoch': 3.0}


 19%|█▉        | 500/2660 [07:37<19:50,  1.81it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapt

{'loss': 1.9182, 'learning_rate': 8.120300751879699e-05, 'epoch': 3.76}


 20%|█▉        | 531/2660 [07:54<19:26,  1.83it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 20%|██        | 532/2660 [08:35<19:25,  1.83it/s]

{'eval_runtime': 40.8015, 'eval_samples_per_second': 25.906, 'eval_steps_per_second': 3.26, 'epoch': 4.0}


 25%|██▌       | 665/2660 [10:20<19:18,  1.72it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 25%|██▌       | 665/2660 [11:10<19:18,  1.72it/s]

{'eval_runtime': 50.0919, 'eval_samples_per_second': 21.101, 'eval_steps_per_second': 2.655, 'epoch': 5.0}


 30%|███       | 798/2660 [12:55<18:14,  1.70it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 30%|███       | 798/2660 [13:43<18:14,  1.70it/s]

{'eval_runtime': 47.7999, 'eval_samples_per_second': 22.113, 'eval_steps_per_second': 2.782, 'epoch': 6.0}


 35%|███▌      | 931/2660 [15:33<18:06,  1.59it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 35%|███▌      | 931/2660 [16:22<18:06,  1.59it/s]

{'eval_runtime': 49.6357, 'eval_samples_per_second': 21.295, 'eval_steps_per_second': 2.68, 'epoch': 7.0}


 38%|███▊      | 1000/2660 [17:20<22:12,  1.25it/s] Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1000/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cas

{'loss': 1.1913, 'learning_rate': 6.240601503759398e-05, 'epoch': 7.52}


 40%|████      | 1064/2660 [18:18<18:41,  1.42it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 40%|████      | 1064/2660 [19:16<18:41,  1.42it/s]

{'eval_runtime': 58.4901, 'eval_samples_per_second': 18.071, 'eval_steps_per_second': 2.274, 'epoch': 8.0}


 45%|████▌     | 1197/2660 [21:20<17:53,  1.36it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 45%|████▌     | 1197/2660 [22:21<17:53,  1.36it/s]

{'eval_runtime': 61.6102, 'eval_samples_per_second': 17.156, 'eval_steps_per_second': 2.159, 'epoch': 9.0}


 50%|█████     | 1330/2660 [24:15<13:38,  1.62it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 50%|█████     | 1330/2660 [25:10<13:38,  1.62it/s]

{'eval_runtime': 54.9849, 'eval_samples_per_second': 19.223, 'eval_steps_per_second': 2.419, 'epoch': 10.0}


 55%|█████▌    | 1463/2660 [27:06<11:25,  1.74it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 55%|█████▌    | 1463/2660 [27:55<11:25,  1.74it/s]

{'eval_runtime': 49.6088, 'eval_samples_per_second': 21.307, 'eval_steps_per_second': 2.681, 'epoch': 11.0}


 56%|█████▋    | 1500/2660 [28:24<14:36,  1.32it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-1500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-1500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-ca

{'loss': 0.8511, 'learning_rate': 4.3609022556390975e-05, 'epoch': 11.28}


 60%|██████    | 1596/2660 [29:39<11:15,  1.57it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 60%|██████    | 1596/2660 [30:28<11:15,  1.57it/s]

{'eval_runtime': 48.6727, 'eval_samples_per_second': 21.716, 'eval_steps_per_second': 2.733, 'epoch': 12.0}


 65%|██████▌   | 1729/2660 [32:14<09:11,  1.69it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 65%|██████▌   | 1729/2660 [33:02<09:11,  1.69it/s]

{'eval_runtime': 48.5519, 'eval_samples_per_second': 21.771, 'eval_steps_per_second': 2.739, 'epoch': 13.0}


 70%|███████   | 1862/2660 [34:45<07:46,  1.71it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 70%|███████   | 1862/2660 [35:32<07:46,  1.71it/s]

{'eval_runtime': 47.3065, 'eval_samples_per_second': 22.344, 'eval_steps_per_second': 2.811, 'epoch': 14.0}


 75%|███████▌  | 1995/2660 [37:15<06:27,  1.72it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 75%|███████▌  | 1995/2660 [38:01<06:27,  1.72it/s]

{'eval_runtime': 46.3453, 'eval_samples_per_second': 22.807, 'eval_steps_per_second': 2.87, 'epoch': 15.0}


 75%|███████▌  | 2000/2660 [38:05<44:42,  4.07s/it]  Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-2000
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-2000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-2000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-2000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-2000/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-2000/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-2000/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-2000/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-ca

{'loss': 0.664, 'learning_rate': 2.4812030075187968e-05, 'epoch': 15.04}


 80%|████████  | 2128/2660 [39:45<05:30,  1.61it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 80%|████████  | 2128/2660 [40:36<05:30,  1.61it/s]

{'eval_runtime': 50.3618, 'eval_samples_per_second': 20.988, 'eval_steps_per_second': 2.641, 'epoch': 16.0}


 85%|████████▌ | 2261/2660 [42:26<04:07,  1.61it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 85%|████████▌ | 2261/2660 [43:17<04:07,  1.61it/s]

{'eval_runtime': 50.6174, 'eval_samples_per_second': 20.882, 'eval_steps_per_second': 2.628, 'epoch': 17.0}


 90%|█████████ | 2394/2660 [45:05<02:44,  1.62it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 90%|█████████ | 2394/2660 [45:55<02:44,  1.62it/s]

{'eval_runtime': 50.2975, 'eval_samples_per_second': 21.015, 'eval_steps_per_second': 2.644, 'epoch': 18.0}


 94%|█████████▍| 2500/2660 [47:22<01:59,  1.34it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-xquad/checkpoint-2500
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-2500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-2500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-2500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-2500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-2500/xquad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-2500/xquad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-xquad/checkpoint-2500/xquad_adapter/head_config.json
Module weights saved in bert-base-multilingual-ca

{'loss': 0.5444, 'learning_rate': 6.015037593984962e-06, 'epoch': 18.8}


 95%|█████████▌| 2527/2660 [47:44<01:21,  1.64it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 95%|█████████▌| 2527/2660 [48:34<01:21,  1.64it/s]

{'eval_runtime': 50.6597, 'eval_samples_per_second': 20.865, 'eval_steps_per_second': 2.625, 'epoch': 19.0}


100%|██████████| 2660/2660 [50:25<00:00,  1.56it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

100%|██████████| 2660/2660 [51:17<00:00,  1.56it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 2660/2660 [51:17<00:00,  1.16s/it]
Configuration saved in ../adapter_qa_th_mbert_20/adapter_config.json
Module weights saved in ../adapter_qa_th_mbert_20/pytorch_adapter.bin
Configuration saved in ../adapter_qa_th_mbert_20/head_config.json
Module weights saved in ../adapter_qa_th_mbert_20/pytorch_model_head.bin


{'eval_runtime': 51.7018, 'eval_samples_per_second': 20.444, 'eval_steps_per_second': 2.572, 'epoch': 20.0}
{'train_runtime': 3077.3133, 'train_samples_per_second': 6.87, 'train_steps_per_second': 0.864, 'train_loss': 0.9997729136531515, 'epoch': 20.0}


In [None]:
from transformers import pipeline

# Replace this with your own checkpoint
# model_checkpoint = adapter_checkpoint
question_answerer = pipeline("question-answering", model=model, tokenizer=tokenizer)

context = """
? Transformers is backed by the three most popular deep learning libraries ? Jax, PyTorch and TensorFlow ? with a seamless integration
between them. It's straightforward to train your models with one before loading them for inference with the other.
"""
question = "What is backed by deep learning libraries?"
question_answerer(question=question, context=context)

In [5]:
def get_predictions(dataset):
    
    predictions = []
    for example in tqdm(dataset):
        question = example['question']
        context = example['context']
        prediction = question_answerer(question=question, context=context)

        predictions.append(prediction)
    
    return predictions

In [6]:
# Need to convert the variables so that they can be used by the evaluation.compute function
def convert_for_evaluation(predictions, examples):
    ref = []
    pred = []
    for i, id in enumerate(examples['id']):
        ref.append({
            'answers': examples['answers'][i],
            'id': examples['id'][i]
        })
        pred.append({
            'prediction_text': predictions[i]['answer'],
            'id': examples['id'][i]
        })
        
    return pred, ref

In [7]:
squad_metric = evaluate.load("squad")
results_en2lang = {}
results_en = {}

for lang in langs:
    test = load_from_disk(f'../data/xquad_{lang}_test.hf/')

    adapter_name = f'xquad_adapter'
    model.load_adapter(f"../adapter_weights/adapter_qa_{lang}_mbert_20")
    model.train_adapter(adapter_name)
    model.set_active_adapters(adapter_name)

    question_answerer = pipeline("question-answering", model=model, tokenizer=tokenizer)
    print(f'Running predictions for {lang}')
    predictions = get_predictions(test)

    predictions, references = convert_for_evaluation(predictions, test) 
    res = squad_metric.compute(predictions=predictions, references=references)
    results_en2lang[lang] = res

Running predictions for en


  tensor = as_tensor(value)
  p_mask = np.asarray(
100%|██████████| 238/238 [00:49<00:00,  4.77it/s]
Overwriting existing adapter 'xquad_adapter'.


Running predictions for es


100%|██████████| 238/238 [01:02<00:00,  3.78it/s]
Overwriting existing adapter 'xquad_adapter'.


Running predictions for de


100%|██████████| 238/238 [00:53<00:00,  4.41it/s]
Overwriting existing adapter 'xquad_adapter'.


Running predictions for el


100%|██████████| 238/238 [02:07<00:00,  1.87it/s]
Overwriting existing adapter 'xquad_adapter'.


Running predictions for ru


100%|██████████| 238/238 [01:24<00:00,  2.81it/s]
Overwriting existing adapter 'xquad_adapter'.


Running predictions for tr


100%|██████████| 238/238 [01:12<00:00,  3.28it/s]
Overwriting existing adapter 'xquad_adapter'.


Running predictions for ar


100%|██████████| 238/238 [01:11<00:00,  3.32it/s]
Overwriting existing adapter 'xquad_adapter'.


Running predictions for vi


100%|██████████| 238/238 [01:03<00:00,  3.72it/s]
Overwriting existing adapter 'xquad_adapter'.


Running predictions for zh


100%|██████████| 238/238 [01:12<00:00,  3.27it/s]
Overwriting existing adapter 'xquad_adapter'.


Running predictions for hi


100%|██████████| 238/238 [01:36<00:00,  2.46it/s]
Overwriting existing adapter 'xquad_adapter'.


Running predictions for ro


100%|██████████| 238/238 [01:18<00:00,  3.01it/s]
Overwriting existing adapter 'xquad_adapter'.


Running predictions for th


100%|██████████| 238/238 [03:20<00:00,  1.19it/s]


In [8]:
results_en2lang

{'en': {'exact_match': 42.016806722689076, 'f1': 52.958167338419464},
 'es': {'exact_match': 29.831932773109244, 'f1': 51.29742386700498},
 'de': {'exact_match': 34.45378151260504, 'f1': 46.94773980273347},
 'el': {'exact_match': 31.092436974789916, 'f1': 43.02789647327463},
 'ru': {'exact_match': 34.87394957983193, 'f1': 48.446618379755726},
 'tr': {'exact_match': 25.630252100840337, 'f1': 39.659607432716676},
 'ar': {'exact_match': 26.050420168067227, 'f1': 41.64258127493421},
 'vi': {'exact_match': 32.773109243697476, 'f1': 52.76812381551958},
 'zh': {'exact_match': 31.092436974789916, 'f1': 40.01400560224091},
 'hi': {'exact_match': 31.932773109243698, 'f1': 46.186680435967574},
 'ro': {'exact_match': 37.39495798319328, 'f1': 53.1701333820386},
 'th': {'exact_match': 36.134453781512605, 'f1': 41.9467787114846}}