In [13]:
from transformers import AutoTokenizer
from transformers import TrainingArguments
from transformers import AdapterTrainer
from datasets import load_dataset, load_from_disk
from transformers import AutoModelForQuestionAnswering
from torch.utils.data import DataLoader
from transformers import default_data_collator
import evaluate
from tqdm import tqdm
from transformers import pipeline

In [2]:
model_checkpoint = "bert-base-multilingual-cased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

In [3]:
max_length = 512
stride = 128

def preprocess_training_examples(examples):
    questions = [q.strip() for q in examples["question"]]
    context = examples["context"]
    
    # Tokenize question and context together into one input,
    # they will seperable with a special token between them.
    # Tokenizer will also split context into multiple chuncks,
    # if the max_length is exceeded.
    inputs = tokenizer(
        questions,
        context,
        max_length=max_length,
        truncation="only_second",
        stride=stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    # offset_mapping is the result of the split into
    # multiple chunks
    offset_mapping = inputs.pop("offset_mapping")
    sample_map = inputs.pop("overflow_to_sample_mapping")
    answers = examples["answers"]
    start_positions = []
    end_positions = []

    # This step is required to see if an answer is inside
    # the context chunks or not. It labels the multiple chunks
    # generated by the tokenizer into either not having
    # the answer, or where the answer is located
    for i, offset in enumerate(offset_mapping):
        sample_idx = sample_map[i]
        answer = answers[sample_idx]
        start_char = answer["answer_start"][0]
        end_char = answer["answer_start"][0] + len(answer["text"][0])
        sequence_ids = inputs.sequence_ids(i)

        # Find the start and end of the context
        idx = 0
        while sequence_ids[idx] != 1:
            idx += 1
        context_start = idx
        while sequence_ids[idx] == 1:
            idx += 1
        context_end = idx - 1

        # If the answer is not fully inside the context, label is (0, 0)
        if offset[context_start][0] > start_char or offset[context_end][1] < end_char:
            start_positions.append(0)
            end_positions.append(0)
        else:
            # Otherwise it's the start and end token positions
            idx = context_start
            while idx <= context_end and offset[idx][0] <= start_char:
                idx += 1
            start_positions.append(idx - 1)

            idx = context_end
            while idx >= context_start and offset[idx][1] >= end_char:
                idx -= 1
            end_positions.append(idx + 1)

    inputs["start_positions"] = start_positions
    inputs["end_positions"] = end_positions
    return inputs

In [4]:
def preprocess_validation_examples(examples):
    questions = [q.strip() for q in examples["question"]]
    context = examples["context"]
    
    # It's the same thing as with the preprocess_training_examples
    # tokenizer, but here 
    inputs = tokenizer(
        questions,
        context,
        max_length=max_length,
        truncation="only_second",
        stride=stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    sample_map = inputs.pop("overflow_to_sample_mapping")
    example_ids = []

    for i in range(len(inputs["input_ids"])):
        sample_idx = sample_map[i]
        example_ids.append(examples["id"][sample_idx])

        sequence_ids = inputs.sequence_ids(i)
        offset = inputs["offset_mapping"][i]
        inputs["offset_mapping"][i] = [
            o if sequence_ids[k] == 1 else None for k, o in enumerate(offset)
        ]

    inputs["example_id"] = example_ids
    return inputs

In [5]:
langs = ["en", "es", "de", "el", "ru", "tr", "ar", "vi", "zh", "hi", "ro", "th"]

In [6]:
test = load_from_disk('../data/xquad_es_test.hf/')

In [11]:
model = AutoModelForQuestionAnswering.from_pretrained(model_checkpoint)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForQuestionAnswering: ['cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-bas

In [8]:
batch_size = 8
epochs = 15

In [10]:
for lang in langs:
    train = load_from_disk(f'../data/xquad_{lang}_train.hf/')
    val = load_from_disk(f'../data/xquad_{lang}_val.hf/')
    train_dataset = train.map(
        preprocess_training_examples,
        batched=True,
        remove_columns=train.column_names,
    )

    validation_dataset = val.map(
        preprocess_validation_examples,
        batched=True,
        remove_columns=val.column_names,
    )
    train_dataset.set_format("torch")
    validation_set = validation_dataset.remove_columns(["example_id", "offset_mapping"])
    validation_set.set_format("torch")

    train_dataloader = DataLoader(
        train_dataset,
        shuffle=True,
        collate_fn=default_data_collator,
        batch_size=8,
    )

    validation_dataloader = DataLoader(
        validation_dataset,
        shuffle=True,
        collate_fn=default_data_collator,
        batch_size=8,
    )
    adapter_name = 'squad_adapter'
    # model.add_adapter(adapter_name)
    model.load_adapter("../adapter_weights/adapter_qa_en_mbert_4ep/")
    model.train_adapter(adapter_name)
    model.set_active_adapters(adapter_name)
    model_name = model_checkpoint.split("/")[-1]
    args = TrainingArguments(
        f"{model_name}-adapter-squad",
        evaluation_strategy = "epoch",
        learning_rate=1e-4,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=epochs,
        weight_decay=0.01
    )
    trainer = AdapterTrainer(
        model,
        args,
        train_dataset=train_dataset,
        eval_dataset=validation_dataset,
        data_collator=default_data_collator,
        tokenizer=tokenizer
    )
    trainer.train()
    model.save_adapter(f"../adapter_qa_en-{lang}_mbert_{epochs}", adapter_name=adapter_name)
    del train, val

100%|██████████| 1/1 [00:00<00:00,  2.98ba/s]
Loading cached processed dataset at ../data/xquad_en_val.hf/cache-3ca8755dd58ddcef.arrow
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/adapter_config.json
Overwriting existing adapter 'squad_adapter'.
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_adapter.bin
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/head_config.json
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_model_head.bin
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 727
  Num Epochs = 15
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel

{'eval_runtime': 22.0462, 'eval_samples_per_second': 32.976, 'eval_steps_per_second': 4.128, 'epoch': 1.0}


 13%|█▎        | 182/1365 [01:58<10:09,  1.94it/s] The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 13%|█▎        | 182/1365 [02:20<10:09,  1.94it/s]

{'eval_runtime': 22.1868, 'eval_samples_per_second': 32.767, 'eval_steps_per_second': 4.102, 'epoch': 2.0}


 20%|██        | 273/1365 [03:09<09:19,  1.95it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 20%|██        | 273/1365 [03:31<09:19,  1.95it/s]

{'eval_runtime': 22.299, 'eval_samples_per_second': 32.602, 'eval_steps_per_second': 4.081, 'epoch': 3.0}


 27%|██▋       | 364/1365 [04:20<08:31,  1.96it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 27%|██▋       | 364/1365 [04:42<08:31,  1.96it/s]

{'eval_runtime': 22.0855, 'eval_samples_per_second': 32.917, 'eval_steps_per_second': 4.12, 'epoch': 4.0}


 33%|███▎      | 455/1365 [05:31<07:42,  1.97it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 33%|███▎      | 455/1365 [05:53<07:42,  1.97it/s]

{'eval_runtime': 22.4418, 'eval_samples_per_second': 32.395, 'eval_steps_per_second': 4.055, 'epoch': 5.0}


 37%|███▋      | 500/1365 [06:17<07:39,  1.88it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/special_tokens_map.json


{'loss': 0.6234, 'learning_rate': 6.336996336996337e-05, 'epoch': 5.49}


 40%|████      | 546/1365 [06:42<06:57,  1.96it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 40%|████      | 546/1365 [07:04<06:57,  1.96it/s]

{'eval_runtime': 22.06, 'eval_samples_per_second': 32.956, 'eval_steps_per_second': 4.125, 'epoch': 6.0}


 47%|████▋     | 637/1365 [08:02<07:54,  1.53it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 47%|████▋     | 637/1365 [08:27<07:54,  1.53it/s]

{'eval_runtime': 24.8894, 'eval_samples_per_second': 29.209, 'eval_steps_per_second': 3.656, 'epoch': 7.0}


 53%|█████▎    | 728/1365 [09:14<05:15,  2.02it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 53%|█████▎    | 728/1365 [09:36<05:15,  2.02it/s]

{'eval_runtime': 21.5214, 'eval_samples_per_second': 33.78, 'eval_steps_per_second': 4.228, 'epoch': 8.0}


 60%|██████    | 819/1365 [10:24<04:33,  1.99it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 60%|██████    | 819/1365 [10:45<04:33,  1.99it/s]

{'eval_runtime': 21.7674, 'eval_samples_per_second': 33.399, 'eval_steps_per_second': 4.181, 'epoch': 9.0}


 67%|██████▋   | 910/1365 [11:33<03:48,  1.99it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 67%|██████▋   | 910/1365 [11:54<03:48,  1.99it/s]

{'eval_runtime': 21.4758, 'eval_samples_per_second': 33.852, 'eval_steps_per_second': 4.237, 'epoch': 10.0}


 73%|███████▎  | 1000/1365 [12:41<03:09,  1.92it/s]Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/special_tokens_map.json


{'loss': 0.2512, 'learning_rate': 2.673992673992674e-05, 'epoch': 10.99}


 73%|███████▎  | 1001/1365 [12:42<03:35,  1.69it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 73%|███████▎  | 1001/1365 [13:04<03:35,  1.69it/s]

{'eval_runtime': 21.7815, 'eval_samples_per_second': 33.377, 'eval_steps_per_second': 4.178, 'epoch': 11.0}


 80%|████████  | 1092/1365 [13:51<02:16,  2.00it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 80%|████████  | 1092/1365 [14:13<02:16,  2.00it/s]

{'eval_runtime': 21.7547, 'eval_samples_per_second': 33.418, 'eval_steps_per_second': 4.183, 'epoch': 12.0}


 87%|████████▋ | 1183/1365 [15:01<01:31,  1.98it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 87%|████████▋ | 1183/1365 [15:23<01:31,  1.98it/s]

{'eval_runtime': 21.7445, 'eval_samples_per_second': 33.434, 'eval_steps_per_second': 4.185, 'epoch': 13.0}


 93%|█████████▎| 1274/1365 [16:11<00:48,  1.88it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

 93%|█████████▎| 1274/1365 [16:36<00:48,  1.88it/s]

{'eval_runtime': 24.5875, 'eval_samples_per_second': 29.568, 'eval_steps_per_second': 3.701, 'epoch': 14.0}


100%|██████████| 1365/1365 [17:27<00:00,  1.94it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 727
  Batch size = 8

100%|██████████| 1365/1365 [17:49<00:00,  1.94it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 1365/1365 [17:49<00:00,  1.94it/s]

{'eval_runtime': 22.5166, 'eval_samples_per_second': 32.287, 'eval_steps_per_second': 4.041, 'epoch': 15.0}
{'train_runtime': 1069.8317, 'train_samples_per_second': 10.193, 'train_steps_per_second': 1.276, 'train_loss': 0.360575433528467, 'epoch': 15.0}


100%|██████████| 1365/1365 [17:49<00:00,  1.28it/s]
Configuration saved in ../adapter_qa_en-en_mbert_15/adapter_config.json
Module weights saved in ../adapter_qa_en-en_mbert_15/pytorch_adapter.bin
Configuration saved in ../adapter_qa_en-en_mbert_15/head_config.json
Module weights saved in ../adapter_qa_en-en_mbert_15/pytorch_model_head.bin
100%|██████████| 1/1 [00:00<00:00,  3.37ba/s]
Loading cached processed dataset at ../data/xquad_es_val.hf/cache-7f4c7e3071f544fb.arrow
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/adapter_config.json
Overwriting existing adapter 'squad_adapter'.
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_adapter.bin
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/head_config.json
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_model_head.bin
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in 

{'eval_runtime': 22.8462, 'eval_samples_per_second': 31.909, 'eval_steps_per_second': 4.027, 'epoch': 1.0}


 13%|█▎        | 183/1380 [02:01<10:43,  1.86it/s] The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 13%|█▎        | 184/1380 [02:24<10:43,  1.86it/s]

{'eval_runtime': 22.6995, 'eval_samples_per_second': 32.115, 'eval_steps_per_second': 4.053, 'epoch': 2.0}


 20%|█▉        | 275/1380 [03:13<09:58,  1.85it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 20%|██        | 276/1380 [03:36<09:58,  1.85it/s]

{'eval_runtime': 22.7246, 'eval_samples_per_second': 32.08, 'eval_steps_per_second': 4.048, 'epoch': 3.0}


 27%|██▋       | 367/1380 [04:25<09:08,  1.85it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 27%|██▋       | 368/1380 [04:48<09:08,  1.85it/s]

{'eval_runtime': 22.7019, 'eval_samples_per_second': 32.112, 'eval_steps_per_second': 4.053, 'epoch': 4.0}


 33%|███▎      | 459/1380 [05:36<08:13,  1.87it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 33%|███▎      | 460/1380 [05:59<08:12,  1.87it/s]

{'eval_runtime': 22.6355, 'eval_samples_per_second': 32.206, 'eval_steps_per_second': 4.064, 'epoch': 5.0}


 36%|███▌      | 500/1380 [06:21<07:55,  1.85it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/special_tokens_map.json


{'loss': 0.929, 'learning_rate': 6.376811594202898e-05, 'epoch': 5.43}


 40%|███▉      | 551/1380 [06:49<07:29,  1.85it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 40%|████      | 552/1380 [07:12<07:28,  1.85it/s]

{'eval_runtime': 22.7915, 'eval_samples_per_second': 31.986, 'eval_steps_per_second': 4.037, 'epoch': 6.0}


 47%|████▋     | 643/1380 [08:01<06:39,  1.84it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 47%|████▋     | 644/1380 [08:24<06:38,  1.84it/s]

{'eval_runtime': 22.8382, 'eval_samples_per_second': 31.92, 'eval_steps_per_second': 4.028, 'epoch': 7.0}


 53%|█████▎    | 735/1380 [09:13<05:47,  1.86it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 53%|█████▎    | 736/1380 [09:36<05:46,  1.86it/s]

{'eval_runtime': 22.6455, 'eval_samples_per_second': 32.192, 'eval_steps_per_second': 4.063, 'epoch': 8.0}


 60%|█████▉    | 827/1380 [10:25<04:58,  1.85it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 60%|██████    | 828/1380 [10:48<04:58,  1.85it/s]

{'eval_runtime': 22.7831, 'eval_samples_per_second': 31.997, 'eval_steps_per_second': 4.038, 'epoch': 9.0}


 67%|██████▋   | 919/1380 [11:37<04:10,  1.84it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 67%|██████▋   | 920/1380 [12:00<04:09,  1.84it/s]

{'eval_runtime': 22.9085, 'eval_samples_per_second': 31.822, 'eval_steps_per_second': 4.016, 'epoch': 10.0}


 72%|███████▏  | 1000/1380 [12:44<03:26,  1.84it/s]Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/special_tokens_map.json


{'loss': 0.4487, 'learning_rate': 2.753623188405797e-05, 'epoch': 10.87}


 73%|███████▎  | 1011/1380 [12:50<03:20,  1.84it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 73%|███████▎  | 1012/1380 [13:13<03:19,  1.84it/s]

{'eval_runtime': 22.7593, 'eval_samples_per_second': 32.031, 'eval_steps_per_second': 4.042, 'epoch': 11.0}


 80%|███████▉  | 1103/1380 [14:02<02:30,  1.84it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 80%|████████  | 1104/1380 [14:25<02:29,  1.84it/s]

{'eval_runtime': 22.9763, 'eval_samples_per_second': 31.728, 'eval_steps_per_second': 4.004, 'epoch': 12.0}


 87%|████████▋ | 1195/1380 [15:16<01:40,  1.83it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 87%|████████▋ | 1196/1380 [15:39<01:40,  1.83it/s]

{'eval_runtime': 22.8726, 'eval_samples_per_second': 31.872, 'eval_steps_per_second': 4.022, 'epoch': 13.0}


 93%|█████████▎| 1287/1380 [16:28<00:50,  1.85it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 93%|█████████▎| 1288/1380 [16:51<00:49,  1.85it/s]

{'eval_runtime': 22.7833, 'eval_samples_per_second': 31.997, 'eval_steps_per_second': 4.038, 'epoch': 14.0}


100%|█████████▉| 1379/1380 [17:40<00:00,  1.86it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

100%|██████████| 1380/1380 [18:03<00:00,  1.86it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 1380/1380 [18:03<00:00,  1.27it/s]
Configuration saved in ../adapter_qa_en-es_mbert_15/adapter_config.json
Module weights saved in ../adapter_qa_en-es_mbert_15/pytorch_adapter.bin
Configuration saved in ../adapter_qa_en-es_mbert_15/head_config.json
Module weights saved in ../adapter_qa_en-es_mbert_15/pytorch_model_head.bin


{'eval_runtime': 22.5768, 'eval_samples_per_second': 32.29, 'eval_steps_per_second': 4.075, 'epoch': 15.0}
{'train_runtime': 1083.4515, 'train_samples_per_second': 10.093, 'train_steps_per_second': 1.274, 'train_loss': 0.5848589302836985, 'epoch': 15.0}


100%|██████████| 1/1 [00:00<00:00,  3.08ba/s]
100%|██████████| 1/1 [00:02<00:00,  2.45s/ba]
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/adapter_config.json
Overwriting existing adapter 'squad_adapter'.
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_adapter.bin
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/head_config.json
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_model_head.bin
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 731
  Num Epochs = 15
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradien

{'eval_runtime': 22.6582, 'eval_samples_per_second': 32.262, 'eval_steps_per_second': 4.06, 'epoch': 1.0}


 13%|█▎        | 184/1380 [02:01<08:48,  2.26it/s] The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 13%|█▎        | 184/1380 [02:24<08:48,  2.26it/s]

{'eval_runtime': 22.6781, 'eval_samples_per_second': 32.234, 'eval_steps_per_second': 4.057, 'epoch': 2.0}


 20%|██        | 276/1380 [03:14<08:16,  2.22it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 20%|██        | 276/1380 [03:37<08:16,  2.22it/s]

{'eval_runtime': 23.0558, 'eval_samples_per_second': 31.706, 'eval_steps_per_second': 3.99, 'epoch': 3.0}


 27%|██▋       | 368/1380 [04:26<07:28,  2.26it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 27%|██▋       | 368/1380 [04:49<07:28,  2.26it/s]

{'eval_runtime': 23.0641, 'eval_samples_per_second': 31.694, 'eval_steps_per_second': 3.989, 'epoch': 4.0}


 33%|███▎      | 460/1380 [05:39<07:02,  2.18it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 33%|███▎      | 460/1380 [06:02<07:02,  2.18it/s]

{'eval_runtime': 23.077, 'eval_samples_per_second': 31.677, 'eval_steps_per_second': 3.987, 'epoch': 5.0}


 36%|███▌      | 500/1380 [06:24<08:07,  1.80it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/special_tokens_map.json


{'loss': 0.9578, 'learning_rate': 6.376811594202898e-05, 'epoch': 5.43}


 40%|████      | 552/1380 [06:53<06:15,  2.21it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 40%|████      | 552/1380 [07:16<06:15,  2.21it/s]

{'eval_runtime': 22.9409, 'eval_samples_per_second': 31.865, 'eval_steps_per_second': 4.01, 'epoch': 6.0}


 47%|████▋     | 644/1380 [08:05<05:27,  2.24it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 47%|████▋     | 644/1380 [08:29<05:27,  2.24it/s]

{'eval_runtime': 23.2896, 'eval_samples_per_second': 31.387, 'eval_steps_per_second': 3.95, 'epoch': 7.0}


 53%|█████▎    | 736/1380 [09:18<04:44,  2.27it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 53%|█████▎    | 736/1380 [09:41<04:44,  2.27it/s]

{'eval_runtime': 22.8082, 'eval_samples_per_second': 32.05, 'eval_steps_per_second': 4.034, 'epoch': 8.0}


 60%|██████    | 828/1380 [10:31<04:03,  2.27it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 60%|██████    | 828/1380 [10:54<04:03,  2.27it/s]

{'eval_runtime': 22.8952, 'eval_samples_per_second': 31.928, 'eval_steps_per_second': 4.018, 'epoch': 9.0}


 67%|██████▋   | 920/1380 [11:43<03:22,  2.27it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 67%|██████▋   | 920/1380 [12:05<03:22,  2.27it/s]

{'eval_runtime': 22.4981, 'eval_samples_per_second': 32.492, 'eval_steps_per_second': 4.089, 'epoch': 10.0}


 72%|███████▏  | 1000/1380 [12:48<03:21,  1.88it/s]Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/special_tokens_map.json


{'loss': 0.4476, 'learning_rate': 2.753623188405797e-05, 'epoch': 10.87}


 73%|███████▎  | 1012/1380 [12:55<02:42,  2.27it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 73%|███████▎  | 1012/1380 [13:17<02:42,  2.27it/s]

{'eval_runtime': 22.5434, 'eval_samples_per_second': 32.426, 'eval_steps_per_second': 4.081, 'epoch': 11.0}


 80%|████████  | 1104/1380 [14:06<02:02,  2.26it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 80%|████████  | 1104/1380 [14:28<02:02,  2.26it/s]

{'eval_runtime': 22.4723, 'eval_samples_per_second': 32.529, 'eval_steps_per_second': 4.094, 'epoch': 12.0}


 87%|████████▋ | 1196/1380 [15:18<01:21,  2.26it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 87%|████████▋ | 1196/1380 [15:40<01:21,  2.26it/s]

{'eval_runtime': 22.551, 'eval_samples_per_second': 32.415, 'eval_steps_per_second': 4.08, 'epoch': 13.0}


 93%|█████████▎| 1288/1380 [16:29<00:42,  2.19it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

 93%|█████████▎| 1288/1380 [16:52<00:42,  2.19it/s]

{'eval_runtime': 23.0304, 'eval_samples_per_second': 31.741, 'eval_steps_per_second': 3.995, 'epoch': 14.0}


100%|██████████| 1380/1380 [17:42<00:00,  2.23it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 731
  Batch size = 8

100%|██████████| 1380/1380 [18:05<00:00,  2.23it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 1380/1380 [18:05<00:00,  1.27it/s]
Configuration saved in ../adapter_qa_en-de_mbert_15/adapter_config.json
Module weights saved in ../adapter_qa_en-de_mbert_15/pytorch_adapter.bin
Configuration saved in ../adapter_qa_en-de_mbert_15/head_config.json
Module weights saved in ../adapter_qa_en-de_mbert_15/pytorch_model_head.bin


{'eval_runtime': 23.0662, 'eval_samples_per_second': 31.691, 'eval_steps_per_second': 3.989, 'epoch': 15.0}
{'train_runtime': 1085.7323, 'train_samples_per_second': 10.099, 'train_steps_per_second': 1.271, 'train_loss': 0.5915993953096694, 'epoch': 15.0}


100%|██████████| 1/1 [00:00<00:00,  1.85ba/s]
100%|██████████| 1/1 [00:04<00:00,  4.07s/ba]
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/adapter_config.json
Overwriting existing adapter 'squad_adapter'.
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_adapter.bin
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/head_config.json
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_model_head.bin
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 858
  Num Epochs = 15
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradien

{'eval_runtime': 26.7009, 'eval_samples_per_second': 32.134, 'eval_steps_per_second': 4.045, 'epoch': 1.0}


 13%|█▎        | 216/1620 [02:22<09:47,  2.39it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 13%|█▎        | 216/1620 [02:48<09:47,  2.39it/s]

{'eval_runtime': 26.3808, 'eval_samples_per_second': 32.524, 'eval_steps_per_second': 4.094, 'epoch': 2.0}


 20%|██        | 324/1620 [03:45<09:07,  2.37it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 20%|██        | 324/1620 [04:11<09:07,  2.37it/s]

{'eval_runtime': 26.0472, 'eval_samples_per_second': 32.94, 'eval_steps_per_second': 4.146, 'epoch': 3.0}


 27%|██▋       | 432/1620 [05:07<08:13,  2.41it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 27%|██▋       | 432/1620 [05:33<08:13,  2.41it/s]

{'eval_runtime': 26.0134, 'eval_samples_per_second': 32.983, 'eval_steps_per_second': 4.152, 'epoch': 4.0}


 31%|███       | 500/1620 [06:09<09:46,  1.91it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/special_tokens_map.json


{'loss': 1.1911, 'learning_rate': 6.91358024691358e-05, 'epoch': 4.63}


 33%|███▎      | 540/1620 [06:30<07:28,  2.41it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 33%|███▎      | 540/1620 [06:56<07:28,  2.41it/s]

{'eval_runtime': 26.0153, 'eval_samples_per_second': 32.981, 'eval_steps_per_second': 4.151, 'epoch': 5.0}


 40%|████      | 648/1620 [07:52<06:44,  2.41it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 40%|████      | 648/1620 [08:18<06:44,  2.41it/s]

{'eval_runtime': 26.0026, 'eval_samples_per_second': 32.997, 'eval_steps_per_second': 4.153, 'epoch': 6.0}


 47%|████▋     | 756/1620 [09:14<05:59,  2.40it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 47%|████▋     | 756/1620 [09:40<05:59,  2.40it/s]

{'eval_runtime': 26.0481, 'eval_samples_per_second': 32.939, 'eval_steps_per_second': 4.146, 'epoch': 7.0}


 53%|█████▎    | 864/1620 [10:34<04:46,  2.63it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 53%|█████▎    | 864/1620 [10:58<04:46,  2.63it/s]

{'eval_runtime': 23.7301, 'eval_samples_per_second': 36.157, 'eval_steps_per_second': 4.551, 'epoch': 8.0}


 60%|██████    | 972/1620 [11:49<04:05,  2.64it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 60%|██████    | 972/1620 [12:13<04:05,  2.64it/s]

{'eval_runtime': 23.7349, 'eval_samples_per_second': 36.149, 'eval_steps_per_second': 4.55, 'epoch': 9.0}


 62%|██████▏   | 1000/1620 [12:27<04:56,  2.09it/s] Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/special_tokens_map.json


{'loss': 0.6118, 'learning_rate': 3.82716049382716e-05, 'epoch': 9.26}


 67%|██████▋   | 1080/1620 [13:05<03:25,  2.63it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 67%|██████▋   | 1080/1620 [13:29<03:25,  2.63it/s]

{'eval_runtime': 23.7312, 'eval_samples_per_second': 36.155, 'eval_steps_per_second': 4.551, 'epoch': 10.0}


 73%|███████▎  | 1188/1620 [14:20<02:44,  2.63it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 73%|███████▎  | 1188/1620 [14:44<02:44,  2.63it/s]

{'eval_runtime': 23.7296, 'eval_samples_per_second': 36.157, 'eval_steps_per_second': 4.551, 'epoch': 11.0}


 80%|████████  | 1296/1620 [15:35<02:03,  2.63it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 80%|████████  | 1296/1620 [15:59<02:03,  2.63it/s]

{'eval_runtime': 23.7342, 'eval_samples_per_second': 36.15, 'eval_steps_per_second': 4.55, 'epoch': 12.0}


 87%|████████▋ | 1404/1620 [16:50<01:22,  2.63it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 87%|████████▋ | 1404/1620 [17:14<01:22,  2.63it/s]

{'eval_runtime': 23.7353, 'eval_samples_per_second': 36.149, 'eval_steps_per_second': 4.55, 'epoch': 13.0}


 93%|█████████▎| 1500/1620 [18:00<00:57,  2.09it/s]Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-1500
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/special_tokens_map.json


{'loss': 0.4033, 'learning_rate': 7.4074074074074075e-06, 'epoch': 13.89}


 93%|█████████▎| 1512/1620 [18:06<00:41,  2.62it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

 93%|█████████▎| 1512/1620 [18:29<00:41,  2.62it/s]

{'eval_runtime': 23.7318, 'eval_samples_per_second': 36.154, 'eval_steps_per_second': 4.551, 'epoch': 14.0}


100%|██████████| 1620/1620 [19:21<00:00,  2.63it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 858
  Batch size = 8

100%|██████████| 1620/1620 [19:45<00:00,  2.63it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 1620/1620 [19:45<00:00,  1.37it/s]
Configuration saved in ../adapter_qa_en-el_mbert_15/adapter_config.json
Module weights saved in ../adapter_qa_en-el_mbert_15/pytorch_adapter.bin
Configuration saved in ../adapter_qa_en-el_mbert_15/head_config.json
Module weights saved in ../adapter_qa_en-el_mbert_15/pytorch_model_head.bin


{'eval_runtime': 23.732, 'eval_samples_per_second': 36.154, 'eval_steps_per_second': 4.551, 'epoch': 15.0}
{'train_runtime': 1185.0939, 'train_samples_per_second': 10.86, 'train_steps_per_second': 1.367, 'train_loss': 0.7063427042078089, 'epoch': 15.0}


100%|██████████| 1/1 [00:00<00:00,  2.34ba/s]
100%|██████████| 1/1 [00:02<00:00,  2.39s/ba]
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/adapter_config.json
Overwriting existing adapter 'squad_adapter'.
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_adapter.bin
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/head_config.json
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_model_head.bin
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 750
  Num Epochs = 15
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradien

{'eval_runtime': 20.6285, 'eval_samples_per_second': 36.358, 'eval_steps_per_second': 4.557, 'epoch': 1.0}


 13%|█▎        | 188/1410 [01:50<09:04,  2.25it/s] The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 13%|█▎        | 188/1410 [02:11<09:04,  2.25it/s]

{'eval_runtime': 20.6299, 'eval_samples_per_second': 36.355, 'eval_steps_per_second': 4.557, 'epoch': 2.0}


 20%|██        | 282/1410 [02:56<08:22,  2.25it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 20%|██        | 282/1410 [03:16<08:22,  2.25it/s]

{'eval_runtime': 20.6305, 'eval_samples_per_second': 36.354, 'eval_steps_per_second': 4.556, 'epoch': 3.0}


 27%|██▋       | 376/1410 [04:01<07:40,  2.25it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 27%|██▋       | 376/1410 [04:22<07:40,  2.25it/s]

{'eval_runtime': 20.6338, 'eval_samples_per_second': 36.348, 'eval_steps_per_second': 4.556, 'epoch': 4.0}


 33%|███▎      | 470/1410 [05:07<06:58,  2.24it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 33%|███▎      | 470/1410 [05:28<06:58,  2.24it/s]

{'eval_runtime': 20.6299, 'eval_samples_per_second': 36.355, 'eval_steps_per_second': 4.556, 'epoch': 5.0}


 35%|███▌      | 500/1410 [05:42<07:15,  2.09it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/special_tokens_map.json


{'loss': 0.9799, 'learning_rate': 6.453900709219859e-05, 'epoch': 5.32}


 40%|████      | 564/1410 [06:13<06:16,  2.24it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 40%|████      | 564/1410 [06:34<06:16,  2.24it/s]

{'eval_runtime': 20.6295, 'eval_samples_per_second': 36.356, 'eval_steps_per_second': 4.557, 'epoch': 6.0}


 47%|████▋     | 658/1410 [07:19<05:34,  2.25it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 47%|████▋     | 658/1410 [07:39<05:34,  2.25it/s]

{'eval_runtime': 20.634, 'eval_samples_per_second': 36.348, 'eval_steps_per_second': 4.556, 'epoch': 7.0}


 53%|█████▎    | 752/1410 [08:24<04:53,  2.24it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 53%|█████▎    | 752/1410 [08:45<04:53,  2.24it/s]

{'eval_runtime': 20.6307, 'eval_samples_per_second': 36.354, 'eval_steps_per_second': 4.556, 'epoch': 8.0}


 60%|██████    | 846/1410 [09:30<04:11,  2.24it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 60%|██████    | 846/1410 [09:51<04:11,  2.24it/s]

{'eval_runtime': 20.6321, 'eval_samples_per_second': 36.351, 'eval_steps_per_second': 4.556, 'epoch': 9.0}


 67%|██████▋   | 940/1410 [10:36<03:29,  2.25it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 67%|██████▋   | 940/1410 [10:56<03:29,  2.25it/s]

{'eval_runtime': 20.6303, 'eval_samples_per_second': 36.354, 'eval_steps_per_second': 4.556, 'epoch': 10.0}


 71%|███████   | 1000/1410 [11:25<03:16,  2.09it/s]Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/special_tokens_map.json


{'loss': 0.4665, 'learning_rate': 2.9078014184397162e-05, 'epoch': 10.64}


 73%|███████▎  | 1034/1410 [11:41<02:47,  2.25it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 73%|███████▎  | 1034/1410 [12:02<02:47,  2.25it/s]

{'eval_runtime': 20.6357, 'eval_samples_per_second': 36.345, 'eval_steps_per_second': 4.555, 'epoch': 11.0}


 80%|████████  | 1128/1410 [12:47<02:05,  2.25it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 80%|████████  | 1128/1410 [13:08<02:05,  2.25it/s]

{'eval_runtime': 20.6356, 'eval_samples_per_second': 36.345, 'eval_steps_per_second': 4.555, 'epoch': 12.0}


 87%|████████▋ | 1222/1410 [13:53<01:23,  2.24it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 87%|████████▋ | 1222/1410 [14:13<01:23,  2.24it/s]

{'eval_runtime': 20.6287, 'eval_samples_per_second': 36.357, 'eval_steps_per_second': 4.557, 'epoch': 13.0}


 93%|█████████▎| 1316/1410 [14:58<00:41,  2.25it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 93%|█████████▎| 1316/1410 [15:19<00:41,  2.25it/s]

{'eval_runtime': 20.6323, 'eval_samples_per_second': 36.351, 'eval_steps_per_second': 4.556, 'epoch': 14.0}


100%|██████████| 1410/1410 [16:04<00:00,  2.24it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

100%|██████████| 1410/1410 [16:25<00:00,  2.24it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 1410/1410 [16:25<00:00,  1.43it/s]
Configuration saved in ../adapter_qa_en-ru_mbert_15/adapter_config.json
Module weights saved in ../adapter_qa_en-ru_mbert_15/pytorch_adapter.bin
Configuration saved in ../adapter_qa_en-ru_mbert_15/head_config.json
Module weights saved in ../adapter_qa_en-ru_mbert_15/pytorch_model_head.bin


{'eval_runtime': 20.6326, 'eval_samples_per_second': 36.35, 'eval_steps_per_second': 4.556, 'epoch': 15.0}
{'train_runtime': 985.4237, 'train_samples_per_second': 11.416, 'train_steps_per_second': 1.431, 'train_loss': 0.599484983403632, 'epoch': 15.0}


100%|██████████| 1/1 [00:00<00:00,  3.56ba/s]
100%|██████████| 1/1 [00:02<00:00,  2.40s/ba]
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/adapter_config.json
Overwriting existing adapter 'squad_adapter'.
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_adapter.bin
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/head_config.json
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_model_head.bin
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 746
  Num Epochs = 15
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradien

{'eval_runtime': 20.6316, 'eval_samples_per_second': 36.158, 'eval_steps_per_second': 4.556, 'epoch': 1.0}


 13%|█▎        | 188/1410 [01:50<07:44,  2.63it/s] The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 13%|█▎        | 188/1410 [02:10<07:44,  2.63it/s]

{'eval_runtime': 20.6294, 'eval_samples_per_second': 36.162, 'eval_steps_per_second': 4.557, 'epoch': 2.0}


 20%|██        | 282/1410 [02:55<07:09,  2.63it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 20%|██        | 282/1410 [03:16<07:09,  2.63it/s]

{'eval_runtime': 20.6338, 'eval_samples_per_second': 36.154, 'eval_steps_per_second': 4.556, 'epoch': 3.0}


 27%|██▋       | 376/1410 [04:00<06:32,  2.64it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 27%|██▋       | 376/1410 [04:21<06:32,  2.64it/s]

{'eval_runtime': 20.6372, 'eval_samples_per_second': 36.148, 'eval_steps_per_second': 4.555, 'epoch': 4.0}


 33%|███▎      | 470/1410 [05:06<05:56,  2.63it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 33%|███▎      | 470/1410 [05:26<05:56,  2.63it/s]

{'eval_runtime': 20.6283, 'eval_samples_per_second': 36.164, 'eval_steps_per_second': 4.557, 'epoch': 5.0}


 35%|███▌      | 500/1410 [05:41<07:15,  2.09it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/special_tokens_map.json


{'loss': 1.2558, 'learning_rate': 6.453900709219859e-05, 'epoch': 5.32}


 40%|████      | 564/1410 [06:11<05:21,  2.63it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 40%|████      | 564/1410 [06:32<05:21,  2.63it/s]

{'eval_runtime': 20.6331, 'eval_samples_per_second': 36.155, 'eval_steps_per_second': 4.556, 'epoch': 6.0}


 47%|████▋     | 658/1410 [07:17<04:45,  2.63it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 47%|████▋     | 658/1410 [07:37<04:45,  2.63it/s]

{'eval_runtime': 20.6267, 'eval_samples_per_second': 36.167, 'eval_steps_per_second': 4.557, 'epoch': 7.0}


 53%|█████▎    | 752/1410 [08:22<04:09,  2.63it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 53%|█████▎    | 752/1410 [08:42<04:09,  2.63it/s]

{'eval_runtime': 20.628, 'eval_samples_per_second': 36.165, 'eval_steps_per_second': 4.557, 'epoch': 8.0}


 60%|██████    | 846/1410 [09:27<03:34,  2.63it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 60%|██████    | 846/1410 [09:48<03:34,  2.63it/s]

{'eval_runtime': 20.6321, 'eval_samples_per_second': 36.157, 'eval_steps_per_second': 4.556, 'epoch': 9.0}


 67%|██████▋   | 940/1410 [10:33<02:58,  2.63it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 67%|██████▋   | 940/1410 [10:53<02:58,  2.63it/s]

{'eval_runtime': 20.629, 'eval_samples_per_second': 36.163, 'eval_steps_per_second': 4.557, 'epoch': 10.0}


 71%|███████   | 1000/1410 [11:22<03:16,  2.09it/s]Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/special_tokens_map.json


{'loss': 0.6477, 'learning_rate': 2.9078014184397162e-05, 'epoch': 10.64}


 73%|███████▎  | 1034/1410 [11:38<02:22,  2.63it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 73%|███████▎  | 1034/1410 [11:59<02:22,  2.63it/s]

{'eval_runtime': 20.6367, 'eval_samples_per_second': 36.149, 'eval_steps_per_second': 4.555, 'epoch': 11.0}


 80%|████████  | 1128/1410 [12:43<01:47,  2.63it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 80%|████████  | 1128/1410 [13:04<01:47,  2.63it/s]

{'eval_runtime': 20.6266, 'eval_samples_per_second': 36.167, 'eval_steps_per_second': 4.557, 'epoch': 12.0}


 87%|████████▋ | 1222/1410 [13:49<01:11,  2.63it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 87%|████████▋ | 1222/1410 [14:09<01:11,  2.63it/s]

{'eval_runtime': 20.6288, 'eval_samples_per_second': 36.163, 'eval_steps_per_second': 4.557, 'epoch': 13.0}


 93%|█████████▎| 1316/1410 [14:54<00:35,  2.64it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

 93%|█████████▎| 1316/1410 [15:15<00:35,  2.64it/s]

{'eval_runtime': 20.631, 'eval_samples_per_second': 36.159, 'eval_steps_per_second': 4.556, 'epoch': 14.0}


100%|██████████| 1410/1410 [16:00<00:00,  2.63it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 746
  Batch size = 8

100%|██████████| 1410/1410 [16:20<00:00,  2.63it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 1410/1410 [16:20<00:00,  1.44it/s]
Configuration saved in ../adapter_qa_en-tr_mbert_15/adapter_config.json
Module weights saved in ../adapter_qa_en-tr_mbert_15/pytorch_adapter.bin
Configuration saved in ../adapter_qa_en-tr_mbert_15/head_config.json
Module weights saved in ../adapter_qa_en-tr_mbert_15/pytorch_model_head.bin


{'eval_runtime': 20.6262, 'eval_samples_per_second': 36.168, 'eval_steps_per_second': 4.557, 'epoch': 15.0}
{'train_runtime': 980.7048, 'train_samples_per_second': 11.41, 'train_steps_per_second': 1.438, 'train_loss': 0.8067661041909076, 'epoch': 15.0}


100%|██████████| 1/1 [00:00<00:00,  3.36ba/s]
100%|██████████| 1/1 [00:02<00:00,  2.55s/ba]
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/adapter_config.json
Overwriting existing adapter 'squad_adapter'.
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_adapter.bin
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/head_config.json
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_model_head.bin
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 758
  Num Epochs = 15
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradien

{'eval_runtime': 20.8478, 'eval_samples_per_second': 36.359, 'eval_steps_per_second': 4.557, 'epoch': 1.0}


 13%|█▎        | 190/1425 [01:51<09:10,  2.24it/s] The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 13%|█▎        | 190/1425 [02:12<09:10,  2.24it/s]

{'eval_runtime': 20.8498, 'eval_samples_per_second': 36.355, 'eval_steps_per_second': 4.556, 'epoch': 2.0}


 20%|██        | 285/1425 [02:58<08:27,  2.24it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 20%|██        | 285/1425 [03:18<08:27,  2.24it/s]

{'eval_runtime': 20.8522, 'eval_samples_per_second': 36.351, 'eval_steps_per_second': 4.556, 'epoch': 3.0}


 27%|██▋       | 380/1425 [04:04<07:45,  2.24it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 27%|██▋       | 380/1425 [04:25<07:45,  2.24it/s]

{'eval_runtime': 20.8519, 'eval_samples_per_second': 36.352, 'eval_steps_per_second': 4.556, 'epoch': 4.0}


 33%|███▎      | 475/1425 [05:10<07:02,  2.25it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 33%|███▎      | 475/1425 [05:31<07:02,  2.25it/s]

{'eval_runtime': 20.8511, 'eval_samples_per_second': 36.353, 'eval_steps_per_second': 4.556, 'epoch': 5.0}


 35%|███▌      | 500/1425 [05:43<07:23,  2.08it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/special_tokens_map.json


{'loss': 1.3313, 'learning_rate': 6.49122807017544e-05, 'epoch': 5.26}


 40%|████      | 570/1425 [06:17<06:21,  2.24it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 40%|████      | 570/1425 [06:38<06:21,  2.24it/s]

{'eval_runtime': 20.8508, 'eval_samples_per_second': 36.354, 'eval_steps_per_second': 4.556, 'epoch': 6.0}


 47%|████▋     | 665/1425 [07:23<05:38,  2.24it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 47%|████▋     | 665/1425 [07:44<05:38,  2.24it/s]

{'eval_runtime': 20.853, 'eval_samples_per_second': 36.35, 'eval_steps_per_second': 4.556, 'epoch': 7.0}


 53%|█████▎    | 760/1425 [08:30<04:56,  2.25it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 53%|█████▎    | 760/1425 [08:51<04:56,  2.25it/s]

{'eval_runtime': 20.8498, 'eval_samples_per_second': 36.355, 'eval_steps_per_second': 4.556, 'epoch': 8.0}


 60%|██████    | 855/1425 [09:36<04:13,  2.25it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 60%|██████    | 855/1425 [09:57<04:13,  2.25it/s]

{'eval_runtime': 20.8545, 'eval_samples_per_second': 36.347, 'eval_steps_per_second': 4.555, 'epoch': 9.0}


 67%|██████▋   | 950/1425 [10:42<03:31,  2.24it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 67%|██████▋   | 950/1425 [11:03<03:31,  2.24it/s]

{'eval_runtime': 20.8531, 'eval_samples_per_second': 36.35, 'eval_steps_per_second': 4.556, 'epoch': 10.0}


 70%|███████   | 1000/1425 [11:27<03:23,  2.09it/s]Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/special_tokens_map.json


{'loss': 0.7195, 'learning_rate': 2.9824561403508772e-05, 'epoch': 10.53}


 73%|███████▎  | 1045/1425 [11:49<02:49,  2.25it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 73%|███████▎  | 1045/1425 [12:10<02:49,  2.25it/s]

{'eval_runtime': 20.8531, 'eval_samples_per_second': 36.35, 'eval_steps_per_second': 4.556, 'epoch': 11.0}


 80%|████████  | 1140/1425 [12:55<02:06,  2.24it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 80%|████████  | 1140/1425 [13:16<02:06,  2.24it/s]

{'eval_runtime': 20.855, 'eval_samples_per_second': 36.346, 'eval_steps_per_second': 4.555, 'epoch': 12.0}


 87%|████████▋ | 1235/1425 [14:02<01:24,  2.24it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 87%|████████▋ | 1235/1425 [14:23<01:24,  2.24it/s]

{'eval_runtime': 20.8515, 'eval_samples_per_second': 36.352, 'eval_steps_per_second': 4.556, 'epoch': 13.0}


 93%|█████████▎| 1330/1425 [15:08<00:42,  2.24it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

 93%|█████████▎| 1330/1425 [15:29<00:42,  2.24it/s]

{'eval_runtime': 20.8546, 'eval_samples_per_second': 36.347, 'eval_steps_per_second': 4.555, 'epoch': 14.0}


100%|██████████| 1425/1425 [16:15<00:00,  2.25it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 758
  Batch size = 8

100%|██████████| 1425/1425 [16:35<00:00,  2.25it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 1425/1425 [16:36<00:00,  1.43it/s]
Configuration saved in ../adapter_qa_en-ar_mbert_15/adapter_config.json
Module weights saved in ../adapter_qa_en-ar_mbert_15/pytorch_adapter.bin
Configuration saved in ../adapter_qa_en-ar_mbert_15/head_config.json
Module weights saved in ../adapter_qa_en-ar_mbert_15/pytorch_model_head.bin


{'eval_runtime': 20.8563, 'eval_samples_per_second': 36.344, 'eval_steps_per_second': 4.555, 'epoch': 15.0}
{'train_runtime': 996.1553, 'train_samples_per_second': 11.414, 'train_steps_per_second': 1.43, 'train_loss': 0.8728424607661732, 'epoch': 15.0}


100%|██████████| 1/1 [00:00<00:00,  3.52ba/s]
100%|██████████| 1/1 [00:02<00:00,  2.24s/ba]
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/adapter_config.json
Overwriting existing adapter 'squad_adapter'.
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_adapter.bin
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/head_config.json
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_model_head.bin
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 736
  Num Epochs = 15
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradien

{'eval_runtime': 20.1827, 'eval_samples_per_second': 36.467, 'eval_steps_per_second': 4.558, 'epoch': 1.0}


 13%|█▎        | 184/1380 [01:48<09:33,  2.09it/s] The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 13%|█▎        | 184/1380 [02:08<09:33,  2.09it/s]

{'eval_runtime': 20.1848, 'eval_samples_per_second': 36.463, 'eval_steps_per_second': 4.558, 'epoch': 2.0}


 20%|██        | 276/1380 [02:52<08:49,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 20%|██        | 276/1380 [03:13<08:49,  2.09it/s]

{'eval_runtime': 20.182, 'eval_samples_per_second': 36.468, 'eval_steps_per_second': 4.559, 'epoch': 3.0}


 27%|██▋       | 368/1380 [03:57<08:03,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 27%|██▋       | 368/1380 [04:17<08:03,  2.09it/s]

{'eval_runtime': 20.1889, 'eval_samples_per_second': 36.456, 'eval_steps_per_second': 4.557, 'epoch': 4.0}


 33%|███▎      | 460/1380 [05:01<07:20,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 33%|███▎      | 460/1380 [05:21<07:20,  2.09it/s]

{'eval_runtime': 20.1862, 'eval_samples_per_second': 36.461, 'eval_steps_per_second': 4.558, 'epoch': 5.0}


 36%|███▌      | 500/1380 [05:41<07:00,  2.09it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/special_tokens_map.json


{'loss': 1.0296, 'learning_rate': 6.376811594202898e-05, 'epoch': 5.43}


 40%|████      | 552/1380 [06:06<06:35,  2.09it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 40%|████      | 552/1380 [06:26<06:35,  2.09it/s]

{'eval_runtime': 20.1824, 'eval_samples_per_second': 36.467, 'eval_steps_per_second': 4.558, 'epoch': 6.0}


 47%|████▋     | 644/1380 [07:10<05:52,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 47%|████▋     | 644/1380 [07:31<05:52,  2.09it/s]

{'eval_runtime': 20.1853, 'eval_samples_per_second': 36.462, 'eval_steps_per_second': 4.558, 'epoch': 7.0}


 53%|█████▎    | 736/1380 [08:15<05:07,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 53%|█████▎    | 736/1380 [08:35<05:07,  2.09it/s]

{'eval_runtime': 20.1875, 'eval_samples_per_second': 36.458, 'eval_steps_per_second': 4.557, 'epoch': 8.0}


 60%|██████    | 828/1380 [09:19<04:24,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 60%|██████    | 828/1380 [09:39<04:24,  2.09it/s]

{'eval_runtime': 20.1847, 'eval_samples_per_second': 36.463, 'eval_steps_per_second': 4.558, 'epoch': 9.0}


 67%|██████▋   | 920/1380 [10:24<03:40,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 67%|██████▋   | 920/1380 [10:44<03:40,  2.09it/s]

{'eval_runtime': 20.1868, 'eval_samples_per_second': 36.459, 'eval_steps_per_second': 4.557, 'epoch': 10.0}


 72%|███████▏  | 1000/1380 [11:22<03:01,  2.09it/s]Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/special_tokens_map.json


{'loss': 0.5324, 'learning_rate': 2.753623188405797e-05, 'epoch': 10.87}


 73%|███████▎  | 1012/1380 [11:28<02:56,  2.08it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 73%|███████▎  | 1012/1380 [11:48<02:56,  2.08it/s]

{'eval_runtime': 20.1844, 'eval_samples_per_second': 36.464, 'eval_steps_per_second': 4.558, 'epoch': 11.0}


 80%|████████  | 1104/1380 [12:33<02:12,  2.09it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 80%|████████  | 1104/1380 [12:53<02:12,  2.09it/s]

{'eval_runtime': 20.184, 'eval_samples_per_second': 36.465, 'eval_steps_per_second': 4.558, 'epoch': 12.0}


 87%|████████▋ | 1196/1380 [13:37<01:28,  2.09it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 87%|████████▋ | 1196/1380 [13:57<01:28,  2.09it/s]

{'eval_runtime': 20.183, 'eval_samples_per_second': 36.466, 'eval_steps_per_second': 4.558, 'epoch': 13.0}


 93%|█████████▎| 1288/1380 [14:42<00:43,  2.09it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

 93%|█████████▎| 1288/1380 [15:02<00:43,  2.09it/s]

{'eval_runtime': 20.1834, 'eval_samples_per_second': 36.466, 'eval_steps_per_second': 4.558, 'epoch': 14.0}


100%|██████████| 1380/1380 [15:46<00:00,  2.09it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 736
  Batch size = 8

100%|██████████| 1380/1380 [16:06<00:00,  2.09it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)




{'eval_runtime': 20.1849, 'eval_samples_per_second': 36.463, 'eval_steps_per_second': 4.558, 'epoch': 15.0}


100%|██████████| 1380/1380 [16:06<00:00,  1.43it/s]
Configuration saved in ../adapter_qa_en-vi_mbert_15/adapter_config.json
Module weights saved in ../adapter_qa_en-vi_mbert_15/pytorch_adapter.bin
Configuration saved in ../adapter_qa_en-vi_mbert_15/head_config.json
Module weights saved in ../adapter_qa_en-vi_mbert_15/pytorch_model_head.bin


{'train_runtime': 966.8622, 'train_samples_per_second': 11.418, 'train_steps_per_second': 1.427, 'train_loss': 0.6666272923566293, 'epoch': 15.0}


100%|██████████| 1/1 [00:00<00:00,  3.81ba/s]
100%|██████████| 1/1 [00:02<00:00,  2.24s/ba]
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/adapter_config.json
Overwriting existing adapter 'squad_adapter'.
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_adapter.bin
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/head_config.json
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_model_head.bin
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 729
  Num Epochs = 15
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradien

{'eval_runtime': 20.1854, 'eval_samples_per_second': 36.115, 'eval_steps_per_second': 4.558, 'epoch': 1.0}


 13%|█▎        | 183/1380 [01:47<09:32,  2.09it/s] The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 13%|█▎        | 184/1380 [02:07<09:32,  2.09it/s]

{'eval_runtime': 20.1838, 'eval_samples_per_second': 36.118, 'eval_steps_per_second': 4.558, 'epoch': 2.0}


 20%|█▉        | 275/1380 [02:51<08:49,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 20%|██        | 276/1380 [03:11<08:48,  2.09it/s]

{'eval_runtime': 20.1845, 'eval_samples_per_second': 36.117, 'eval_steps_per_second': 4.558, 'epoch': 3.0}


 27%|██▋       | 367/1380 [03:55<08:04,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 27%|██▋       | 368/1380 [04:15<08:04,  2.09it/s]

{'eval_runtime': 20.1902, 'eval_samples_per_second': 36.107, 'eval_steps_per_second': 4.557, 'epoch': 4.0}


 33%|███▎      | 459/1380 [04:58<07:20,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 33%|███▎      | 460/1380 [05:19<07:20,  2.09it/s]

{'eval_runtime': 20.1892, 'eval_samples_per_second': 36.108, 'eval_steps_per_second': 4.557, 'epoch': 5.0}


 36%|███▌      | 500/1380 [05:38<07:00,  2.09it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/special_tokens_map.json


{'loss': 1.2523, 'learning_rate': 6.376811594202898e-05, 'epoch': 5.43}


 40%|███▉      | 551/1380 [06:03<06:37,  2.09it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 40%|████      | 552/1380 [06:23<06:36,  2.09it/s]

{'eval_runtime': 20.1856, 'eval_samples_per_second': 36.115, 'eval_steps_per_second': 4.558, 'epoch': 6.0}


 47%|████▋     | 643/1380 [07:06<05:52,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 47%|████▋     | 644/1380 [07:27<05:52,  2.09it/s]

{'eval_runtime': 20.1848, 'eval_samples_per_second': 36.116, 'eval_steps_per_second': 4.558, 'epoch': 7.0}


 53%|█████▎    | 735/1380 [08:10<05:08,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 53%|█████▎    | 736/1380 [08:30<05:08,  2.09it/s]

{'eval_runtime': 20.1904, 'eval_samples_per_second': 36.106, 'eval_steps_per_second': 4.557, 'epoch': 8.0}


 60%|█████▉    | 827/1380 [09:14<04:24,  2.09it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 60%|██████    | 828/1380 [09:34<04:24,  2.09it/s]

{'eval_runtime': 20.1863, 'eval_samples_per_second': 36.114, 'eval_steps_per_second': 4.558, 'epoch': 9.0}


 67%|██████▋   | 919/1380 [10:18<03:40,  2.09it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 67%|██████▋   | 920/1380 [10:38<03:39,  2.09it/s]

{'eval_runtime': 20.1877, 'eval_samples_per_second': 36.111, 'eval_steps_per_second': 4.557, 'epoch': 10.0}


 72%|███████▏  | 1000/1380 [11:16<03:01,  2.09it/s]Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/special_tokens_map.json


{'loss': 0.6842, 'learning_rate': 2.753623188405797e-05, 'epoch': 10.87}


 73%|███████▎  | 1011/1380 [11:22<02:57,  2.08it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 73%|███████▎  | 1012/1380 [11:42<02:57,  2.08it/s]

{'eval_runtime': 20.1897, 'eval_samples_per_second': 36.108, 'eval_steps_per_second': 4.557, 'epoch': 11.0}


 80%|███████▉  | 1103/1380 [12:26<02:12,  2.09it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 80%|████████  | 1104/1380 [12:46<02:12,  2.09it/s]

{'eval_runtime': 20.1827, 'eval_samples_per_second': 36.12, 'eval_steps_per_second': 4.558, 'epoch': 12.0}


 87%|████████▋ | 1195/1380 [13:30<01:28,  2.09it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 87%|████████▋ | 1196/1380 [13:50<01:28,  2.09it/s]

{'eval_runtime': 20.1915, 'eval_samples_per_second': 36.104, 'eval_steps_per_second': 4.556, 'epoch': 13.0}


 93%|█████████▎| 1287/1380 [14:33<00:44,  2.09it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

 93%|█████████▎| 1288/1380 [14:54<00:44,  2.09it/s]

{'eval_runtime': 20.1827, 'eval_samples_per_second': 36.12, 'eval_steps_per_second': 4.558, 'epoch': 14.0}


100%|█████████▉| 1379/1380 [15:37<00:00,  2.09it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 729
  Batch size = 8

100%|██████████| 1380/1380 [15:58<00:00,  2.09it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 1380/1380 [15:58<00:00,  1.44it/s]
Configuration saved in ../adapter_qa_en-zh_mbert_15/adapter_config.json
Module weights saved in ../adapter_qa_en-zh_mbert_15/pytorch_adapter.bin
Configuration saved in ../adapter_qa_en-zh_mbert_15/head_config.json
Module weights saved in ../adapter_qa_en-zh_mbert_15/pytorch_model_head.bin


{'eval_runtime': 20.1867, 'eval_samples_per_second': 36.113, 'eval_steps_per_second': 4.557, 'epoch': 15.0}
{'train_runtime': 958.1011, 'train_samples_per_second': 11.413, 'train_steps_per_second': 1.44, 'train_loss': 0.8392391702403192, 'epoch': 15.0}


100%|██████████| 1/1 [00:00<00:00,  3.03ba/s]
100%|██████████| 1/1 [00:02<00:00,  2.73s/ba]
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/adapter_config.json
Overwriting existing adapter 'squad_adapter'.
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_adapter.bin
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/head_config.json
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_model_head.bin
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 765
  Num Epochs = 15
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradien

{'eval_runtime': 21.0764, 'eval_samples_per_second': 36.297, 'eval_steps_per_second': 4.555, 'epoch': 1.0}


 13%|█▎        | 192/1440 [01:52<08:52,  2.35it/s] The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 13%|█▎        | 192/1440 [02:13<08:52,  2.35it/s]

{'eval_runtime': 21.0754, 'eval_samples_per_second': 36.298, 'eval_steps_per_second': 4.555, 'epoch': 2.0}


 20%|██        | 288/1440 [02:59<08:11,  2.35it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 20%|██        | 288/1440 [03:20<08:11,  2.35it/s]

{'eval_runtime': 21.0794, 'eval_samples_per_second': 36.291, 'eval_steps_per_second': 4.554, 'epoch': 3.0}


 27%|██▋       | 384/1440 [04:06<07:30,  2.34it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 27%|██▋       | 384/1440 [04:27<07:30,  2.34it/s]

{'eval_runtime': 21.0742, 'eval_samples_per_second': 36.3, 'eval_steps_per_second': 4.555, 'epoch': 4.0}


 33%|███▎      | 480/1440 [05:13<06:49,  2.35it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 33%|███▎      | 480/1440 [05:34<06:49,  2.35it/s]

{'eval_runtime': 21.0764, 'eval_samples_per_second': 36.296, 'eval_steps_per_second': 4.555, 'epoch': 5.0}


 35%|███▍      | 500/1440 [05:44<07:37,  2.06it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/special_tokens_map.json


{'loss': 1.4234, 'learning_rate': 6.527777777777778e-05, 'epoch': 5.21}


 40%|████      | 576/1440 [06:21<06:07,  2.35it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 40%|████      | 576/1440 [06:42<06:07,  2.35it/s]

{'eval_runtime': 21.0782, 'eval_samples_per_second': 36.293, 'eval_steps_per_second': 4.554, 'epoch': 6.0}


 47%|████▋     | 672/1440 [07:28<05:27,  2.35it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 47%|████▋     | 672/1440 [07:49<05:27,  2.35it/s]

{'eval_runtime': 21.0759, 'eval_samples_per_second': 36.297, 'eval_steps_per_second': 4.555, 'epoch': 7.0}


 53%|█████▎    | 768/1440 [08:35<04:46,  2.35it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 53%|█████▎    | 768/1440 [08:56<04:46,  2.35it/s]

{'eval_runtime': 21.0776, 'eval_samples_per_second': 36.294, 'eval_steps_per_second': 4.555, 'epoch': 8.0}


 60%|██████    | 864/1440 [09:41<04:05,  2.35it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 60%|██████    | 864/1440 [10:03<04:05,  2.35it/s]

{'eval_runtime': 21.0778, 'eval_samples_per_second': 36.294, 'eval_steps_per_second': 4.555, 'epoch': 9.0}


 67%|██████▋   | 960/1440 [10:48<03:24,  2.35it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 67%|██████▋   | 960/1440 [11:10<03:24,  2.35it/s]

{'eval_runtime': 21.0761, 'eval_samples_per_second': 36.297, 'eval_steps_per_second': 4.555, 'epoch': 10.0}


 69%|██████▉   | 1000/1440 [11:29<03:30,  2.09it/s]Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/special_tokens_map.json


{'loss': 0.7727, 'learning_rate': 3.055555555555556e-05, 'epoch': 10.42}


 73%|███████▎  | 1056/1440 [11:56<02:43,  2.35it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 73%|███████▎  | 1056/1440 [12:17<02:43,  2.35it/s]

{'eval_runtime': 21.0738, 'eval_samples_per_second': 36.301, 'eval_steps_per_second': 4.555, 'epoch': 11.0}


 80%|████████  | 1152/1440 [13:03<02:02,  2.35it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 80%|████████  | 1152/1440 [13:24<02:02,  2.35it/s]

{'eval_runtime': 21.0758, 'eval_samples_per_second': 36.297, 'eval_steps_per_second': 4.555, 'epoch': 12.0}


 87%|████████▋ | 1248/1440 [14:10<01:21,  2.35it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 87%|████████▋ | 1248/1440 [14:31<01:21,  2.35it/s]

{'eval_runtime': 21.0788, 'eval_samples_per_second': 36.292, 'eval_steps_per_second': 4.554, 'epoch': 13.0}


 93%|█████████▎| 1344/1440 [15:17<00:40,  2.35it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

 93%|█████████▎| 1344/1440 [15:38<00:40,  2.35it/s]

{'eval_runtime': 21.0772, 'eval_samples_per_second': 36.295, 'eval_steps_per_second': 4.555, 'epoch': 14.0}


100%|██████████| 1440/1440 [16:24<00:00,  2.35it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 765
  Batch size = 8

100%|██████████| 1440/1440 [16:45<00:00,  2.35it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 1440/1440 [16:45<00:00,  1.43it/s]
Configuration saved in ../adapter_qa_en-hi_mbert_15/adapter_config.json
Module weights saved in ../adapter_qa_en-hi_mbert_15/pytorch_adapter.bin
Configuration saved in ../adapter_qa_en-hi_mbert_15/head_config.json
Module weights saved in ../adapter_qa_en-hi_mbert_15/pytorch_model_head.bin


{'eval_runtime': 21.0771, 'eval_samples_per_second': 36.295, 'eval_steps_per_second': 4.555, 'epoch': 15.0}
{'train_runtime': 1005.4437, 'train_samples_per_second': 11.413, 'train_steps_per_second': 1.432, 'train_loss': 0.925193362765842, 'epoch': 15.0}


100%|██████████| 1/1 [00:00<00:00,  3.47ba/s]
100%|██████████| 1/1 [00:02<00:00,  2.49s/ba]
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/adapter_config.json
Overwriting existing adapter 'squad_adapter'.
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_adapter.bin
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/head_config.json
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_model_head.bin
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 750
  Num Epochs = 15
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradien

{'eval_runtime': 20.6318, 'eval_samples_per_second': 36.352, 'eval_steps_per_second': 4.556, 'epoch': 1.0}


 13%|█▎        | 188/1410 [01:50<09:05,  2.24it/s] The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 13%|█▎        | 188/1410 [02:11<09:05,  2.24it/s]

{'eval_runtime': 20.6345, 'eval_samples_per_second': 36.347, 'eval_steps_per_second': 4.555, 'epoch': 2.0}


 20%|██        | 282/1410 [02:56<08:22,  2.24it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 20%|██        | 282/1410 [03:16<08:22,  2.24it/s]

{'eval_runtime': 20.6291, 'eval_samples_per_second': 36.356, 'eval_steps_per_second': 4.557, 'epoch': 3.0}


 27%|██▋       | 376/1410 [04:01<07:40,  2.24it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 27%|██▋       | 376/1410 [04:22<07:40,  2.24it/s]

{'eval_runtime': 20.6334, 'eval_samples_per_second': 36.349, 'eval_steps_per_second': 4.556, 'epoch': 4.0}


 33%|███▎      | 470/1410 [05:07<06:59,  2.24it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 33%|███▎      | 470/1410 [05:28<06:59,  2.24it/s]

{'eval_runtime': 20.6346, 'eval_samples_per_second': 36.347, 'eval_steps_per_second': 4.555, 'epoch': 5.0}


 35%|███▌      | 500/1410 [05:42<07:15,  2.09it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/special_tokens_map.json


{'loss': 0.8902, 'learning_rate': 6.453900709219859e-05, 'epoch': 5.32}


 40%|████      | 564/1410 [06:13<06:16,  2.25it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 40%|████      | 564/1410 [06:34<06:16,  2.25it/s]

{'eval_runtime': 20.6307, 'eval_samples_per_second': 36.354, 'eval_steps_per_second': 4.556, 'epoch': 6.0}


 47%|████▋     | 658/1410 [07:19<05:35,  2.24it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 47%|████▋     | 658/1410 [07:39<05:35,  2.24it/s]

{'eval_runtime': 20.6308, 'eval_samples_per_second': 36.353, 'eval_steps_per_second': 4.556, 'epoch': 7.0}


 53%|█████▎    | 752/1410 [08:24<04:52,  2.25it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 53%|█████▎    | 752/1410 [08:45<04:52,  2.25it/s]

{'eval_runtime': 20.6313, 'eval_samples_per_second': 36.353, 'eval_steps_per_second': 4.556, 'epoch': 8.0}


 60%|██████    | 846/1410 [09:30<04:11,  2.24it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 60%|██████    | 846/1410 [09:51<04:11,  2.24it/s]

{'eval_runtime': 20.6325, 'eval_samples_per_second': 36.35, 'eval_steps_per_second': 4.556, 'epoch': 9.0}


 67%|██████▋   | 940/1410 [10:36<03:29,  2.24it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 67%|██████▋   | 940/1410 [10:56<03:29,  2.24it/s]

{'eval_runtime': 20.6303, 'eval_samples_per_second': 36.354, 'eval_steps_per_second': 4.556, 'epoch': 10.0}


 71%|███████   | 1000/1410 [11:25<03:16,  2.09it/s]Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/special_tokens_map.json


{'loss': 0.3967, 'learning_rate': 2.9078014184397162e-05, 'epoch': 10.64}


 73%|███████▎  | 1034/1410 [11:42<02:47,  2.24it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 73%|███████▎  | 1034/1410 [12:02<02:47,  2.24it/s]

{'eval_runtime': 20.629, 'eval_samples_per_second': 36.357, 'eval_steps_per_second': 4.557, 'epoch': 11.0}


 80%|████████  | 1128/1410 [12:47<02:05,  2.24it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 80%|████████  | 1128/1410 [13:08<02:05,  2.24it/s]

{'eval_runtime': 20.6317, 'eval_samples_per_second': 36.352, 'eval_steps_per_second': 4.556, 'epoch': 12.0}


 87%|████████▋ | 1222/1410 [13:53<01:23,  2.25it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 87%|████████▋ | 1222/1410 [14:14<01:23,  2.25it/s]

{'eval_runtime': 20.6285, 'eval_samples_per_second': 36.357, 'eval_steps_per_second': 4.557, 'epoch': 13.0}


 93%|█████████▎| 1316/1410 [14:59<00:41,  2.24it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

 93%|█████████▎| 1316/1410 [15:19<00:41,  2.24it/s]

{'eval_runtime': 20.6301, 'eval_samples_per_second': 36.355, 'eval_steps_per_second': 4.556, 'epoch': 14.0}


100%|██████████| 1410/1410 [16:04<00:00,  2.25it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 750
  Batch size = 8

100%|██████████| 1410/1410 [16:25<00:00,  2.25it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 1410/1410 [16:25<00:00,  1.43it/s]
Configuration saved in ../adapter_qa_en-ro_mbert_15/adapter_config.json
Module weights saved in ../adapter_qa_en-ro_mbert_15/pytorch_adapter.bin
Configuration saved in ../adapter_qa_en-ro_mbert_15/head_config.json
Module weights saved in ../adapter_qa_en-ro_mbert_15/pytorch_model_head.bin


{'eval_runtime': 20.6293, 'eval_samples_per_second': 36.356, 'eval_steps_per_second': 4.557, 'epoch': 15.0}
{'train_runtime': 985.7125, 'train_samples_per_second': 11.413, 'train_steps_per_second': 1.43, 'train_loss': 0.5254595979731134, 'epoch': 15.0}


100%|██████████| 1/1 [00:00<00:00,  1.37ba/s]
100%|██████████| 1/1 [00:04<00:00,  4.48s/ba]
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/adapter_config.json
Overwriting existing adapter 'squad_adapter'.
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_adapter.bin
Loading module configuration from ../adapter_weights/adapter_qa_en_mbert_4ep/head_config.json
Loading module weights from ../adapter_weights/adapter_qa_en_mbert_4ep/pytorch_model_head.bin
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 1057
  Num Epochs = 15
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradie

{'eval_runtime': 29.2366, 'eval_samples_per_second': 36.153, 'eval_steps_per_second': 4.549, 'epoch': 1.0}


 13%|█▎        | 265/1995 [02:36<13:47,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 13%|█▎        | 266/1995 [03:05<13:46,  2.09it/s]

{'eval_runtime': 29.2402, 'eval_samples_per_second': 36.149, 'eval_steps_per_second': 4.549, 'epoch': 2.0}


 20%|█▉        | 398/1995 [04:08<12:44,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 20%|██        | 399/1995 [04:37<12:43,  2.09it/s]

{'eval_runtime': 29.2385, 'eval_samples_per_second': 36.151, 'eval_steps_per_second': 4.549, 'epoch': 3.0}


 25%|██▌       | 500/1995 [05:26<11:55,  2.09it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/special_tokens_map.json


{'loss': 1.1896, 'learning_rate': 7.4937343358396e-05, 'epoch': 3.76}


 27%|██▋       | 531/1995 [05:41<11:39,  2.09it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 27%|██▋       | 532/1995 [06:10<11:39,  2.09it/s]

{'eval_runtime': 29.2401, 'eval_samples_per_second': 36.149, 'eval_steps_per_second': 4.549, 'epoch': 4.0}


 33%|███▎      | 664/1995 [07:13<10:37,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 33%|███▎      | 665/1995 [07:43<10:36,  2.09it/s]

{'eval_runtime': 29.2426, 'eval_samples_per_second': 36.146, 'eval_steps_per_second': 4.548, 'epoch': 5.0}


 40%|███▉      | 797/1995 [08:46<09:33,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 40%|████      | 798/1995 [09:15<09:32,  2.09it/s]

{'eval_runtime': 29.2437, 'eval_samples_per_second': 36.145, 'eval_steps_per_second': 4.548, 'epoch': 6.0}


 47%|████▋     | 930/1995 [10:18<08:29,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 47%|████▋     | 931/1995 [10:47<08:28,  2.09it/s]

{'eval_runtime': 29.2399, 'eval_samples_per_second': 36.149, 'eval_steps_per_second': 4.549, 'epoch': 7.0}


 50%|█████     | 1000/1995 [11:20<07:56,  2.09it/s] Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-1000
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/special_tokens_map.json


{'loss': 0.691, 'learning_rate': 4.987468671679198e-05, 'epoch': 7.52}


 53%|█████▎    | 1063/1995 [11:51<07:25,  2.09it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 53%|█████▎    | 1064/1995 [12:20<07:25,  2.09it/s]

{'eval_runtime': 29.2413, 'eval_samples_per_second': 36.147, 'eval_steps_per_second': 4.548, 'epoch': 8.0}


 60%|█████▉    | 1196/1995 [13:23<06:21,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 60%|██████    | 1197/1995 [13:53<06:21,  2.09it/s]

{'eval_runtime': 29.2412, 'eval_samples_per_second': 36.148, 'eval_steps_per_second': 4.548, 'epoch': 9.0}


 67%|██████▋   | 1329/1995 [14:56<05:18,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 67%|██████▋   | 1330/1995 [15:25<05:18,  2.09it/s]

{'eval_runtime': 29.2412, 'eval_samples_per_second': 36.148, 'eval_steps_per_second': 4.548, 'epoch': 10.0}


 73%|███████▎  | 1462/1995 [16:28<04:14,  2.09it/s]  The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 73%|███████▎  | 1463/1995 [16:58<04:14,  2.09it/s]

{'eval_runtime': 29.2401, 'eval_samples_per_second': 36.149, 'eval_steps_per_second': 4.549, 'epoch': 11.0}


 75%|███████▌  | 1500/1995 [17:15<03:56,  2.09it/s]  Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-1500
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/squad_adapter/pytorch_model_head.bin
tokenizer config file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/tokenizer_config.json
Special tokens file saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/special_tokens_map.json


{'loss': 0.4732, 'learning_rate': 2.4812030075187968e-05, 'epoch': 11.28}


 80%|███████▉  | 1595/1995 [18:01<03:11,  2.09it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 80%|████████  | 1596/1995 [18:30<03:10,  2.09it/s]

{'eval_runtime': 29.2383, 'eval_samples_per_second': 36.151, 'eval_steps_per_second': 4.549, 'epoch': 12.0}


 87%|████████▋ | 1728/1995 [19:33<02:07,  2.09it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 87%|████████▋ | 1729/1995 [20:03<02:07,  2.09it/s]

{'eval_runtime': 29.2417, 'eval_samples_per_second': 36.147, 'eval_steps_per_second': 4.548, 'epoch': 13.0}


 93%|█████████▎| 1861/1995 [21:06<01:04,  2.09it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

 93%|█████████▎| 1862/1995 [21:35<01:03,  2.09it/s]

{'eval_runtime': 29.2387, 'eval_samples_per_second': 36.151, 'eval_steps_per_second': 4.549, 'epoch': 14.0}


100%|█████████▉| 1994/1995 [22:38<00:00,  2.09it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.
***** Running Evaluation *****
  Num examples = 1057
  Batch size = 8

100%|██████████| 1995/1995 [23:08<00:00,  2.09it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 1995/1995 [23:08<00:00,  1.44it/s]
Configuration saved in ../adapter_qa_en-th_mbert_15/adapter_config.json
Module weights saved in ../adapter_qa_en-th_mbert_15/pytorch_adapter.bin
Configuration saved in ../adapter_qa_en-th_mbert_15/head_config.json
Module weights saved in ../adapter_qa_en-th_mbert_15/pytorch_model_head.bin


{'eval_runtime': 29.238, 'eval_samples_per_second': 36.152, 'eval_steps_per_second': 4.549, 'epoch': 15.0}
{'train_runtime': 1388.2718, 'train_samples_per_second': 11.421, 'train_steps_per_second': 1.437, 'train_loss': 0.6782757445983122, 'epoch': 15.0}


In [None]:
from transformers import pipeline

# Replace this with your own checkpoint
# model_checkpoint = adapter_checkpoint
question_answerer = pipeline("question-answering", model=model, tokenizer=tokenizer)

context = """
? Transformers is backed by the three most popular deep learning libraries ? Jax, PyTorch and TensorFlow ? with a seamless integration
between them. It's straightforward to train your models with one before loading them for inference with the other.
"""
question = "What is backed by deep learning libraries?"
question_answerer(question=question, context=context)

In [8]:
def get_predictions(dataset):
    
    predictions = []
    for example in tqdm(dataset):
        question = example['question']
        context = example['context']
        prediction = question_answerer(question=question, context=context)

        predictions.append(prediction)
    
    return predictions

In [9]:
# Need to convert the variables so that they can be used by the evaluation.compute function
def convert_for_evaluation(predictions, examples):
    ref = []
    pred = []
    for i, id in enumerate(examples['id']):
        ref.append({
            'answers': examples['answers'][i],
            'id': examples['id'][i]
        })
        pred.append({
            'prediction_text': predictions[i]['answer'],
            'id': examples['id'][i]
        })
        
    return pred, ref

In [17]:
squad_metric = evaluate.load("squad")
results_en2lang = {}
results_en = {}

for lang in langs:
    test = load_from_disk(f'../data/xquad_{lang}_test.hf/')

    adapter_name = 'squad_adapter'
    model.load_adapter("../adapter_weights/adapter_qa_en_mbert_4ep/")
    model.train_adapter(adapter_name)
    model.set_active_adapters(adapter_name)

    question_answerer = pipeline("question-answering", model=model, tokenizer=tokenizer)
    print('Running predictions for', lang)
    predictions = get_predictions(test)

    predictions, references = convert_for_evaluation(predictions, test) 
    res = squad_metric.compute(predictions=predictions, references=references)
    
    results_en[lang] = res
    del res
    adapter_name = 'squad_adapter'
    model.load_adapter(f"../adapter_qa_en-{lang}_mbert_15")
    model.train_adapter(adapter_name)
    model.set_active_adapters(adapter_name)

    question_answerer = pipeline("question-answering", model=model, tokenizer=tokenizer)
    print(f'Running predictions for en2{lang}')
    predictions = get_predictions(test)

    predictions, references = convert_for_evaluation(predictions, test) 
    res = squad_metric.compute(predictions=predictions, references=references)
    results_en2lang[lang] = res

Overwriting existing adapter 'squad_adapter'.


Running predictions for en


  tensor = as_tensor(value)
  p_mask = np.asarray(
100%|██████████| 238/238 [00:57<00:00,  4.14it/s]
Overwriting existing adapter 'squad_adapter'.


Running predictions for en2en


100%|██████████| 238/238 [00:56<00:00,  4.18it/s]
Overwriting existing adapter 'squad_adapter'.


Running predictions for es


100%|██████████| 238/238 [01:12<00:00,  3.30it/s]
Overwriting existing adapter 'squad_adapter'.


Running predictions for en2es


100%|██████████| 238/238 [01:11<00:00,  3.32it/s]
Overwriting existing adapter 'squad_adapter'.


Running predictions for de


100%|██████████| 238/238 [01:09<00:00,  3.44it/s]
Overwriting existing adapter 'squad_adapter'.


Running predictions for en2de


100%|██████████| 238/238 [01:09<00:00,  3.44it/s]
Overwriting existing adapter 'squad_adapter'.


Running predictions for el


100%|██████████| 238/238 [02:54<00:00,  1.37it/s]
Overwriting existing adapter 'squad_adapter'.


Running predictions for en2el


100%|██████████| 238/238 [02:45<00:00,  1.44it/s]
Overwriting existing adapter 'squad_adapter'.


Running predictions for ru


100%|██████████| 238/238 [01:30<00:00,  2.62it/s]
Overwriting existing adapter 'squad_adapter'.


Running predictions for en2ru


100%|██████████| 238/238 [01:28<00:00,  2.69it/s]
Overwriting existing adapter 'squad_adapter'.


Running predictions for tr


100%|██████████| 238/238 [01:28<00:00,  2.68it/s]
Overwriting existing adapter 'squad_adapter'.


Running predictions for en2tr


100%|██████████| 238/238 [01:28<00:00,  2.68it/s]
Overwriting existing adapter 'squad_adapter'.


Running predictions for ar


100%|██████████| 238/238 [01:28<00:00,  2.70it/s]
Overwriting existing adapter 'squad_adapter'.


Running predictions for en2ar


100%|██████████| 238/238 [01:28<00:00,  2.68it/s]
Overwriting existing adapter 'squad_adapter'.


Running predictions for vi


100%|██████████| 238/238 [01:13<00:00,  3.25it/s]
Overwriting existing adapter 'squad_adapter'.


Running predictions for en2vi


100%|██████████| 238/238 [01:12<00:00,  3.30it/s]
Overwriting existing adapter 'squad_adapter'.


Running predictions for zh


100%|██████████| 238/238 [01:29<00:00,  2.66it/s]
Overwriting existing adapter 'squad_adapter'.


Running predictions for en2zh


100%|██████████| 238/238 [01:28<00:00,  2.67it/s]
Overwriting existing adapter 'squad_adapter'.


Running predictions for hi


100%|██████████| 238/238 [01:58<00:00,  2.00it/s]
Overwriting existing adapter 'squad_adapter'.


Running predictions for en2hi


100%|██████████| 238/238 [02:01<00:00,  1.96it/s]
Overwriting existing adapter 'squad_adapter'.


Running predictions for ro


100%|██████████| 238/238 [01:37<00:00,  2.44it/s]
Overwriting existing adapter 'squad_adapter'.


Running predictions for en2ro


100%|██████████| 238/238 [01:47<00:00,  2.22it/s]
Overwriting existing adapter 'squad_adapter'.


Running predictions for th


100%|██████████| 238/238 [04:17<00:00,  1.08s/it]
Overwriting existing adapter 'squad_adapter'.


Running predictions for en2th


100%|██████████| 238/238 [04:12<00:00,  1.06s/it]


In [18]:
results_en  # only using english task adapter on subset

{'en': {'exact_match': 70.16806722689076, 'f1': 83.0434082312481},
 'es': {'exact_match': 52.94117647058823, 'f1': 71.35611779333979},
 'de': {'exact_match': 47.05882352941177, 'f1': 65.20696731288513},
 'el': {'exact_match': 40.33613445378151, 'f1': 52.35982282670372},
 'ru': {'exact_match': 46.63865546218487, 'f1': 65.09564781623608},
 'tr': {'exact_match': 30.672268907563026, 'f1': 44.25426155653541},
 'ar': {'exact_match': 35.714285714285715, 'f1': 54.152242893592366},
 'vi': {'exact_match': 44.95798319327731, 'f1': 64.0526791542217},
 'zh': {'exact_match': 40.33613445378151, 'f1': 46.42256902761106},
 'hi': {'exact_match': 39.91596638655462, 'f1': 53.30651607962533},
 'ro': {'exact_match': 51.260504201680675, 'f1': 66.74265149780281},
 'th': {'exact_match': 21.84873949579832, 'f1': 29.869947979191675}}

In [19]:
results_en2lang  # using english pretrained and langspecific finetuned adapter on subset

{'en': {'exact_match': 64.28571428571429, 'f1': 77.2663503904236},
 'es': {'exact_match': 50.42016806722689, 'f1': 71.2341397809544},
 'de': {'exact_match': 50.84033613445378, 'f1': 64.97960133802835},
 'el': {'exact_match': 43.27731092436975, 'f1': 56.886697912402326},
 'ru': {'exact_match': 44.95798319327731, 'f1': 63.18026744497336},
 'tr': {'exact_match': 42.857142857142854, 'f1': 57.84173962063021},
 'ar': {'exact_match': 40.33613445378151, 'f1': 57.93892004902878},
 'vi': {'exact_match': 48.739495798319325, 'f1': 68.90732971065079},
 'zh': {'exact_match': 42.436974789915965, 'f1': 51.20915032679739},
 'hi': {'exact_match': 45.79831932773109, 'f1': 61.46453289940687},
 'ro': {'exact_match': 51.260504201680675, 'f1': 66.10247046851102},
 'th': {'exact_match': 40.7563025210084, 'f1': 46.51496962421332}}