In [1]:
from transformers import AutoTokenizer
from transformers import TrainingArguments
from transformers import AdapterTrainer
from datasets import load_dataset, load_from_disk, concatenate_datasets
from transformers import AutoModelForQuestionAnswering
from torch.utils.data import DataLoader
from transformers import default_data_collator
import evaluate
from tqdm import tqdm
from transformers import pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_checkpoint = "bert-base-multilingual-cased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

In [3]:
max_length = 512
stride = 128

def preprocess_training_examples(examples):
    questions = [q.strip() for q in examples["question"]]
    context = examples["context"]
    
    # Tokenize question and context together into one input,
    # they will seperable with a special token between them.
    # Tokenizer will also split context into multiple chuncks,
    # if the max_length is exceeded.
    inputs = tokenizer(
        questions,
        context,
        max_length=max_length,
        truncation="only_second",
        stride=stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    # offset_mapping is the result of the split into
    # multiple chunks
    offset_mapping = inputs.pop("offset_mapping")
    sample_map = inputs.pop("overflow_to_sample_mapping")
    answers = examples["answers"]
    start_positions = []
    end_positions = []

    # This step is required to see if an answer is inside
    # the context chunks or not. It labels the multiple chunks
    # generated by the tokenizer into either not having
    # the answer, or where the answer is located
    for i, offset in enumerate(offset_mapping):
        sample_idx = sample_map[i]
        answer = answers[sample_idx]
        start_char = answer["answer_start"][0]
        end_char = answer["answer_start"][0] + len(answer["text"][0])
        sequence_ids = inputs.sequence_ids(i)

        # Find the start and end of the context
        idx = 0
        while sequence_ids[idx] != 1:
            idx += 1
        context_start = idx
        while sequence_ids[idx] == 1:
            idx += 1
        context_end = idx - 1

        # If the answer is not fully inside the context, label is (0, 0)
        if offset[context_start][0] > start_char or offset[context_end][1] < end_char:
            start_positions.append(0)
            end_positions.append(0)
        else:
            # Otherwise it's the start and end token positions
            idx = context_start
            while idx <= context_end and offset[idx][0] <= start_char:
                idx += 1
            start_positions.append(idx - 1)

            idx = context_end
            while idx >= context_start and offset[idx][1] >= end_char:
                idx -= 1
            end_positions.append(idx + 1)

    inputs["start_positions"] = start_positions
    inputs["end_positions"] = end_positions
    return inputs

In [4]:
def preprocess_validation_examples(examples):
    questions = [q.strip() for q in examples["question"]]
    context = examples["context"]
    
    # It's the same thing as with the preprocess_training_examples
    # tokenizer, but here 
    inputs = tokenizer(
        questions,
        context,
        max_length=max_length,
        truncation="only_second",
        stride=stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    sample_map = inputs.pop("overflow_to_sample_mapping")
    example_ids = []

    for i in range(len(inputs["input_ids"])):
        sample_idx = sample_map[i]
        example_ids.append(examples["id"][sample_idx])

        sequence_ids = inputs.sequence_ids(i)
        offset = inputs["offset_mapping"][i]
        inputs["offset_mapping"][i] = [
            o if sequence_ids[k] == 1 else None for k, o in enumerate(offset)
        ]

    inputs["example_id"] = example_ids
    return inputs

In [5]:
langs = ["en", "es", "de", "el", "ru", "tr", "ar", "vi", "zh", "hi", "ro", "th"]

In [6]:
model = AutoModelForQuestionAnswering.from_pretrained(model_checkpoint)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForQuestionAnswering: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-bas

In [7]:
adapter1 = model.load_adapter("../adapter_weights/adapter_qa_en_mbert_4ep/")
adapter_es = model.load_adapter(f"../adapter_weights/adapter_qa_en-es_mbert_15/", load_as=f"xquad_es")
adapter_de = model.load_adapter(f"../adapter_weights/adapter_qa_en-de_mbert_15/", load_as=f"xquad_de")
adapter_el = model.load_adapter(f"../adapter_weights/adapter_qa_en-el_mbert_15/", load_as=f"xquad_el")
adapter_ru = model.load_adapter(f"../adapter_weights/adapter_qa_en-ru_mbert_15/", load_as=f"xquad_ru")
adapter_tr = model.load_adapter(f"../adapter_weights/adapter_qa_en-tr_mbert_15/", load_as=f"xquad_tr")
adapter_ar = model.load_adapter(f"../adapter_weights/adapter_qa_en-ar_mbert_15/", load_as=f"xquad_ar")
adapter_vi = model.load_adapter(f"../adapter_weights/adapter_qa_en-vi_mbert_15/", load_as=f"xquad_vi")
adapter_zh = model.load_adapter(f"../adapter_weights/adapter_qa_en-zh_mbert_15/", load_as=f"xquad_zh")
adapter_hi = model.load_adapter(f"../adapter_weights/adapter_qa_en-hi_mbert_15/", load_as=f"xquad_hi")
adapter_ro = model.load_adapter(f"../adapter_weights/adapter_qa_en-ro_mbert_15/", load_as=f"xquad_ro")
adapter_th = model.load_adapter(f"../adapter_weights/adapter_qa_en-th_mbert_15/", load_as=f"xquad_th")

In [8]:
adapter_setup = [[
    adapter1,
    adapter_es,
    adapter_de,
    adapter_el,
    adapter_ru,
    adapter_tr,
    adapter_ar,
    adapter_vi,
    adapter_zh,
    adapter_hi,
    adapter_ro,
    adapter_th
]]

In [9]:
batch_size = 8
epochs = 4

In [10]:
train_en = load_from_disk("../data/xquad_en_train.hf/")
train_es = load_from_disk("../data/xquad_es_train.hf/")
train_de = load_from_disk("../data/xquad_de_train.hf/")
train_el = load_from_disk("../data/xquad_el_train.hf/")
train_ru = load_from_disk("../data/xquad_ru_train.hf/")
train_tr = load_from_disk("../data/xquad_tr_train.hf/")
train_ar = load_from_disk("../data/xquad_ar_train.hf/")
train_vi = load_from_disk("../data/xquad_vi_train.hf/")
train_zh = load_from_disk("../data/xquad_zh_train.hf/")
train_hi = load_from_disk("../data/xquad_hi_train.hf/")
train_ro = load_from_disk("../data/xquad_ro_train.hf/")
train_th = load_from_disk("../data/xquad_th_train.hf/")
val_en = load_from_disk("../data/xquad_en_val.hf/")
val_es = load_from_disk("../data/xquad_es_val.hf/")
val_de = load_from_disk("../data/xquad_de_val.hf/")
val_el = load_from_disk("../data/xquad_el_val.hf/")
val_ru = load_from_disk("../data/xquad_ru_val.hf/")
val_tr = load_from_disk("../data/xquad_tr_val.hf/")
val_ar = load_from_disk("../data/xquad_ar_val.hf/")
val_vi = load_from_disk("../data/xquad_vi_val.hf/")
val_zh = load_from_disk("../data/xquad_zh_val.hf/")
val_hi = load_from_disk("../data/xquad_hi_val.hf/")
val_ro = load_from_disk("../data/xquad_ro_val.hf/")
val_th = load_from_disk("../data/xquad_th_val.hf/")
train = concatenate_datasets([
    train_en,
    train_es,
    train_de,
    train_el,
    train_ru,
    train_tr,
    train_ar,
    train_vi,
    train_zh,
    train_hi,
    train_ro,
    train_th
])
val = concatenate_datasets([
    val_en,
    val_es,
    val_de,
    val_el,
    val_ru,
    val_tr,
    val_ar,
    val_vi,
    val_zh,
    val_hi,
    val_ro,
    val_th
])
del train_en, train_es, train_de, train_el, train_ru, train_tr, train_ar, train_vi, train_zh, train_hi, train_ro, train_th, val_en, val_es, val_de, val_el, val_ru, val_tr, val_ar, val_vi, val_zh, val_hi, val_ro, val_th

In [11]:
model.add_adapter_fusion(adapter_setup[0])
model.set_active_adapters(adapter_setup)

In [13]:
# for lang in langs:
#     adapter_setup[0].append(model.load_adapter(f"../adapter_weights/adapter_qa_en-{lang}_mbert_15/", load_as=f"xquad_{lang}"))

train_dataset = train.map(
    preprocess_training_examples,
    batched=True,
    remove_columns=train.column_names,
)
validation_dataset = val.map(
    preprocess_validation_examples,
    batched=True,
    remove_columns=val.column_names,
)
train_dataset.set_format("torch")
validation_set = validation_dataset.remove_columns(["example_id", "offset_mapping"])
validation_set.set_format("torch")
train_dataloader = DataLoader(
    train_dataset,
    shuffle=True,
    collate_fn=default_data_collator,
    batch_size=8,
)
validation_dataloader = DataLoader(
    validation_dataset,
    shuffle=True,
    collate_fn=default_data_collator,
    batch_size=8,
)
model_name = model_checkpoint.split("/")[-1]
args = TrainingArguments(
    f"{model_name}-adapter-squad",
    evaluation_strategy = "epoch",
    learning_rate=1e-3,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=epochs,
    weight_decay=1e-3
)
trainer = AdapterTrainer(
    model,
    args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    data_collator=default_data_collator,
    tokenizer=tokenizer
)
model.train_fusion(adapter_setup[0])
trainer.train()
del train, val

Loading cached processed dataset at ../data/xquad_en_train.hf/cache-08c30c0771adfa45.arrow
Loading cached processed dataset at ../data/xquad_en_val.hf/cache-ccf4090fa1f531cb.arrow
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 9336
  Num Epochs = 4
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 4668
                                        
  0%|          | 0/4668 [03:54<?, ?it/s]          Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-500
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/squad_adapter/adap

{'loss': 6.2406, 'learning_rate': 0.0008928877463581834, 'epoch': 0.43}


Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/xquad_ru/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/xquad_ru/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/xquad_ru/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/xquad_tr/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/xquad_tr/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/xquad_tr/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/xquad_tr/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/xquad_ar/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-500/xquad_ar/pytorch_adapter.bin
Confi

{'loss': 6.2391, 'learning_rate': 0.0007857754927163667, 'epoch': 0.86}


Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/xquad_ru/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/xquad_tr/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/xquad_tr/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/xquad_tr/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/xquad_tr/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/xquad_ar/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/xquad_ar/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/xquad_ar/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1000/xquad_ar/pytorch_model_he

{'eval_runtime': 455.5329, 'eval_samples_per_second': 20.495, 'eval_steps_per_second': 2.562, 'epoch': 1.0}


                                        
  0%|          | 0/4668 [18:38<?, ?it/s]           Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-1500
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/xquad_es/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/xquad_es/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/xquad_es/head_config.json
Module weights saved in b

{'loss': 6.238, 'learning_rate': 0.0006786632390745502, 'epoch': 1.29}


Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/xquad_tr/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/xquad_ar/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/xquad_ar/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/xquad_ar/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/xquad_ar/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/xquad_vi/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/xquad_vi/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/xquad_vi/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-1500/xquad_vi/pytorch_model_he

{'loss': 6.2392, 'learning_rate': 0.0005715509854327336, 'epoch': 1.71}


Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2000/xquad_ar/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2000/xquad_ar/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2000/xquad_ar/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2000/xquad_vi/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2000/xquad_vi/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2000/xquad_vi/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2000/xquad_vi/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2000/xquad_zh/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2000/xquad_zh/pytorch_adapter.

{'eval_runtime': 410.8069, 'eval_samples_per_second': 22.726, 'eval_steps_per_second': 2.841, 'epoch': 2.0}


                                        
  0%|          | 0/4668 [31:58<?, ?it/s]           Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-2500
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2500/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2500/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2500/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2500/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2500/xquad_es/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2500/xquad_es/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2500/xquad_es/head_config.json
Module weights saved in b

{'loss': 6.2391, 'learning_rate': 0.0004644387317909169, 'epoch': 2.14}


Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2500/xquad_ar/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2500/xquad_ar/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2500/xquad_ar/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2500/xquad_vi/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2500/xquad_vi/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2500/xquad_vi/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2500/xquad_vi/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2500/xquad_zh/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-2500/xquad_zh/pytorch_adapter.

{'loss': 6.2394, 'learning_rate': 0.0003573264781491002, 'epoch': 2.57}


Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-3000/xquad_ar/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-3000/xquad_ar/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-3000/xquad_ar/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-3000/xquad_ar/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-3000/xquad_vi/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-3000/xquad_vi/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-3000/xquad_vi/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-3000/xquad_vi/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-3000/xquad_zh/adapter_config.js

{'loss': 6.2383, 'learning_rate': 0.00025021422450728365, 'epoch': 3.0}


Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-3500/xquad_ru/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-3500/xquad_tr/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-3500/xquad_tr/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-3500/xquad_tr/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-3500/xquad_tr/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-3500/xquad_ar/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-3500/xquad_ar/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-3500/xquad_ar/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-3500/xquad_ar/pytorch_model_he

{'eval_runtime': 452.0529, 'eval_samples_per_second': 20.652, 'eval_steps_per_second': 2.582, 'epoch': 3.0}


                                        
  0%|          | 0/4668 [49:53<?, ?it/s]           Saving model checkpoint to bert-base-multilingual-cased-adapter-squad/checkpoint-4000
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4000/squad_adapter/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4000/squad_adapter/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4000/squad_adapter/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4000/squad_adapter/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4000/xquad_es/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4000/xquad_es/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4000/xquad_es/head_config.json
Module weights saved in b

{'loss': 6.2386, 'learning_rate': 0.000143101970865467, 'epoch': 3.43}


Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4000/xquad_ru/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4000/xquad_tr/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4000/xquad_tr/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4000/xquad_tr/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4000/xquad_tr/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4000/xquad_ar/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4000/xquad_ar/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4000/xquad_ar/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4000/xquad_ar/pytorch_model_he

{'loss': 6.2393, 'learning_rate': 3.598971722365039e-05, 'epoch': 3.86}


Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4500/xquad_tr/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4500/xquad_tr/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4500/xquad_tr/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4500/xquad_ar/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4500/xquad_ar/pytorch_adapter.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4500/xquad_ar/head_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4500/xquad_ar/pytorch_model_head.bin
Configuration saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4500/xquad_vi/adapter_config.json
Module weights saved in bert-base-multilingual-cased-adapter-squad/checkpoint-4500/xquad_vi/pytorch_adapter.

{'eval_runtime': 426.3603, 'eval_samples_per_second': 21.897, 'eval_steps_per_second': 2.737, 'epoch': 4.0}


                                          
100%|██████████| 4668/4668 [1:01:32<00:00,  1.26it/s]

{'train_runtime': 3692.8469, 'train_samples_per_second': 10.113, 'train_steps_per_second': 1.264, 'train_loss': 6.239040606841126, 'epoch': 4.0}





In [16]:
model.save_adapter_fusion("./adapter_weights", ",".join(adapter_setup[0]))

Configuration saved in ./adapter_weights/adapter_fusion_config.json
Module weights saved in ./adapter_weights/pytorch_model_adapter_fusion.bin


In [17]:
model.save_all_adapters("../adapter_weights")

Configuration saved in ../adapter_weights/squad_adapter/adapter_config.json
Module weights saved in ../adapter_weights/squad_adapter/pytorch_adapter.bin
Configuration saved in ../adapter_weights/squad_adapter/head_config.json
Module weights saved in ../adapter_weights/squad_adapter/pytorch_model_head.bin
Configuration saved in ../adapter_weights/xquad_es/adapter_config.json
Module weights saved in ../adapter_weights/xquad_es/pytorch_adapter.bin
Configuration saved in ../adapter_weights/xquad_es/head_config.json
Module weights saved in ../adapter_weights/xquad_es/pytorch_model_head.bin
Configuration saved in ../adapter_weights/xquad_de/adapter_config.json
Module weights saved in ../adapter_weights/xquad_de/pytorch_adapter.bin
Configuration saved in ../adapter_weights/xquad_de/head_config.json
Module weights saved in ../adapter_weights/xquad_de/pytorch_model_head.bin
Configuration saved in ../adapter_weights/xquad_el/adapter_config.json
Module weights saved in ../adapter_weights/xquad_el

In [7]:
def get_predictions(dataset):
    
    predictions = []
    for example in tqdm(dataset):
        question = example['question']
        context = example['context']
        prediction = question_answerer(question=question, context=context)

        predictions.append(prediction)
    
    return predictions

In [8]:
# Need to convert the variables so that they can be used by the evaluation.compute function
def convert_for_evaluation(predictions, examples):
    ref = []
    pred = []
    for i, id in enumerate(examples['id']):
        ref.append({
            'answers': examples['answers'][i],
            'id': examples['id'][i]
        })
        pred.append({
            'prediction_text': predictions[i]['answer'],
            'id': examples['id'][i]
        })
        
    return pred, ref

In [13]:
squad_metric = evaluate.load("squad")
results_en2lang = {}
results_en = {}

model.load_adapter("../adapter_weights/squad_adapter/")
for lang in langs[1::]:
    model.load_adapter(f"../adapter_weights/xquad_{lang}")

adapter_setup = [[
    "squad_adapter",
    "xquad_es",
    "xquad_de",
    "xquad_el",
    "xquad_ru",
    "xquad_tr",
    "xquad_ar",
    "xquad_vi",
    "xquad_zh",
    "xquad_hi",
    "xquad_ro",
    "xquad_th"
]]
model.load_adapter_fusion("./adapter_weights/")
model.set_active_adapters(adapter_setup)
    
for lang in langs:
    test = load_from_disk(f'../data/xquad_{lang}_test.hf/')

    question_answerer = pipeline("question-answering", model=model, tokenizer=tokenizer)
    print('Running predictions for', lang)
    predictions = get_predictions(test)

    predictions, references = convert_for_evaluation(predictions, test) 
    res = squad_metric.compute(predictions=predictions, references=references)
    results_en2lang[lang] = res

Overwriting existing adapter 'squad_adapter'.
Overwriting existing adapter 'xquad_es'.
Overwriting existing adapter 'xquad_de'.
Overwriting existing adapter 'xquad_el'.
Overwriting existing adapter 'xquad_ru'.
Overwriting existing adapter 'xquad_tr'.
Overwriting existing adapter 'xquad_ar'.
Overwriting existing adapter 'xquad_vi'.
Overwriting existing adapter 'xquad_zh'.
Overwriting existing adapter 'xquad_hi'.
Overwriting existing adapter 'xquad_ro'.
Overwriting existing adapter 'xquad_th'.
Overwriting existing adapter fusion module 'squad_adapter,xquad_es,xquad_de,xquad_el,xquad_ru,xquad_tr,xquad_ar,xquad_vi,xquad_zh,xquad_hi,xquad_ro,xquad_th'


Running predictions for en


  tensor = as_tensor(value)
  p_mask = np.asarray(
100%|██████████| 238/238 [02:33<00:00,  1.55it/s]


Running predictions for es


100%|██████████| 238/238 [03:18<00:00,  1.20it/s]


Running predictions for de


100%|██████████| 238/238 [03:03<00:00,  1.30it/s]


Running predictions for el


100%|██████████| 238/238 [06:32<00:00,  1.65s/it]


Running predictions for ru


100%|██████████| 238/238 [03:44<00:00,  1.06it/s]


Running predictions for tr


100%|██████████| 238/238 [03:47<00:00,  1.04it/s]


Running predictions for ar


100%|██████████| 238/238 [03:52<00:00,  1.03it/s]


Running predictions for vi


100%|██████████| 238/238 [03:04<00:00,  1.29it/s]


Running predictions for zh


100%|██████████| 238/238 [03:20<00:00,  1.19it/s]


Running predictions for hi


100%|██████████| 238/238 [04:52<00:00,  1.23s/it]


Running predictions for ro


100%|██████████| 238/238 [04:06<00:00,  1.04s/it]


Running predictions for th


100%|██████████| 238/238 [09:07<00:00,  2.30s/it]


In [14]:
results_en2lang

{'en': {'exact_match': 65.12605042016807, 'f1': 78.08605349448159},
 'es': {'exact_match': 46.63865546218487, 'f1': 68.6356736350034},
 'de': {'exact_match': 46.63865546218487, 'f1': 62.37268723044148},
 'el': {'exact_match': 42.857142857142854, 'f1': 56.19565299977405},
 'ru': {'exact_match': 44.95798319327731, 'f1': 63.83269041882486},
 'tr': {'exact_match': 35.714285714285715, 'f1': 46.79782359514781},
 'ar': {'exact_match': 36.554621848739494, 'f1': 55.785820567086056},
 'vi': {'exact_match': 41.596638655462186, 'f1': 62.09767256232905},
 'zh': {'exact_match': 39.91596638655462, 'f1': 46.14845938375351},
 'hi': {'exact_match': 42.857142857142854, 'f1': 55.906644709165725},
 'ro': {'exact_match': 50.0, 'f1': 67.96220499862176},
 'th': {'exact_match': 30.672268907563026, 'f1': 36.914765906362554}}