# How Multilingual is Multilingual BERT?

In [1]:
import wandb
wandb.init(mode="disabled")

In [2]:
import warnings

warnings.filterwarnings("ignore", category=UserWarning, module="transformers")
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

In [3]:
!pip install conllu transformers datasets

Collecting conllu
  Downloading conllu-6.0.0-py3-none-any.whl.metadata (21 kB)
Downloading conllu-6.0.0-py3-none-any.whl (16 kB)
Installing collected packages: conllu
Successfully installed conllu-6.0.0


In [4]:
import conllu
import numpy as np
import pandas as pd

from datasets import Dataset
from sklearn.metrics import accuracy_score
from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer, AutoTokenizer

from typing import List, Dict, Generator, Tuple

In [5]:
FR_TRAIN_PATH = "/kaggle/input/ud-dataset/fr_sequoia-ud-train.conllu"
FR_DEV_PATH = "/kaggle/input/ud-dataset/fr_sequoia-ud-dev.conllu"
FR_TEST_PATH = "/kaggle/input/ud-dataset/fr_sequoia-ud-test.conllu"
FR_MODEL_PATH = "/kaggle/working/fr"

AR_TRAIN_PATH = "/kaggle/input/ud-dataset/ar_padt-ud-train.conllu"
AR_DEV_PATH = "/kaggle/input/ud-dataset/ar_padt-ud-dev.conllu"
AR_TEST_PATH = "/kaggle/input/ud-dataset/ar_padt-ud-test.conllu"
AR_MODEL_PATH = "/kaggle/working/ar"

EN_TRAIN_PATH = "/kaggle/input/ud-dataset/en_ewt-ud-train.conllu"
EN_DEV_PATH = "/kaggle/input/ud-dataset/en_ewt-ud-dev.conllu"
EN_TEST_PATH = "/kaggle/input/ud-dataset/en_ewt-ud-test.conllu"
EN_MODEL_PATH = "/kaggle/working/en"

JA_TRAIN_PATH = "/kaggle/input/ud-dataset/ja_gsd-ud-train.conllu"
JA_DEV_PATH = "/kaggle/input/ud-dataset/ja_gsd-ud-dev.conllu"
JA_TEST_PATH = "/kaggle/input/ud-dataset/ja_gsd-ud-test.conllu"
JA_MODEL_PATH = "/kaggle/working/ja"

FA_TRAIN_PATH = "/kaggle/input/ud-dataset/fa_seraji-ud-train.conllu"
FA_DEV_PATH = "/kaggle/input/ud-dataset/fa_seraji-ud-dev.conllu"
FA_TEST_PATH = "/kaggle/input/ud-dataset/fa_seraji-ud-test.conllu"
FA_MODEL_PATH = "/kaggle/working/fa"

CHECKPOINT = "bert-base-multilingual-cased"
PAD_TAG = "<pad>"

BATCH_SIZE = 16
MAX_LENGTH = 512

tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

In [7]:
def load_conllu(filename: str) -> Generator[Tuple[List[str], List[str], List[str]], None, None]:
    for sentence in conllu.parse(open(filename, "rt", encoding="utf-8").read()):
        ids = [token["id"] for token in sentence] 
        tokenized_words = [token["form"] for token in sentence] 
        gold_tags = [token["upos"] for token in sentence]
        yield ids, tokenized_words, gold_tags

        
def assign_label_to_multitoken(ids: List[int], tokens: List[str], gold_labels: List[str]) -> Tuple[List[str], List[str]]:
    
    df = pd.DataFrame({
        'id': ids,
        'token': tokens,
        'pos': gold_labels
    })

    final_tokens = []
    final_pos_tags = []

    skip_indices = set()

    for _, row in df.iterrows():
        id_ = row['id']

        if isinstance(id_, tuple):
            start, end = id_[0], id_[2]
            skip_indices.update(range(start, end + 1))
            pos_tags = df.loc[
                (df['id'].apply(lambda x: isinstance(x, int) and start <= x <= end)), 'pos'
            ]
            concatenated_pos = '+'.join(pos_tags)
            final_tokens.append(row['token'])
            final_pos_tags.append(concatenated_pos)
        else:
            if id_ not in skip_indices: 
                final_tokens.append(row['token'])
                final_pos_tags.append(row['pos'])

    return final_tokens, final_pos_tags


def label2int(labels: List[str], label2ids: Dict[str, int]) -> List[int]:
    return [label2ids.get(label, -100) for label in labels]


def align_labels(texts: List[List[str]], gold_labels: List[List[str]], pos2id: dict):
    max_length = 512
    info = tokenizer(
        texts,
        return_offsets_mapping=True,
        is_split_into_words=True,
        padding=True,
        truncation=True,
        max_length=max_length
    )
    new_labels = []

    for index, offsets in enumerate(info['offset_mapping']):
        labels = gold_labels[index]
        for i, (start, end) in enumerate(offsets):
            if start == 0 and end == 0:
                labels.insert(i, PAD_TAG)
            if start != 0:
                labels.insert(i, PAD_TAG)

        new_labels.append(labels[:max_length])

    aligned_data = [
        {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}\
        for input_ids, attention_mask, labels in zip(info['input_ids'], info['attention_mask'], new_labels)
    ]

    for entry in aligned_data:
        entry["labels"] = label2int(entry["labels"], pos2id)

    return aligned_data

In [23]:
def create_dataset(data_file_path, pos2id=None, split="train"):
    corpus = list(load_conllu(data_file_path))
    corpus = [(ids, [token.replace(" ", "") for token in tokens], tags) for ids, tokens, tags in corpus]
    corpus = [assign_label_to_multitoken(ids, tokens, gold_labels) for ids, tokens, gold_labels in corpus]

    texts = [tokens for tokens, _ in corpus]
    gold_labels = [labels for _, labels in corpus]
    
    if split == "train":
        pos_labels = sorted(set(label for labels in gold_labels for label in labels if label))
        pos2id = {label: idx for idx, label in enumerate(pos_labels)}

    aligned_corpus = align_labels(texts, gold_labels, pos2id)
    return Dataset.from_list(aligned_corpus), pos2id 

In [8]:
def compute_metrics(pred):
    logits, labels = pred
    predictions = np.argmax(logits, axis=-1)

    true_labels = labels[labels != -100]
    true_predictions = predictions[labels != -100]

    accuracy = accuracy_score(true_labels, true_predictions)
    return {"accuracy": accuracy}

In [9]:
def evaluate_model(test_dataset, model_path, metrics=compute_metrics):

    model = AutoModelForTokenClassification.from_pretrained(model_path)

    trainer = Trainer(
        model=model,
        compute_metrics=metrics,
    )
    return  trainer.evaluate(test_dataset)

In [10]:
def train_pos_tagger(model, train_set, dev_set, output_dir):

    training_args = TrainingArguments(
        output_dir=output_dir,
        eval_strategy = "epoch",
        learning_rate=2e-5,
        per_device_train_batch_size=BATCH_SIZE,
        per_device_eval_batch_size=BATCH_SIZE,
        num_train_epochs=5,
        weight_decay=0.01,
        logging_steps=10)

    trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_set,
    eval_dataset=dev_set,
    compute_metrics=compute_metrics)
    trainer.train()
    model.save_pretrained(training_args.output_dir)

# Arabic

In [41]:
train_dataset, pos2id = create_dataset(AR_TRAIN_PATH, split="train")
dev_dataset, _ = create_dataset(AR_DEV_PATH, pos2id=pos2id, split="dev")
test_dataset, _ = create_dataset(AR_TEST_PATH, pos2id=pos2id, split="test")

In [49]:
pos2id

{'ADJ': 0,
 'ADJ+ADP': 1,
 'ADJ+ADP+NOUN': 2,
 'ADJ+ADP+PRON': 3,
 'ADJ+PRON': 4,
 'ADP': 5,
 'ADP+ADJ': 6,
 'ADP+ADJ+PRON': 7,
 'ADP+ADP': 8,
 'ADP+CCONJ': 9,
 'ADP+DET': 10,
 'ADP+NOUN': 11,
 'ADP+NOUN+ADP': 12,
 'ADP+NOUN+DET': 13,
 'ADP+NOUN+PRON': 14,
 'ADP+NUM': 15,
 'ADP+PART': 16,
 'ADP+PRON': 17,
 'ADP+PROPN': 18,
 'ADP+SCONJ': 19,
 'ADP+SCONJ+PART': 20,
 'ADP+SCONJ+PRON': 21,
 'ADP+X': 22,
 'ADV': 23,
 'ADV+DET': 24,
 'AUX': 25,
 'AUX+AUX': 26,
 'AUX+VERB': 27,
 'AUX+VERB+PRON': 28,
 'CCONJ': 29,
 'CCONJ+ADJ': 30,
 'CCONJ+ADJ+PRON': 31,
 'CCONJ+ADP': 32,
 'CCONJ+ADP+ADJ': 33,
 'CCONJ+ADP+ADP': 34,
 'CCONJ+ADP+CCONJ': 35,
 'CCONJ+ADP+CCONJ+DET': 36,
 'CCONJ+ADP+DET': 37,
 'CCONJ+ADP+NOUN': 38,
 'CCONJ+ADP+NOUN+PRON': 39,
 'CCONJ+ADP+PART': 40,
 'CCONJ+ADP+PRON': 41,
 'CCONJ+ADP+SCONJ': 42,
 'CCONJ+ADP+SCONJ+PRON': 43,
 'CCONJ+ADV': 44,
 'CCONJ+AUX': 45,
 'CCONJ+AUX+AUX': 46,
 'CCONJ+AUX+VERB': 47,
 'CCONJ+AUX+VERB+PRON': 48,
 'CCONJ+CCONJ': 49,
 'CCONJ+CCONJ+DET': 50,
 'CCONJ+

In [50]:
model = AutoModelForTokenClassification.from_pretrained(CHECKPOINT, num_labels=len(pos2id.keys()))

model.safetensors:   0%|          | 0.00/714M [00:00<?, ?B/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [52]:
train_pos_tagger(model, train_dataset, dev_dataset, output_dir=AR_MODEL_PATH)



Epoch,Training Loss,Validation Loss,Accuracy
1,0.3949,0.321314,0.928261
2,0.2281,0.219653,0.948736
3,0.1966,0.1907,0.954701
4,0.1654,0.181023,0.956356
5,0.1581,0.177774,0.957203


In [53]:
ar_acc_ar = evaluate_model(test_dataset=test_dataset, model_path=AR_MODEL_PATH)
ar_acc_ar

{'eval_loss': 0.19070009887218475,
 'eval_accuracy': 0.9544628099173553,
 'eval_runtime': 12.1491,
 'eval_samples_per_second': 55.971,
 'eval_steps_per_second': 3.539}

In [54]:
en_test_dataset, _ = create_dataset(EN_TEST_PATH, pos2id=pos2id, split="test")
fr_test_dataset, _ = create_dataset(FR_TEST_PATH, pos2id=pos2id, split="test")
fa_test_dataset, _ = create_dataset(FA_TEST_PATH, pos2id=pos2id, split="test")
ja_test_dataset, _ = create_dataset(JA_TEST_PATH, pos2id=pos2id, split="test")

In [55]:
ar_acc_fa = evaluate_model(test_dataset=fa_test_dataset, model_path=AR_MODEL_PATH)
ar_acc_fa

{'eval_loss': 1.17914879322052,
 'eval_accuracy': 0.7322923880878928,
 'eval_runtime': 5.6807,
 'eval_samples_per_second': 105.621,
 'eval_steps_per_second': 6.689}

In [56]:
ar_acc_en = evaluate_model(test_dataset=en_test_dataset, model_path=AR_MODEL_PATH)
ar_acc_en

{'eval_loss': 1.9181584119796753,
 'eval_accuracy': 0.5907077831542689,
 'eval_runtime': 30.1348,
 'eval_samples_per_second': 68.924,
 'eval_steps_per_second': 4.314}

In [57]:
ar_acc_fr = evaluate_model(test_dataset=fr_test_dataset, model_path=AR_MODEL_PATH)
ar_acc_fr

{'eval_loss': 1.7958828210830688,
 'eval_accuracy': 0.6269776042736799,
 'eval_runtime': 3.9251,
 'eval_samples_per_second': 116.176,
 'eval_steps_per_second': 7.388}

In [58]:
ar_acc_ja = evaluate_model(test_dataset=ja_test_dataset, model_path=AR_MODEL_PATH)
ar_acc_ja

{'eval_loss': 2.641343593597412,
 'eval_accuracy': 0.4167561761546724,
 'eval_runtime': 5.1326,
 'eval_samples_per_second': 105.794,
 'eval_steps_per_second': 6.624}

# English

In [60]:
train_dataset, pos2id = create_dataset(EN_TRAIN_PATH, split="train")
dev_dataset, _ = create_dataset(EN_DEV_PATH, pos2id=pos2id, split="dev")
test_dataset, _ = create_dataset(EN_TEST_PATH, pos2id=pos2id, split="test")

In [61]:
pos2id

{'ADJ': 0,
 'ADJ+PART': 1,
 'ADP': 2,
 'ADP+ADP': 3,
 'ADV': 4,
 'ADV+AUX': 5,
 'AUX': 6,
 'AUX+AUX': 7,
 'AUX+PART': 8,
 'AUX+PART+VERB': 9,
 'CCONJ': 10,
 'DET': 11,
 'DET+NOUN': 12,
 'INTJ': 13,
 'NOUN': 14,
 'NOUN+ADP': 15,
 'NOUN+AUX': 16,
 'NOUN+PART': 17,
 'NUM': 18,
 'NUM+PART': 19,
 'PART': 20,
 'PRON': 21,
 'PRON+AUX': 22,
 'PRON+PART': 23,
 'PRON+VERB': 24,
 'PROPN': 25,
 'PROPN+AUX': 26,
 'PROPN+PART': 27,
 'PROPN+PROPN': 28,
 'PUNCT': 29,
 'SCONJ': 30,
 'SYM': 31,
 'VERB': 32,
 'VERB+ADV': 33,
 'VERB+PART': 34,
 'VERB+PRON': 35,
 'X': 36}

In [62]:
model = AutoModelForTokenClassification.from_pretrained(CHECKPOINT, num_labels=len(pos2id.keys()))

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [63]:
train_pos_tagger(model, train_dataset, dev_dataset, output_dir=EN_MODEL_PATH)

A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1321,0.161382,0.959698
2,0.0857,0.13221,0.966839
3,0.0787,0.13126,0.968937
4,0.0393,0.130647,0.969985
5,0.0303,0.133803,0.970187


In [64]:
en_acc_en = evaluate_model(test_dataset=test_dataset, model_path=EN_MODEL_PATH)
en_acc_en

A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


{'eval_loss': 0.132149338722229,
 'eval_accuracy': 0.9723109260681515,
 'eval_runtime': 30.1918,
 'eval_samples_per_second': 68.794,
 'eval_steps_per_second': 4.306}

In [65]:
ar_test_dataset, _ = create_dataset(AR_TEST_PATH, pos2id=pos2id, split="test")
fr_test_dataset, _ = create_dataset(FR_TEST_PATH, pos2id=pos2id, split="test")
fa_test_dataset, _ = create_dataset(FA_TEST_PATH, pos2id=pos2id, split="test")
ja_test_dataset, _ = create_dataset(JA_TEST_PATH, pos2id=pos2id, split="test")

In [66]:
en_acc_ar = evaluate_model(test_dataset=ar_test_dataset, model_path=EN_MODEL_PATH)
en_acc_ar

A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


{'eval_loss': 2.4292356967926025,
 'eval_accuracy': 0.5486691086691087,
 'eval_runtime': 12.5002,
 'eval_samples_per_second': 54.399,
 'eval_steps_per_second': 3.44}

In [68]:
en_acc_fa = evaluate_model(test_dataset=fa_test_dataset, model_path=EN_MODEL_PATH)
en_acc_fa

A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


{'eval_loss': 1.5560001134872437,
 'eval_accuracy': 0.7197500473395191,
 'eval_runtime': 5.6612,
 'eval_samples_per_second': 105.985,
 'eval_steps_per_second': 6.712}

In [67]:
en_acc_fr = evaluate_model(test_dataset=fr_test_dataset, model_path=EN_MODEL_PATH)
en_acc_fr

A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


{'eval_loss': 0.6498637795448303,
 'eval_accuracy': 0.8708616298811545,
 'eval_runtime': 3.9437,
 'eval_samples_per_second': 115.627,
 'eval_steps_per_second': 7.353}

In [69]:
en_acc_ja = evaluate_model(test_dataset=ja_test_dataset, model_path=EN_MODEL_PATH)
en_acc_ja

A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


{'eval_loss': 2.606447458267212,
 'eval_accuracy': 0.5057541813717968,
 'eval_runtime': 5.0785,
 'eval_samples_per_second': 106.921,
 'eval_steps_per_second': 6.695}

In [14]:
%mkdir fa

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


# Farsi

In [24]:
train_dataset, pos2id = create_dataset(FA_TRAIN_PATH, split="train")
dev_dataset, _ = create_dataset(FA_DEV_PATH, pos2id=pos2id, split="dev")
test_dataset, _ = create_dataset(FA_TEST_PATH, pos2id=pos2id, split="test")

In [25]:
train_dataset = train_dataset.filter(lambda row: len(row['input_ids']) == len(row['labels']))

Filter:   0%|          | 0/4798 [00:00<?, ? examples/s]

In [26]:
for index, row in enumerate(train_dataset):
    if len(row['input_ids']) != len(row['labels']):
        print(len(tokenizer.convert_ids_to_tokens(row['input_ids'])))
        print(len(row['labels']))

In [12]:
pos2id

{'ADJ': 0,
 'ADJ+AUX': 1,
 'ADJ+PRON': 2,
 'ADJ+VERB': 3,
 'ADP': 4,
 'ADP+DET': 5,
 'ADP+PRON': 6,
 'ADP+PRON+AUX': 7,
 'ADV': 8,
 'ADV+AUX': 9,
 'ADV+PRON': 10,
 'ADV+PRON+VERB': 11,
 'ADV+VERB': 12,
 'AUX': 13,
 'AUX+PRON': 14,
 'CCONJ': 15,
 'DET': 16,
 'INTJ': 17,
 'NOUN': 18,
 'NOUN+AUX': 19,
 'NOUN+PRON': 20,
 'NOUN+PRON+AUX': 21,
 'NOUN+SCONJ': 22,
 'NOUN+VERB': 23,
 'NUM': 24,
 'NUM+PRON': 25,
 'PART': 26,
 'PRON': 27,
 'PRON+AUX': 28,
 'PRON+PART': 29,
 'PRON+PRON': 30,
 'PRON+VERB': 31,
 'PUNCT': 32,
 'SCONJ': 33,
 'VERB': 34,
 'VERB+PRON': 35,
 'X': 36}

In [27]:
model = AutoModelForTokenClassification.from_pretrained(CHECKPOINT, num_labels=len(pos2id.keys()))

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [28]:
train_pos_tagger(model, train_dataset, dev_dataset, output_dir=FA_MODEL_PATH)

Epoch,Training Loss,Validation Loss,Accuracy
1,0.2395,0.204508,0.950682
2,0.1532,0.139884,0.964456
3,0.1327,0.123899,0.96966
4,0.087,0.11855,0.971692
5,0.0807,0.11682,0.971692


In [29]:
fa_acc_fa = evaluate_model(test_dataset=test_dataset, model_path=FA_MODEL_PATH)
fa_acc_fa

{'eval_loss': 0.1167830303311348,
 'eval_accuracy': 0.9710481693148276,
 'eval_runtime': 5.6502,
 'eval_samples_per_second': 106.19,
 'eval_steps_per_second': 6.725}

In [30]:
ar_test_dataset, _ = create_dataset(AR_TEST_PATH, pos2id=pos2id, split="test")
en_test_dataset, _ = create_dataset(EN_TEST_PATH, pos2id=pos2id, split="test")
fr_test_dataset, _ = create_dataset(FR_TEST_PATH, pos2id=pos2id, split="test")
ja_test_dataset, _ = create_dataset(JA_TEST_PATH, pos2id=pos2id, split="test")

In [31]:
fa_acc_ar = evaluate_model(test_dataset=ar_test_dataset, model_path=FA_MODEL_PATH)
fa_acc_ar

{'eval_loss': 1.2246183156967163,
 'eval_accuracy': 0.7295698670326031,
 'eval_runtime': 12.8392,
 'eval_samples_per_second': 52.963,
 'eval_steps_per_second': 3.349}

In [32]:
fa_acc_en = evaluate_model(test_dataset=en_test_dataset, model_path=FA_MODEL_PATH)
fa_acc_en

{'eval_loss': 0.8581011891365051,
 'eval_accuracy': 0.7684797146678556,
 'eval_runtime': 31.1743,
 'eval_samples_per_second': 66.625,
 'eval_steps_per_second': 4.17}

In [33]:
fa_acc_fr = evaluate_model(test_dataset=fr_test_dataset, model_path=FA_MODEL_PATH)
fa_acc_fr

{'eval_loss': 0.9863608479499817,
 'eval_accuracy': 0.7535667963683528,
 'eval_runtime': 3.8456,
 'eval_samples_per_second': 118.578,
 'eval_steps_per_second': 7.541}

In [34]:
fa_acc_ja = evaluate_model(test_dataset=ja_test_dataset, model_path=FA_MODEL_PATH)
fa_acc_ja

{'eval_loss': 2.1976442337036133,
 'eval_accuracy': 0.4956129950201565,
 'eval_runtime': 5.0514,
 'eval_samples_per_second': 107.496,
 'eval_steps_per_second': 6.731}

# French

In [83]:
train_dataset, pos2id = create_dataset(FR_TRAIN_PATH, split="train")
dev_dataset, _ = create_dataset(FR_DEV_PATH, pos2id=pos2id, split="dev")
test_dataset, _ = create_dataset(FR_TEST_PATH, pos2id=pos2id, split="test")

In [84]:
pos2id

{'ADJ': 0,
 'ADP': 1,
 'ADP+DET': 2,
 'ADP+PRON': 3,
 'ADV': 4,
 'AUX': 5,
 'CCONJ': 6,
 'DET': 7,
 'INTJ': 8,
 'NOUN': 9,
 'NUM': 10,
 'PRON': 11,
 'PROPN': 12,
 'PUNCT': 13,
 'SCONJ': 14,
 'SYM': 15,
 'VERB': 16,
 'X': 17}

In [85]:
model = AutoModelForTokenClassification.from_pretrained(CHECKPOINT, num_labels=len(pos2id.keys()))

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [86]:
train_pos_tagger(model, train_dataset, dev_dataset, output_dir=FR_MODEL_PATH)

A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2015,0.148662,0.965319
2,0.0834,0.085666,0.978697
3,0.055,0.071142,0.982711
4,0.0429,0.0633,0.984975
5,0.0422,0.062753,0.984975


In [87]:
fr_acc_fr = evaluate_model(test_dataset=test_dataset, model_path=FR_MODEL_PATH)
fr_acc_fr

A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


{'eval_loss': 0.06161851808428764,
 'eval_accuracy': 0.9859256215327717,
 'eval_runtime': 4.0838,
 'eval_samples_per_second': 111.66,
 'eval_steps_per_second': 7.101}

In [88]:
ar_test_dataset, _ = create_dataset(AR_TEST_PATH, pos2id=pos2id, split="test")
en_test_dataset, _ = create_dataset(EN_TEST_PATH, pos2id=pos2id, split="test")
fa_test_dataset, _ = create_dataset(FA_TEST_PATH, pos2id=pos2id, split="test")
ja_test_dataset, _ = create_dataset(JA_TEST_PATH, pos2id=pos2id, split="test")

In [89]:
fr_acc_ar = evaluate_model(test_dataset=ar_test_dataset, model_path=FR_MODEL_PATH)
fr_acc_ar

A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


{'eval_loss': 1.6999986171722412,
 'eval_accuracy': 0.6484466588511137,
 'eval_runtime': 12.2502,
 'eval_samples_per_second': 55.509,
 'eval_steps_per_second': 3.51}

In [90]:
fr_acc_en = evaluate_model(test_dataset=en_test_dataset, model_path=FR_MODEL_PATH)
fr_acc_en

A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


{'eval_loss': 0.7981430292129517,
 'eval_accuracy': 0.7683462262178549,
 'eval_runtime': 29.912,
 'eval_samples_per_second': 69.437,
 'eval_steps_per_second': 4.346}

In [91]:
fr_acc_fa = evaluate_model(test_dataset=fa_test_dataset, model_path=FR_MODEL_PATH)
fr_acc_fa

A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


{'eval_loss': 0.9296019673347473,
 'eval_accuracy': 0.7952957764532461,
 'eval_runtime': 5.6703,
 'eval_samples_per_second': 105.815,
 'eval_steps_per_second': 6.702}

In [92]:
fr_acc_ja = evaluate_model(test_dataset=ja_test_dataset, model_path=FR_MODEL_PATH)
fr_acc_ja

A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


{'eval_loss': 2.020470380783081,
 'eval_accuracy': 0.5058112505811251,
 'eval_runtime': 5.0698,
 'eval_samples_per_second': 107.105,
 'eval_steps_per_second': 6.706}

In [107]:
import shutil

# shutil.rmtree("/kaggle/working/tmp_trainer")
shutil.rmtree(JA_MODEL_PATH)

# Japanese

In [95]:
train_dataset, pos2id = create_dataset(JA_TRAIN_PATH, split="train")
dev_dataset, _ = create_dataset(JA_DEV_PATH, pos2id=pos2id, split="dev")
test_dataset, _ = create_dataset(JA_TEST_PATH, pos2id=pos2id, split="test")

In [96]:
pos2id

{'ADJ': 0,
 'ADP': 1,
 'ADV': 2,
 'AUX': 3,
 'CCONJ': 4,
 'DET': 5,
 'INTJ': 6,
 'NOUN': 7,
 'NUM': 8,
 'PART': 9,
 'PRON': 10,
 'PROPN': 11,
 'PUNCT': 12,
 'SCONJ': 13,
 'SYM': 14,
 'VERB': 15}

In [97]:
model = AutoModelForTokenClassification.from_pretrained(CHECKPOINT, num_labels=len(pos2id.keys()))

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [98]:
train_pos_tagger(model, train_dataset, dev_dataset, output_dir=JA_MODEL_PATH)

A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1185,0.09799,0.970782
2,0.0828,0.074452,0.979165
3,0.0493,0.069792,0.980304
4,0.0412,0.066122,0.981851
5,0.0357,0.070287,0.981362


In [99]:
ja_acc_ja = evaluate_model(test_dataset=test_dataset, model_path=JA_MODEL_PATH)
ja_acc_ja

A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


{'eval_loss': 0.08363685011863708,
 'eval_accuracy': 0.9792849470615314,
 'eval_runtime': 5.1184,
 'eval_samples_per_second': 106.087,
 'eval_steps_per_second': 6.643}

In [100]:
ar_test_dataset, _ = create_dataset(AR_TEST_PATH, pos2id=pos2id, split="test")
en_test_dataset, _ = create_dataset(EN_TEST_PATH, pos2id=pos2id, split="test")
fa_test_dataset, _ = create_dataset(FA_TEST_PATH, pos2id=pos2id, split="test")
fr_test_dataset, _ = create_dataset(FR_TEST_PATH, pos2id=pos2id, split="test")

In [101]:
ja_acc_ar = evaluate_model(test_dataset=ar_test_dataset, model_path=JA_MODEL_PATH)
ja_acc_ar

A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


{'eval_loss': 1.744842529296875,
 'eval_accuracy': 0.5950160687002792,
 'eval_runtime': 12.2449,
 'eval_samples_per_second': 55.533,
 'eval_steps_per_second': 3.512}

In [102]:
ja_acc_en = evaluate_model(test_dataset=en_test_dataset, model_path=JA_MODEL_PATH)
ja_acc_en

A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


{'eval_loss': 1.6725890636444092,
 'eval_accuracy': 0.5541728273369476,
 'eval_runtime': 30.0275,
 'eval_samples_per_second': 69.17,
 'eval_steps_per_second': 4.329}

In [103]:
ja_acc_fa = evaluate_model(test_dataset=fa_test_dataset, model_path=JA_MODEL_PATH)
ja_acc_fa

A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


{'eval_loss': 1.7782609462738037,
 'eval_accuracy': 0.5892913584929516,
 'eval_runtime': 5.6201,
 'eval_samples_per_second': 106.76,
 'eval_steps_per_second': 6.761}

In [105]:
ja_acc_fr = evaluate_model(test_dataset=fr_test_dataset, model_path=JA_MODEL_PATH)
ja_acc_fr

A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


{'eval_loss': 1.4159342050552368,
 'eval_accuracy': 0.606199403493822,
 'eval_runtime': 3.9477,
 'eval_samples_per_second': 115.51,
 'eval_steps_per_second': 7.346}