In [2]:
import pandas as pd
import os

# Data Reading (ignore if have CSVs)

In [2]:
dir = 'X:\\PhD\\SemEval Task4\\Data'


anno_subtask1_train = pd.read_json('X:\\PhD\\SemEval Task4\\Data\\annotations\\data\\subtask1\\train.json')
anno_subtask1_val = pd.read_json('X:\\PhD\\SemEval Task4\\Data\\annotations\\data\\subtask1\\validation.json')
anno_subtask1_dev = pd.read_json('X:\\PhD\\SemEval Task4\\Data\\annotations\\data\\subtask1\\dev_unlabeled.json')

anno_subtask1_combined = pd.concat([anno_subtask1_train, anno_subtask1_val])
anno_subtask1_combined

Unnamed: 0,id,text,labels,link
0,65635,THIS IS WHY YOU NEED\n\nA SHARPIE WITH YOU AT ...,[Black-and-white Fallacy/Dictatorship],https://www.facebook.com/photo/?fbid=402355213...
1,67927,GOOD NEWS!\n\nNAZANIN ZAGHARI-RATCLIFFE AND AN...,"[Loaded Language, Glittering generalities (Vir...",https://www.facebook.com/amnesty/photos/531198...
2,68031,PAING PHYO MIN IS FREE!,[],https://www.facebook.com/amnesty/photos/427419...
3,77490,Move your ships away!\n\noooook\n\nMove your s...,[],https://www.facebook.com/rightpatriots/photos/...
4,67641,"WHEN YOU'RE THE FBI, THEY LET YOU DO IT.",[Thought-terminating cliché],https://www.facebook.com/AddictingInfoOrg/phot...
...,...,...,...,...
495,78774,Level: Easy\n\nLevel: Hard\n\nLevel: For Mothe...,"[Slogans, Name calling/Labeling, Repetition]",https://www.facebook.com/themotherlandcalls/ph...
496,78299,Europeans:\n\nYou are trespassing,[],https://www.facebook.com/communism101/photos/5...
497,67096,\NONSENSE DETECTED! \nFIRE AT WILL!\\n\n\Time ...,[],https://www.facebook.com/photo/?fbid=181158225...
498,68464,ANNA AND VIRA:\nATTACKED FOR DEFENDING LGBTI A...,"[Causal Oversimplification, Loaded Language]",https://www.facebook.com/amnesty/photos/490311...


In [19]:
anno_subtask1_train.drop(columns=['link']).to_csv('./testdata/train/s1_train.csv')
anno_subtask1_val.drop(columns=['link']).to_csv('./testdata/val/s1_val.csv')

In [13]:
labels = list(set([x for xs in anno_subtask1_combined['labels'].to_list() for x in xs]))
labels

['Reductio ad hitlerum',
 "Misrepresentation of Someone's Position (Straw Man)",
 'Repetition',
 'Name calling/Labeling',
 'Presenting Irrelevant Data (Red Herring)',
 'Glittering generalities (Virtue)',
 'Bandwagon',
 'Loaded Language',
 'Smears',
 'Exaggeration/Minimisation',
 'Black-and-white Fallacy/Dictatorship',
 'Thought-terminating cliché',
 'Appeal to fear/prejudice',
 'Causal Oversimplification',
 'Obfuscation, Intentional vagueness, Confusion',
 'Flag-waving',
 'Slogans',
 'Whataboutism',
 'Doubt',
 'Appeal to authority']

# Transformers

Modified version of: https://github.com/kinit-sk/semeval2023-task3-persuasion-techniques

In [3]:
#reqs
import torch
import transformers
from torch.utils.data import Dataset
import torch
from sklearn.preprocessing import MultiLabelBinarizer
from transformers import AutoTokenizer
import ast
import numpy as np
from torch.utils.data import DataLoader
from collections import Counter
from tqdm import tqdm
import pandas as pd
import os
from sklearn import metrics

## Transformer Classes

In [4]:
class BERTBase(torch.nn.Module):
    def __init__(self):
        super(BERTBase, self).__init__()
        self.l1 = transformers.BertModel.from_pretrained('bert-base-uncased', return_dict=False)
        self.l2 = torch.nn.Dropout(0.3)
        self.l3 = torch.nn.Linear(768, 19) # number of classes/techniques

    def forward(self, ids, mask, token_type_ids):
        _, output_1 = self.l1(ids, attention_mask=mask, token_type_ids=token_type_ids)
        output_2 = self.l2(output_1)
        output = self.l3(output_2)
        return output


class EnglishRobertaBase(torch.nn.Module):
    def __init__(self):
        super(EnglishRobertaBase, self).__init__()
        self.l1 = transformers.RobertaModel.from_pretrained('roberta-base', return_dict=False)
        self.l2 = torch.nn.Dropout(0.3)
        self.l3 = torch.nn.Linear(768, 19)

    def forward(self, ids, mask, token_type_ids):
        _, output_1 = self.l1(ids, attention_mask=mask, token_type_ids=token_type_ids)
        output_2 = self.l2(output_1)
        output = self.l3(output_2)
        return output


class EnglishRobertaLarge(torch.nn.Module):
    def __init__(self):
        super(EnglishRobertaLarge, self).__init__()
        self.l1 = transformers.RobertaModel.from_pretrained('roberta-large', return_dict=False)
        self.l2 = torch.nn.Dropout(0.3)
        self.l3 = torch.nn.Linear(1024, 19)

    def forward(self, ids, mask, token_type_ids):
        _, output_1 = self.l1(ids, attention_mask=mask, token_type_ids=token_type_ids)
        output_2 = self.l2(output_1)
        output = self.l3(output_2)
        return output


class mBERTBase(torch.nn.Module):
    def __init__(self):
        super(mBERTBase, self).__init__()
        self.l1 = transformers.BertModel.from_pretrained('bert-base-multilingual-cased', return_dict=False)
        self.l2 = torch.nn.Dropout(0.3)
        self.l3 = torch.nn.Linear(768, 19)

    def forward(self, ids, mask, token_type_ids):
        _, output_1 = self.l1(ids, attention_mask=mask, token_type_ids=token_type_ids)
        output_2 = self.l2(output_1)
        output = self.l3(output_2)
        return output


class XLMRobertaBase(torch.nn.Module):
    def __init__(self):
        super(XLMRobertaBase, self).__init__()
        self.l1 = transformers.XLMRobertaModel.from_pretrained('xlm-roberta-base', return_dict=False)
        self.l2 = torch.nn.Dropout(0.3)
        self.l3 = torch.nn.Linear(768, 19)

    def forward(self, ids, mask, token_type_ids):
        _, output_1 = self.l1(ids, attention_mask=mask, token_type_ids=token_type_ids)
        output_2 = self.l2(output_1)
        output = self.l3(output_2)
        return output


class XLMRobertaLarge(torch.nn.Module):
    def __init__(self):
        super(XLMRobertaLarge, self).__init__()
        self.l1 = transformers.XLMRobertaModel.from_pretrained('xlm-roberta-large', return_dict=False)
        self.l2 = torch.nn.Dropout(0.3)
        self.l3 = torch.nn.Linear(1024, 19)

    def forward(self, ids, mask, token_type_ids):
        _, output_1 = self.l1(ids, attention_mask=mask, token_type_ids=token_type_ids)
        output_2 = self.l2(output_1)
        output = self.l3(output_2)
        return output

## Data Loader

In [5]:
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = self.data.techniques_encoded
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text[index])
        text = " ".join(text.split())

        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            pad_to_max_length=True,
            return_token_type_ids=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs['token_type_ids']


        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets[index], dtype=torch.float)
        }


def create_dataset_training(MODEL_NAME, PATHS_TO_TRAIN, PATHS_TO_VALIDATION, TRAINING_LANGUAGES):
    """
    Function to load and process the _input_data into suitable format. Includes setting up tokenizer, one hot encoding of
    techniques, concatenation of csv files if necessary, language filtering and loading _input_data into CustomDataset class.

    :param MODEL_NAME: name of the model as per Hugging face to use for tokenizer.
    :param PATHS_TO_TRAIN: List of paths to csv files we want to use for training.
    :param PATHS_TO_VALIDATION: List of paths to csv files we want to use for validation.
    :param TRAINING_LANGUAGES: List of languages we are using.
    :return: Tuple of training and testing set in Dataset format.
    """

    #Create tokenizer
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, return_dict=False)
    MAX_LEN = tokenizer.model_max_length
    if MAX_LEN > 1024:
        print(f'hmm, seems the tokenizer is saying too much: {MAX_LEN}, setting down to 512.')
        MAX_LEN = 512
    print(f'max length of tokens for < {MODEL_NAME} > is: < {MAX_LEN} >')

    #Concatenate csv files
    df_train = pd.DataFrame()
    for train_path in PATHS_TO_TRAIN:
        df_train = pd.concat([df_train, pd.read_csv(train_path)], ignore_index=True)

    #One hot encoding of techniques
    df_train['labels'] = df_train['labels'].apply(ast.literal_eval)
    one_hot = MultiLabelBinarizer()
    df_train['techniques_encoded'] = one_hot.fit_transform(df_train['labels'])[:, 1:].tolist()

    #Load validation dataset and encode
    if PATHS_TO_VALIDATION:
        df_dev = pd.DataFrame()
        for dev_path in PATHS_TO_VALIDATION:
            df_dev = pd.concat([df_dev, pd.read_csv(dev_path)], ignore_index=True)

        df_dev['labels'] = df_dev['labels'].apply(ast.literal_eval)
        # use fitted one-hot encoder transform in case some techniques are present in training but not in dev
        df_dev['techniques_encoded'] = one_hot.transform(df_dev['labels'])[:, 1:].tolist()
    else:
        df_dev = pd.DataFrame(
            columns=[['id', 'text', 'labels', 'techniques_encoded']])

    #Language filtering - Not Needed yet
    # df_train = df_train[df_train['language'].isin(TRAINING_LANGUAGES)]
    # df_dev = df_dev[df_dev['language'].isin(TRAINING_LANGUAGES)]
    
    train_dataset = df_train
    test_dataset = df_dev

    print(f"TRAIN Dataset: {train_dataset.shape}")
    print(f"TEST Dataset: {test_dataset.shape}")

    training_set = CustomDataset(train_dataset, tokenizer, MAX_LEN)
    testing_set = CustomDataset(test_dataset, tokenizer, MAX_LEN)

    return training_set, testing_set


def get_tokenizer(model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name, return_dict=False)
    MAX_LEN = tokenizer.model_max_length
    if MAX_LEN > 1024:
        print(f'hmm, seems the tokenizer is saying too much: {MAX_LEN}, setting down to 512.')
        MAX_LEN = 512
    print(f'max length of tokens for < {MAX_LEN} > is: < {MAX_LEN} >')
    return tokenizer, MAX_LEN


def load_dataframe(args):
    test_df = pd.read_csv(args["EVALUATION_SET"])

    # language_to_texts = {lan: list(text) for lan, text in test_df.groupby('language')['text']}
    # for language, texts in language_to_texts.items():
    #     print(f'language {language} has {len(texts)} lines of data.')

    
    print('Encoding techniques into one-hot-encoded lists!\n')
        # Create sentence and label lists
    test_sentences = test_df.text.values

    test_df['labels'] = test_df['labels'].apply(ast.literal_eval)
    one_hot = MultiLabelBinarizer()
    test_df['techniques_encoded'] = one_hot.fit_transform(test_df['labels'])[:, 1:].tolist()
    test_labels = test_df.techniques_encoded.values
    test_labels = np.array([labels for labels in test_labels])

    mappings = one_hot.classes_[1:]    

    return test_df, mappings

def create_dataloader(args, test_df, language, language_counts):
    # print(
    #     f'testing on {language_counts[language]} data points (around {language_counts[language] / args["BATCH_SIZE"]} iterations)')
    filtered_test_df = test_df.copy()

    # not required until we use multilingual data
    # filtered_test_df = filtered_test_df[filtered_test_df['language'] == language]
    # filtered_test_df = filtered_test_df.reset_index()
    
    tokenizer, MAX_LEN = get_tokenizer(args['MODEL_NAME'])

    testing_set = CustomDataset(
        test_df,
        tokenizer,
        MAX_LEN
    )

    testing_loader = DataLoader(
        testing_set,
        batch_size=args['BATCH_SIZE'],
        shuffle=args['SHUFFLE'],
        num_workers=args['NUM_WORKERS']
    )

    return testing_loader, filtered_test_df

## Training

In [6]:
def run_training(args):
    """
    Function creates a dataset based on given arguments. Afterwards it trains and validates a model.

    :param args: dictionary of arguments needed to create the model
    """
    training_set, testing_set = create_dataset_training(MODEL_NAME=args['MODEL_NAME'], PATHS_TO_TRAIN=args['PATHS_TO_TRAIN'], PATHS_TO_VALIDATION=args['PATHS_TO_VALIDATION'], TRAINING_LANGUAGES=args['TRAINING_LANGUAGES'])

    training_loader = DataLoader(
        training_set,
        batch_size=args['TRAIN_BATCH_SIZE'],
        shuffle=args['TRAIN_SHUFFLE'],
        num_workers=args['TRAIN_NUM_WORKERS']
    )

    validation_loader = DataLoader(
        testing_set,
        batch_size=args['VALIDATION_BATCH_SIZE'],
        shuffle=args['VALIDATION_SHUFFLE'],
        num_workers=args['VALIDATION_NUM_WORKERS']
    )

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    lm = LanguageModel(training_loader=training_loader, validation_loader=validation_loader, device=device, args=args)
    return lm.train_over_epochs()


class LanguageModel:
    """
    Class to hold the model as well as train and validate it.
    """

    def __init__(self, training_loader, validation_loader, device, args):

        self.args = args
        self.device = device
        self.validation_loader = validation_loader
        self.training_loader = training_loader
        if args['MODEL_NAME'] == 'roberta-large':
            self.model = EnglishRobertaLarge()
        elif args['MODEL_NAME'] == 'roberta-base':
            self.model = EnglishRobertaBase()
        elif args['MODEL_NAME'] == 'bert-base-uncased':
            self.model = BERTBase()
        elif args['MODEL_NAME'] == 'bert-base-multilingual-cased':
            self.model = mBERTBase()
        elif args['MODEL_NAME'] == 'xlm-roberta-base':
            self.model = XLMRobertaBase()
        elif args['MODEL_NAME'] == 'xlm-roberta-large':
            self.model = XLMRobertaLarge()

        self.model.to(self.device)
        self.optimizer = torch.optim.Adam(params=self.model.parameters(), lr=self.args['LEARNING_RATE'])
        if args['FREEZE']:
            counter = 0
            no_layers = len([param for param in self.model.parameters()])
            for param in self.model.parameters():
                counter += 1
                param.requires_grad = False
                if counter == (no_layers//5) * 4:
                    break


    def save_model_and_weights(self, epoch):
        print(f'\n\n --- Currently saving model! --- \n')
        model_path = f'{self.args["MODELS_DIR"]}{self.args["MODEL_NAME_PREFIX"]}_model_{epoch}epoch_batch{self.args["TRAIN_BATCH_SIZE"]}_lr{self.args["LEARNING_RATE"]}.pth'
        model_weights_path = f'{self.args["MODELS_DIR"]}{self.args["MODEL_NAME_PREFIX"]}_model_weights_{epoch}epoch_batch{self.args["TRAIN_BATCH_SIZE"]}_lr{self.args["LEARNING_RATE"]}.pth'
        torch.save(self.model.state_dict(), model_weights_path)
        torch.save(self.model, model_path)
        return model_path

    def train(self, epoch):
        print(f'\nNow training on Epoch {epoch}.')
        print(
            f'Testing on {len(self.training_loader) * self.training_loader.batch_size - 1} data points (around {len(self.training_loader)} iterations)')

        self.args['CURRENT_TRAIN_LOSS'] = 0
        self.model.train()
        for itr, data in tqdm(enumerate(self.training_loader, 0)):
            ids = data['ids'].to(self.device, dtype=torch.long)
            mask = data['mask'].to(self.device, dtype=torch.long)
            token_type_ids = data['token_type_ids'].to(self.device, dtype=torch.long)
            targets = data['targets'].to(self.device, dtype=torch.float)

            outputs = self.model(ids, mask, token_type_ids)

            loss = torch.nn.BCEWithLogitsLoss()(outputs, targets)
            if itr % 200 == 0:
                print(f'Epoch: {epoch}, Training Loss:  {loss.item()}')

            self.optimizer.zero_grad()

            loss.backward()
            self.optimizer.step()

            self.args['CURRENT_TRAIN_LOSS'] += loss

        print(f'Accumulated Training Loss after Epoch {epoch} is: {self.args["CURRENT_TRAIN_LOSS"]}')

    def validate(self, epoch):
        print(f'\nNow validating on Epoch {epoch}.')
        print(
            f'Validating on {len(self.validation_loader) * self.validation_loader.batch_size} data points (around {len(self.validation_loader)} iterations)')
        self.model.eval()
        self.args['CURRENT_VALIDATION_LOSS'] = 0
        with torch.no_grad():
            for val_itr, data in tqdm(enumerate(self.validation_loader)):
                ids = data['ids'].to(self.device, dtype=torch.long)
                mask = data['mask'].to(self.device, dtype=torch.long)
                token_type_ids = data['token_type_ids'].to(self.device, dtype=torch.long)
                targets = data['targets'].to(self.device, dtype=torch.float)

                outputs = self.model(ids, mask, token_type_ids)
                loss = torch.nn.BCEWithLogitsLoss()(outputs, targets)

                if val_itr % 200 == 0:
                    print(f'Epoch: {epoch}, Training Loss:  {loss.item()}')

                # accumulate the loss across all batches
                self.args['CURRENT_VALIDATION_LOSS'] += loss

                # clear cache
                torch.cuda.empty_cache()

        print(f'Accumulated Validation Loss after Epoch {epoch} is: {self.args["CURRENT_VALIDATION_LOSS"]}')

        # Early stopping if necessary
        if self.args['CURRENT_VALIDATION_LOSS'] < self.args['BEST_LOSS']:
            self.args['BEST_LOSS'] = self.args['CURRENT_VALIDATION_LOSS']
            self.save_model_and_weights(epoch)

            return True
        else:
            # stop training
            print('Validation loss increased! Stopping training...')
            print(f'Last validation loss was: {self.args["BEST_LOSS"]}')
            print(f'Current validation loss is: {self.args["CURRENT_VALIDATION_LOSS"]}')

            return False

    def train_over_epochs(self):
        for epoch in range(self.args['EPOCHS']):
            self.train(epoch)
            if self.args['PATHS_TO_VALIDATION'] and not self.validate(epoch):
                break  # the last epoch was NOT counted due to early stopping
            else:
                # store latest version
                path = self.save_model_and_weights(epoch)
        print("Done.")
        return path

# Training Different Models (should be a loop but is not a loop)

In [42]:
args_train = {
    # general
    'MODEL_NAME_PREFIX': '',                                # so that model names don't get overwritten, write a UNIQUE name prefix to distinguish this current model
    'EPOCHS': 5,                                            # Make sure to set the EXACT amount of epochs for non-validating runs. Typically BERT-like models won't need to go over 4 epochs
    'LEARNING_RATE': 1e-05,
    'MODEL_NAME': 'xlm-roberta-large',                      # model name as per hugging face library, currently supported model names are: roberta-large, roberta-base, bert-base-uncased, bert-base-multilingual-cased, xlm-roberta-base and xlm-roberta-large
    'DROPOUT_RATE': 0.3,
    'MODELS_DIR': './unimodal-baselines',               # name of subdirectory within 'models' folder where the trained models will be saved, e.g., 'distilbert_base_threshold_0_4'
    'PATHS_TO_TRAIN': ['./testdata/s1_train.csv'],    # useful when training on more sets (e.g. train and dev), list of paths to the csv files
    'PATHS_TO_VALIDATION': ['./testdata/s1_val.csv'],                              # leave empty if no validation required

    # for training
    'TRAIN_BATCH_SIZE': 8,
    'TRAIN_SHUFFLE': True,
    'TRAIN_NUM_WORKERS': 0,
    'CURRENT_EPOCH': 0,
    'CURRENT_TRAIN_LOSS': 0,
    'TRAINING_LANGUAGES': ['en'], # full list is ['en', 'fr', 'ge', 'it', 'po', 'ru']
    'FREEZE': False,                                        # If True first 80% of the layers will be frozen during training
                                                            # unfreeze for PTC fine-tuning?

    # for validation
    'VALIDATION_BATCH_SIZE': 8,
    'VALIDATION_SHUFFLE': False,
    'VALIDATION_NUM_WORKERS': 0,
    'BEST_LOSS': float('inf'),
    'CURRENT_VALIDATION_LOSS': 0
}

model_file = run_training(args_train)

max length of tokens for < xlm-roberta-large > is: < 512 >
TRAIN Dataset: (7000, 5)
TEST Dataset: (500, 5)

Now training on Epoch 0.
Testing on 6999 data points (around 875 iterations)


0it [00:00, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Epoch: 0, Training Loss:  0.6796550750732422


200it [38:37, 11.59s/it]

Epoch: 0, Training Loss:  0.22325746715068817


400it [1:17:15, 11.58s/it]

Epoch: 0, Training Loss:  0.1882757842540741


600it [1:56:20, 11.54s/it]

Epoch: 0, Training Loss:  0.2372388392686844


800it [2:34:48, 11.54s/it]

Epoch: 0, Training Loss:  0.24288859963417053


875it [2:49:13, 11.60s/it]


Accumulated Training Loss after Epoch 0 is: 212.21343994140625

Now validating on Epoch 0.
Validating on 504 data points (around 63 iterations)


1it [00:09,  9.99s/it]

Epoch: 0, Training Loss:  0.24209730327129364


63it [00:31,  2.01it/s]


Accumulated Validation Loss after Epoch 0 is: 13.597033500671387


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 1.
Testing on 6999 data points (around 875 iterations)


0it [00:00, ?it/s]

Epoch: 1, Training Loss:  0.17399227619171143


200it [38:52, 11.67s/it]

Epoch: 1, Training Loss:  0.22938214242458344


400it [1:17:44, 11.66s/it]

Epoch: 1, Training Loss:  0.14239315688610077


600it [1:56:37, 11.66s/it]

Epoch: 1, Training Loss:  0.21883244812488556


800it [2:35:29, 11.66s/it]

Epoch: 1, Training Loss:  0.23012728989124298


875it [2:50:04, 11.66s/it]


Accumulated Training Loss after Epoch 1 is: 176.20091247558594

Now validating on Epoch 1.
Validating on 504 data points (around 63 iterations)


0it [00:00, ?it/s]

Epoch: 1, Training Loss:  0.21410717070102692


63it [00:13,  4.76it/s]


Accumulated Validation Loss after Epoch 1 is: 12.640756607055664


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 2.
Testing on 6999 data points (around 875 iterations)


0it [00:00, ?it/s]

Epoch: 2, Training Loss:  0.1780775934457779


200it [53:18, 16.11s/it]

Epoch: 2, Training Loss:  0.16186870634555817


400it [1:46:36, 16.11s/it]

Epoch: 2, Training Loss:  0.2536025941371918


600it [2:39:55, 16.11s/it]

Epoch: 2, Training Loss:  0.22132770717144012


800it [3:33:14, 16.11s/it]

Epoch: 2, Training Loss:  0.12633655965328217


875it [3:53:13, 15.99s/it]


Accumulated Training Loss after Epoch 2 is: 153.43455505371094

Now validating on Epoch 2.
Validating on 504 data points (around 63 iterations)


0it [00:00, ?it/s]

Epoch: 2, Training Loss:  0.2355465143918991


63it [03:27,  3.30s/it]


Accumulated Validation Loss after Epoch 2 is: 12.39066219329834


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 3.
Testing on 6999 data points (around 875 iterations)


0it [00:00, ?it/s]

Epoch: 3, Training Loss:  0.16450731456279755


200it [52:22, 15.71s/it]

Epoch: 3, Training Loss:  0.14485228061676025


400it [1:44:50, 15.75s/it]

Epoch: 3, Training Loss:  0.10938655585050583


600it [2:39:07, 16.07s/it]

Epoch: 3, Training Loss:  0.2023271769285202


800it [3:40:44, 16.56s/it]

Epoch: 3, Training Loss:  0.14098167419433594


875it [4:04:24, 16.76s/it]


Accumulated Training Loss after Epoch 3 is: 133.15428161621094

Now validating on Epoch 3.
Validating on 504 data points (around 63 iterations)


0it [00:00, ?it/s]

Epoch: 3, Training Loss:  0.22417588531970978


63it [04:21,  4.15s/it]


Accumulated Validation Loss after Epoch 3 is: 12.381818771362305


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 4.
Testing on 6999 data points (around 875 iterations)


0it [00:00, ?it/s]

Epoch: 4, Training Loss:  0.1778809130191803


200it [58:06, 18.50s/it]

Epoch: 4, Training Loss:  0.06397739052772522


400it [1:54:36, 16.13s/it]

Epoch: 4, Training Loss:  0.0672091469168663


600it [2:50:01, 15.70s/it]

Epoch: 4, Training Loss:  0.1909969300031662


800it [3:42:19, 15.69s/it]

Epoch: 4, Training Loss:  0.1478714942932129


875it [4:01:56, 16.59s/it]


Accumulated Training Loss after Epoch 4 is: 113.25543975830078

Now validating on Epoch 4.
Validating on 504 data points (around 63 iterations)


0it [00:00, ?it/s]

Epoch: 4, Training Loss:  0.20653827488422394


63it [04:00,  3.82s/it]


Accumulated Validation Loss after Epoch 4 is: 12.515527725219727
Validation loss increased! Stopping training...
Last validation loss was: 12.381818771362305
Current validation loss is: 12.515527725219727
Done.


In [44]:
args_train = {
    # general
    'MODEL_NAME_PREFIX': 'bert-base-multilingual-cased',                                # so that model names don't get overwritten, write a UNIQUE name prefix to distinguish this current model
    'EPOCHS': 5,                                            # Make sure to set the EXACT amount of epochs for non-validating runs. Typically BERT-like models won't need to go over 4 epochs
    'LEARNING_RATE': 1e-05,
    'MODEL_NAME': 'bert-base-multilingual-cased',                      # model name as per hugging face library, currently supported model names are: roberta-large, roberta-base, bert-base-uncased, bert-base-multilingual-cased, xlm-roberta-base and xlm-roberta-large
    'DROPOUT_RATE': 0.3,
    'MODELS_DIR': './unimodal-baselines/',               # name of subdirectory within 'models' folder where the trained models will be saved, e.g., 'distilbert_base_threshold_0_4'
    'PATHS_TO_TRAIN': ['./testdata/s1_train.csv'],    # useful when training on more sets (e.g. train and dev), list of paths to the csv files
    'PATHS_TO_VALIDATION': ['./testdata/s1_val.csv'],                              # leave empty if no validation required

    # for training
    'TRAIN_BATCH_SIZE': 8,
    'TRAIN_SHUFFLE': True,
    'TRAIN_NUM_WORKERS': 0,
    'CURRENT_EPOCH': 0,
    'CURRENT_TRAIN_LOSS': 0,
    'TRAINING_LANGUAGES': ['en'], # full list is ['en', 'fr', 'ge', 'it', 'po', 'ru']
    'FREEZE': False,                                        # If True first 80% of the layers will be frozen during training

    # for validation
    'VALIDATION_BATCH_SIZE': 8,
    'VALIDATION_SHUFFLE': False,
    'VALIDATION_NUM_WORKERS': 0,
    'BEST_LOSS': float('inf'),
    'CURRENT_VALIDATION_LOSS': 0
}

model_file = run_training(args_train)

max length of tokens for < bert-base-multilingual-cased > is: < 512 >
TRAIN Dataset: (7000, 5)
TEST Dataset: (500, 5)

Now training on Epoch 0.
Testing on 6999 data points (around 875 iterations)


0it [00:00, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Epoch: 0, Training Loss:  0.6773495674133301


200it [08:57,  2.67s/it]

Epoch: 0, Training Loss:  0.28486016392707825


400it [18:08,  2.61s/it]

Epoch: 0, Training Loss:  0.2414165884256363


600it [26:55,  2.67s/it]

Epoch: 0, Training Loss:  0.27533861994743347


800it [35:55,  2.79s/it]

Epoch: 0, Training Loss:  0.2402072250843048


875it [39:31,  2.71s/it]


Accumulated Training Loss after Epoch 0 is: 231.0172576904297

Now validating on Epoch 0.
Validating on 504 data points (around 63 iterations)


1it [00:00,  1.54it/s]

Epoch: 0, Training Loss:  0.25061848759651184


63it [00:09,  6.37it/s]


Accumulated Validation Loss after Epoch 0 is: 14.706016540527344


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 1.
Testing on 6999 data points (around 875 iterations)


0it [00:00, ?it/s]

Epoch: 1, Training Loss:  0.2030051052570343


200it [14:52,  4.47s/it]

Epoch: 1, Training Loss:  0.18518470227718353


400it [29:44,  4.47s/it]

Epoch: 1, Training Loss:  0.17730334401130676


600it [44:36,  4.47s/it]

Epoch: 1, Training Loss:  0.2838349938392639


800it [59:28,  4.47s/it]

Epoch: 1, Training Loss:  0.3091575801372528


875it [1:05:03,  4.46s/it]


Accumulated Training Loss after Epoch 1 is: 199.37339782714844

Now validating on Epoch 1.
Validating on 504 data points (around 63 iterations)


1it [00:01,  1.80s/it]

Epoch: 1, Training Loss:  0.2498093694448471


63it [00:05, 12.33it/s]


Accumulated Validation Loss after Epoch 1 is: 14.063506126403809


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 2.
Testing on 6999 data points (around 875 iterations)


0it [00:00, ?it/s]

Epoch: 2, Training Loss:  0.19667458534240723


200it [16:13,  4.95s/it]

Epoch: 2, Training Loss:  0.2836587131023407


400it [32:46,  4.96s/it]

Epoch: 2, Training Loss:  0.2297101765871048


600it [48:53,  4.70s/it]

Epoch: 2, Training Loss:  0.17762181162834167


800it [1:04:27,  4.60s/it]

Epoch: 2, Training Loss:  0.18972259759902954


875it [1:10:12,  4.81s/it]


Accumulated Training Loss after Epoch 2 is: 187.03121948242188

Now validating on Epoch 2.
Validating on 504 data points (around 63 iterations)


1it [00:01,  1.83s/it]

Epoch: 2, Training Loss:  0.2492693066596985


63it [00:07,  8.48it/s]


Accumulated Validation Loss after Epoch 2 is: 14.015921592712402


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 3.
Testing on 6999 data points (around 875 iterations)


0it [00:00, ?it/s]

Epoch: 3, Training Loss:  0.254861980676651


200it [15:38,  4.69s/it]

Epoch: 3, Training Loss:  0.14487753808498383


400it [31:32,  4.72s/it]

Epoch: 3, Training Loss:  0.12822268903255463


600it [46:48,  4.31s/it]

Epoch: 3, Training Loss:  0.17995351552963257


800it [1:01:25,  4.40s/it]

Epoch: 3, Training Loss:  0.155323788523674


875it [1:06:53,  4.59s/it]


Accumulated Training Loss after Epoch 3 is: 173.15943908691406

Now validating on Epoch 3.
Validating on 504 data points (around 63 iterations)


1it [00:01,  1.86s/it]

Epoch: 3, Training Loss:  0.2323458343744278


63it [00:07,  7.95it/s]


Accumulated Validation Loss after Epoch 3 is: 13.681135177612305


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 4.
Testing on 6999 data points (around 875 iterations)


0it [00:00, ?it/s]

Epoch: 4, Training Loss:  0.1548755168914795


200it [11:57,  3.58s/it]

Epoch: 4, Training Loss:  0.21165232360363007


400it [23:46,  3.57s/it]

Epoch: 4, Training Loss:  0.11364316940307617


600it [35:34,  3.57s/it]

Epoch: 4, Training Loss:  0.11460655927658081


800it [47:51,  3.78s/it]

Epoch: 4, Training Loss:  0.15762251615524292


875it [52:32,  3.60s/it]


Accumulated Training Loss after Epoch 4 is: 159.1197052001953

Now validating on Epoch 4.
Validating on 504 data points (around 63 iterations)


1it [00:02,  2.02s/it]

Epoch: 4, Training Loss:  0.23882943391799927


63it [00:16,  3.83it/s]

Accumulated Validation Loss after Epoch 4 is: 13.922234535217285
Validation loss increased! Stopping training...
Last validation loss was: 13.681135177612305
Current validation loss is: 13.922234535217285
Done.





In [45]:
args_train = {
    # general
    'MODEL_NAME_PREFIX': 'roberta-large',                                # so that model names don't get overwritten, write a UNIQUE name prefix to distinguish this current model
    'EPOCHS': 5,                                            # Make sure to set the EXACT amount of epochs for non-validating runs. Typically BERT-like models won't need to go over 4 epochs
    'LEARNING_RATE': 1e-05,
    'MODEL_NAME': 'roberta-large',                      # model name as per hugging face library, currently supported model names are: roberta-large, roberta-base, bert-base-uncased, bert-base-multilingual-cased, xlm-roberta-base and xlm-roberta-large
    'DROPOUT_RATE': 0.3,
    'MODELS_DIR': './unimodal-baselines/',               # name of subdirectory within 'models' folder where the trained models will be saved, e.g., 'distilbert_base_threshold_0_4'
    'PATHS_TO_TRAIN': ['./testdata/s1_train.csv'],    # useful when training on more sets (e.g. train and dev), list of paths to the csv files
    'PATHS_TO_VALIDATION': ['./testdata/s1_val.csv'],                              # leave empty if no validation required

    # for training
    'TRAIN_BATCH_SIZE': 8,
    'TRAIN_SHUFFLE': True,
    'TRAIN_NUM_WORKERS': 0,
    'CURRENT_EPOCH': 0,
    'CURRENT_TRAIN_LOSS': 0,
    'TRAINING_LANGUAGES': ['en'], # full list is ['en', 'fr', 'ge', 'it', 'po', 'ru']
    'FREEZE': False,                                        # If True first 80% of the layers will be frozen during training

    # for validation
    'VALIDATION_BATCH_SIZE': 8,
    'VALIDATION_SHUFFLE': False,
    'VALIDATION_NUM_WORKERS': 0,
    'BEST_LOSS': float('inf'),
    'CURRENT_VALIDATION_LOSS': 0
}

model_file = run_training(args_train)

Downloading config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

Downloading vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

max length of tokens for < roberta-large > is: < 512 >
TRAIN Dataset: (7000, 5)
TEST Dataset: (500, 5)


Downloading model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Now training on Epoch 0.
Testing on 6999 data points (around 875 iterations)


0it [00:00, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Epoch: 0, Training Loss:  0.7196247577667236


200it [02:58,  1.12it/s]

Epoch: 0, Training Loss:  0.14794889092445374


400it [05:57,  1.12it/s]

Epoch: 0, Training Loss:  0.19413061439990997


600it [08:55,  1.12it/s]

Epoch: 0, Training Loss:  0.1944851577281952


800it [11:54,  1.12it/s]

Epoch: 0, Training Loss:  0.2094479501247406


875it [13:01,  1.12it/s]


Accumulated Training Loss after Epoch 0 is: 212.99249267578125

Now validating on Epoch 0.
Validating on 504 data points (around 63 iterations)


0it [00:00, ?it/s]

Epoch: 0, Training Loss:  0.2421095073223114


63it [00:17,  3.51it/s]


Accumulated Validation Loss after Epoch 0 is: 13.489632606506348


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 1.
Testing on 6999 data points (around 875 iterations)


0it [00:00, ?it/s]

Epoch: 1, Training Loss:  0.20842787623405457


200it [01:40,  1.98it/s]

Epoch: 1, Training Loss:  0.23920315504074097


400it [03:21,  1.98it/s]

Epoch: 1, Training Loss:  0.18115803599357605


600it [05:02,  1.98it/s]

Epoch: 1, Training Loss:  0.20896288752555847


800it [06:44,  1.98it/s]

Epoch: 1, Training Loss:  0.13941295444965363


875it [07:21,  1.98it/s]


Accumulated Training Loss after Epoch 1 is: 173.28250122070312

Now validating on Epoch 1.
Validating on 504 data points (around 63 iterations)


1it [00:00,  4.31it/s]

Epoch: 1, Training Loss:  0.20948049426078796


63it [00:10,  6.10it/s]


Accumulated Validation Loss after Epoch 1 is: 12.475747108459473


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 2.
Testing on 6999 data points (around 875 iterations)


0it [00:00, ?it/s]

Epoch: 2, Training Loss:  0.14728398621082306


200it [01:42,  1.98it/s]

Epoch: 2, Training Loss:  0.19809940457344055


400it [03:23,  1.98it/s]

Epoch: 2, Training Loss:  0.18450607359409332


600it [05:04,  1.98it/s]

Epoch: 2, Training Loss:  0.17762085795402527


800it [06:45,  1.98it/s]

Epoch: 2, Training Loss:  0.17555472254753113


875it [07:23,  1.97it/s]


Accumulated Training Loss after Epoch 2 is: 150.40342712402344

Now validating on Epoch 2.
Validating on 504 data points (around 63 iterations)


0it [00:00, ?it/s]

Epoch: 2, Training Loss:  0.20318101346492767


63it [00:10,  6.10it/s]


Accumulated Validation Loss after Epoch 2 is: 12.033329010009766


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 3.
Testing on 6999 data points (around 875 iterations)


0it [00:00, ?it/s]

Epoch: 3, Training Loss:  0.18346114456653595


200it [01:41,  1.98it/s]

Epoch: 3, Training Loss:  0.1284378319978714


400it [03:22,  1.98it/s]

Epoch: 3, Training Loss:  0.17629924416542053


600it [05:03,  1.98it/s]

Epoch: 3, Training Loss:  0.16613200306892395


800it [06:44,  1.98it/s]

Epoch: 3, Training Loss:  0.1318967193365097


875it [07:22,  1.98it/s]


Accumulated Training Loss after Epoch 3 is: 127.09971618652344

Now validating on Epoch 3.
Validating on 504 data points (around 63 iterations)


0it [00:00, ?it/s]

Epoch: 3, Training Loss:  0.20633022487163544


63it [00:10,  6.10it/s]


Accumulated Validation Loss after Epoch 3 is: 12.021674156188965


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 4.
Testing on 6999 data points (around 875 iterations)


0it [00:00, ?it/s]

Epoch: 4, Training Loss:  0.1886201947927475


200it [01:41,  1.98it/s]

Epoch: 4, Training Loss:  0.10580242425203323


400it [03:22,  1.98it/s]

Epoch: 4, Training Loss:  0.21124684810638428


600it [05:03,  1.98it/s]

Epoch: 4, Training Loss:  0.061535660177469254


800it [06:44,  1.98it/s]

Epoch: 4, Training Loss:  0.1605372130870819


875it [07:22,  1.98it/s]


Accumulated Training Loss after Epoch 4 is: 103.2136001586914

Now validating on Epoch 4.
Validating on 504 data points (around 63 iterations)


0it [00:00, ?it/s]

Epoch: 4, Training Loss:  0.18887023627758026


63it [00:10,  6.09it/s]


Accumulated Validation Loss after Epoch 4 is: 12.16552734375
Validation loss increased! Stopping training...
Last validation loss was: 12.021674156188965
Current validation loss is: 12.16552734375
Done.


In [46]:
args_train = {
    # general
    'MODEL_NAME_PREFIX': 'roberta-base',                                # so that model names don't get overwritten, write a UNIQUE name prefix to distinguish this current model
    'EPOCHS': 5,                                            # Make sure to set the EXACT amount of epochs for non-validating runs. Typically BERT-like models won't need to go over 4 epochs
    'LEARNING_RATE': 1e-05,
    'MODEL_NAME': 'roberta-base',                      # model name as per hugging face library, currently supported model names are: roberta-large, roberta-base, bert-base-uncased, bert-base-multilingual-cased, xlm-roberta-base and xlm-roberta-large
    'DROPOUT_RATE': 0.3,
    'MODELS_DIR': './unimodal-baselines/',               # name of subdirectory within 'models' folder where the trained models will be saved, e.g., 'distilbert_base_threshold_0_4'
    'PATHS_TO_TRAIN': ['./testdata/s1_train.csv'],    # useful when training on more sets (e.g. train and dev), list of paths to the csv files
    'PATHS_TO_VALIDATION': ['./testdata/s1_val.csv'],                              # leave empty if no validation required

    # for training
    'TRAIN_BATCH_SIZE': 8,
    'TRAIN_SHUFFLE': True,
    'TRAIN_NUM_WORKERS': 0,
    'CURRENT_EPOCH': 0,
    'CURRENT_TRAIN_LOSS': 0,
    'TRAINING_LANGUAGES': ['en'], # full list is ['en', 'fr', 'ge', 'it', 'po', 'ru']
    'FREEZE': False,                                        # If True first 80% of the layers will be frozen during training

    # for validation
    'VALIDATION_BATCH_SIZE': 8,
    'VALIDATION_SHUFFLE': False,
    'VALIDATION_NUM_WORKERS': 0,
    'BEST_LOSS': float('inf'),
    'CURRENT_VALIDATION_LOSS': 0
}

model_file = run_training(args_train)

Downloading config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

Downloading vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

max length of tokens for < roberta-base > is: < 512 >
TRAIN Dataset: (7000, 5)
TEST Dataset: (500, 5)


Downloading model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Now training on Epoch 0.
Testing on 6999 data points (around 875 iterations)


0it [00:00, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Epoch: 0, Training Loss:  0.7152462005615234


201it [00:32,  6.14it/s]

Epoch: 0, Training Loss:  0.2909088134765625


401it [01:04,  6.17it/s]

Epoch: 0, Training Loss:  0.2278163880109787


601it [01:37,  6.13it/s]

Epoch: 0, Training Loss:  0.27329012751579285


801it [02:09,  6.13it/s]

Epoch: 0, Training Loss:  0.15474048256874084


875it [02:22,  6.16it/s]


Accumulated Training Loss after Epoch 0 is: 225.4221954345703

Now validating on Epoch 0.
Validating on 504 data points (around 63 iterations)


1it [00:00,  9.32it/s]

Epoch: 0, Training Loss:  0.2550155222415924


63it [00:03, 19.02it/s]


Accumulated Validation Loss after Epoch 0 is: 14.2190523147583


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 1.
Testing on 6999 data points (around 875 iterations)


1it [00:00,  2.44it/s]

Epoch: 1, Training Loss:  0.2324271947145462


201it [00:32,  6.13it/s]

Epoch: 1, Training Loss:  0.16518595814704895


401it [01:05,  6.13it/s]

Epoch: 1, Training Loss:  0.1875995248556137


601it [01:38,  6.13it/s]

Epoch: 1, Training Loss:  0.28147995471954346


801it [02:10,  6.09it/s]

Epoch: 1, Training Loss:  0.1673932522535324


875it [02:22,  6.12it/s]


Accumulated Training Loss after Epoch 1 is: 186.32395935058594

Now validating on Epoch 1.
Validating on 504 data points (around 63 iterations)


3it [00:00, 21.30it/s]

Epoch: 1, Training Loss:  0.24053393304347992


63it [00:03, 19.10it/s]


Accumulated Validation Loss after Epoch 1 is: 13.328704833984375


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 2.
Testing on 6999 data points (around 875 iterations)


1it [00:00,  2.54it/s]

Epoch: 2, Training Loss:  0.24704541265964508


201it [00:33,  6.13it/s]

Epoch: 2, Training Loss:  0.21066510677337646


401it [01:05,  6.12it/s]

Epoch: 2, Training Loss:  0.15657512843608856


601it [01:38,  6.11it/s]

Epoch: 2, Training Loss:  0.2531723976135254


801it [02:11,  6.12it/s]

Epoch: 2, Training Loss:  0.22788824141025543


875it [02:23,  6.11it/s]


Accumulated Training Loss after Epoch 2 is: 167.74563598632812

Now validating on Epoch 2.
Validating on 504 data points (around 63 iterations)


3it [00:00, 21.45it/s]

Epoch: 2, Training Loss:  0.24051441252231598


63it [00:03, 19.16it/s]


Accumulated Validation Loss after Epoch 2 is: 12.881023406982422


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 3.
Testing on 6999 data points (around 875 iterations)


1it [00:00,  3.02it/s]

Epoch: 3, Training Loss:  0.1890919804573059


201it [00:32,  6.12it/s]

Epoch: 3, Training Loss:  0.16152901947498322


401it [01:05,  6.12it/s]

Epoch: 3, Training Loss:  0.1279347985982895


601it [01:38,  6.13it/s]

Epoch: 3, Training Loss:  0.13523069024085999


801it [02:10,  6.12it/s]

Epoch: 3, Training Loss:  0.21843478083610535


875it [02:22,  6.12it/s]


Accumulated Training Loss after Epoch 3 is: 152.10427856445312

Now validating on Epoch 3.
Validating on 504 data points (around 63 iterations)


3it [00:00, 21.74it/s]

Epoch: 3, Training Loss:  0.23517917096614838


63it [00:03, 19.16it/s]


Accumulated Validation Loss after Epoch 3 is: 12.661491394042969


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 4.
Testing on 6999 data points (around 875 iterations)


1it [00:00,  2.46it/s]

Epoch: 4, Training Loss:  0.12344952672719955


201it [00:33,  6.05it/s]

Epoch: 4, Training Loss:  0.15436004102230072


401it [01:06,  6.07it/s]

Epoch: 4, Training Loss:  0.09720074385404587


601it [01:38,  6.12it/s]

Epoch: 4, Training Loss:  0.09447702765464783


801it [02:11,  6.11it/s]

Epoch: 4, Training Loss:  0.13167890906333923


875it [02:23,  6.09it/s]


Accumulated Training Loss after Epoch 4 is: 136.10641479492188

Now validating on Epoch 4.
Validating on 504 data points (around 63 iterations)


3it [00:00, 21.30it/s]

Epoch: 4, Training Loss:  0.23383788764476776


63it [00:03, 19.12it/s]

Accumulated Validation Loss after Epoch 4 is: 12.972590446472168
Validation loss increased! Stopping training...
Last validation loss was: 12.661491394042969
Current validation loss is: 12.972590446472168
Done.





In [10]:
args_train = {
    # general
    'MODEL_NAME_PREFIX': 'bert-base-uncased',                                # so that model names don't get overwritten, write a UNIQUE name prefix to distinguish this current model
    'EPOCHS': 1,                                            # Make sure to set the EXACT amount of epochs for non-validating runs. Typically BERT-like models won't need to go over 4 epochs
    'LEARNING_RATE': 1e-05,
    'MODEL_NAME': 'bert-base-uncased',                      # model name as per hugging face library, currently supported model names are: roberta-large, roberta-base, bert-base-uncased, bert-base-multilingual-cased, xlm-roberta-base and xlm-roberta-large
    'DROPOUT_RATE': 0.3,
    'MODELS_DIR': './unimodal-baselines/',               # name of subdirectory within 'models' folder where the trained models will be saved, e.g., 'distilbert_base_threshold_0_4'
    'PATHS_TO_TRAIN': ['./testdata/s1_train.csv'],    # useful when training on more sets (e.g. train and dev), list of paths to the csv files
    'PATHS_TO_VALIDATION': ['./testdata/s1_val.csv'],                              # leave empty if no validation required

    # for training
    'TRAIN_BATCH_SIZE': 8,
    'TRAIN_SHUFFLE': True,
    'TRAIN_NUM_WORKERS': 0,
    'CURRENT_EPOCH': 0,
    'CURRENT_TRAIN_LOSS': 0,
    'TRAINING_LANGUAGES': ['en'], # full list is ['en', 'fr', 'ge', 'it', 'po', 'ru']
    'FREEZE': False,                                        # If True first 80% of the layers will be frozen during training

    # for validation
    'VALIDATION_BATCH_SIZE': 8,
    'VALIDATION_SHUFFLE': False,
    'VALIDATION_NUM_WORKERS': 0,
    'BEST_LOSS': float('inf'),
    'CURRENT_VALIDATION_LOSS': 0
}

model_file = run_training(args_train)

max length of tokens for < bert-base-uncased > is: < 512 >
TRAIN Dataset: (7000, 5)
TEST Dataset: (500, 5)

Now training on Epoch 0.
Testing on 6999 data points (around 875 iterations)


0it [00:00, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Epoch: 0, Training Loss:  0.6837515830993652


200it [08:00,  2.44s/it]

Epoch: 0, Training Loss:  0.2346428781747818


206it [08:17,  2.42s/it]


KeyboardInterrupt: 

In [48]:
args_train = {
    # general
    'MODEL_NAME_PREFIX': 'bert-base-multilingual-cased',                                # so that model names don't get overwritten, write a UNIQUE name prefix to distinguish this current model
    'EPOCHS': 5,                                            # Make sure to set the EXACT amount of epochs for non-validating runs. Typically BERT-like models won't need to go over 4 epochs
    'LEARNING_RATE': 1e-05,
    'MODEL_NAME': 'bert-base-multilingual-cased',                      # model name as per hugging face library, currently supported model names are: roberta-large, roberta-base, bert-base-uncased, bert-base-multilingual-cased, xlm-roberta-base and xlm-roberta-large
    'DROPOUT_RATE': 0.3,
    'MODELS_DIR': './unimodal-baselines/',               # name of subdirectory within 'models' folder where the trained models will be saved, e.g., 'distilbert_base_threshold_0_4'
    'PATHS_TO_TRAIN': ['./testdata/s1_train.csv'],    # useful when training on more sets (e.g. train and dev), list of paths to the csv files
    'PATHS_TO_VALIDATION': ['./testdata/s1_val.csv'],                              # leave empty if no validation required

    # for training
    'TRAIN_BATCH_SIZE': 8,
    'TRAIN_SHUFFLE': True,
    'TRAIN_NUM_WORKERS': 0,
    'CURRENT_EPOCH': 0,
    'CURRENT_TRAIN_LOSS': 0,
    'TRAINING_LANGUAGES': ['en'], # full list is ['en', 'fr', 'ge', 'it', 'po', 'ru']
    'FREEZE': False,                                        # If True first 80% of the layers will be frozen during training

    # for validation
    'VALIDATION_BATCH_SIZE': 8,
    'VALIDATION_SHUFFLE': False,
    'VALIDATION_NUM_WORKERS': 0,
    'BEST_LOSS': float('inf'),
    'CURRENT_VALIDATION_LOSS': 0
}

model_file = run_training(args_train)

max length of tokens for < bert-base-multilingual-cased > is: < 512 >
TRAIN Dataset: (7000, 5)
TEST Dataset: (500, 5)

Now training on Epoch 0.
Testing on 6999 data points (around 875 iterations)


0it [00:00, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Epoch: 0, Training Loss:  0.6666839718818665

1it [00:00,  2.95it/s]




201it [00:33,  5.94it/s]

Epoch: 0, Training Loss:  0.23557336628437042


401it [01:07,  5.92it/s]

Epoch: 0, Training Loss:  0.2597467601299286


601it [01:41,  5.91it/s]

Epoch: 0, Training Loss:  0.256039559841156


801it [02:15,  5.89it/s]

Epoch: 0, Training Loss:  0.26465317606925964


875it [02:27,  5.93it/s]


Accumulated Training Loss after Epoch 0 is: 228.52093505859375

Now validating on Epoch 0.
Validating on 504 data points (around 63 iterations)


3it [00:00, 21.66it/s]

Epoch: 0, Training Loss:  0.24523764848709106


63it [00:03, 18.58it/s]


Accumulated Validation Loss after Epoch 0 is: 14.681147575378418


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 1.
Testing on 6999 data points (around 875 iterations)


1it [00:00,  2.28it/s]

Epoch: 1, Training Loss:  0.261654794216156


201it [00:34,  5.89it/s]

Epoch: 1, Training Loss:  0.2212589681148529


401it [01:08,  5.89it/s]

Epoch: 1, Training Loss:  0.22010673582553864


601it [01:42,  5.88it/s]

Epoch: 1, Training Loss:  0.24758169054985046


801it [02:16,  5.90it/s]

Epoch: 1, Training Loss:  0.1730608195066452


875it [02:28,  5.89it/s]


Accumulated Training Loss after Epoch 1 is: 196.7558135986328

Now validating on Epoch 1.
Validating on 504 data points (around 63 iterations)


3it [00:00, 20.88it/s]

Epoch: 1, Training Loss:  0.2491392344236374


63it [00:03, 18.49it/s]


Accumulated Validation Loss after Epoch 1 is: 13.986952781677246


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 2.
Testing on 6999 data points (around 875 iterations)


1it [00:00,  2.67it/s]

Epoch: 2, Training Loss:  0.21785090863704681


201it [00:34,  5.90it/s]

Epoch: 2, Training Loss:  0.1267060935497284


401it [01:08,  5.88it/s]

Epoch: 2, Training Loss:  0.18764036893844604


601it [01:42,  5.88it/s]

Epoch: 2, Training Loss:  0.1653461754322052


801it [02:16,  5.86it/s]

Epoch: 2, Training Loss:  0.1524803787469864


875it [02:28,  5.88it/s]


Accumulated Training Loss after Epoch 2 is: 183.50254821777344

Now validating on Epoch 2.
Validating on 504 data points (around 63 iterations)


3it [00:00, 20.74it/s]

Epoch: 2, Training Loss:  0.25220245122909546


63it [00:03, 18.48it/s]


Accumulated Validation Loss after Epoch 2 is: 13.564451217651367


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 3.
Testing on 6999 data points (around 875 iterations)


1it [00:00,  2.30it/s]

Epoch: 3, Training Loss:  0.15715853869915009


201it [00:34,  5.92it/s]

Epoch: 3, Training Loss:  0.18217726051807404


401it [01:07,  5.91it/s]

Epoch: 3, Training Loss:  0.21207435429096222


601it [01:41,  5.92it/s]

Epoch: 3, Training Loss:  0.19616058468818665


801it [02:15,  5.91it/s]

Epoch: 3, Training Loss:  0.13681496679782867


875it [02:27,  5.92it/s]


Accumulated Training Loss after Epoch 3 is: 168.31837463378906

Now validating on Epoch 3.
Validating on 504 data points (around 63 iterations)


3it [00:00, 20.55it/s]

Epoch: 3, Training Loss:  0.2412455677986145


63it [00:03, 18.66it/s]


Accumulated Validation Loss after Epoch 3 is: 13.5440092086792


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 4.
Testing on 6999 data points (around 875 iterations)


1it [00:00,  2.68it/s]

Epoch: 4, Training Loss:  0.16728709638118744


201it [00:34,  5.92it/s]

Epoch: 4, Training Loss:  0.1010003387928009


401it [01:07,  5.92it/s]

Epoch: 4, Training Loss:  0.2247316986322403


601it [01:41,  5.90it/s]

Epoch: 4, Training Loss:  0.2190481722354889


801it [02:15,  5.92it/s]

Epoch: 4, Training Loss:  0.1160217821598053


875it [02:27,  5.92it/s]


Accumulated Training Loss after Epoch 4 is: 153.6635284423828

Now validating on Epoch 4.
Validating on 504 data points (around 63 iterations)


3it [00:00, 20.54it/s]

Epoch: 4, Training Loss:  0.2382974624633789


63it [00:03, 18.65it/s]


Accumulated Validation Loss after Epoch 4 is: 13.570302963256836
Validation loss increased! Stopping training...
Last validation loss was: 13.5440092086792
Current validation loss is: 13.570302963256836
Done.


In [49]:
args_train = {
    # general
    'MODEL_NAME_PREFIX': 'xlm-roberta-base',                                # so that model names don't get overwritten, write a UNIQUE name prefix to distinguish this current model
    'EPOCHS': 5,                                            # Make sure to set the EXACT amount of epochs for non-validating runs. Typically BERT-like models won't need to go over 4 epochs
    'LEARNING_RATE': 1e-05,
    'MODEL_NAME': 'xlm-roberta-base',                      # model name as per hugging face library, currently supported model names are: roberta-large, roberta-base, bert-base-uncased, bert-base-multilingual-cased, xlm-roberta-base and xlm-roberta-large
    'DROPOUT_RATE': 0.3,
    'MODELS_DIR': './unimodal-baselines/',               # name of subdirectory within 'models' folder where the trained models will be saved, e.g., 'distilbert_base_threshold_0_4'
    'PATHS_TO_TRAIN': ['./testdata/s1_train.csv'],    # useful when training on more sets (e.g. train and dev), list of paths to the csv files
    'PATHS_TO_VALIDATION': ['./testdata/s1_val.csv'],                              # leave empty if no validation required

    # for training
    'TRAIN_BATCH_SIZE': 8,
    'TRAIN_SHUFFLE': True,
    'TRAIN_NUM_WORKERS': 0,
    'CURRENT_EPOCH': 0,
    'CURRENT_TRAIN_LOSS': 0,
    'TRAINING_LANGUAGES': ['en'], # full list is ['en', 'fr', 'ge', 'it', 'po', 'ru']
    'FREEZE': False,                                        # If True first 80% of the layers will be frozen during training

    # for validation
    'VALIDATION_BATCH_SIZE': 8,
    'VALIDATION_SHUFFLE': False,
    'VALIDATION_NUM_WORKERS': 0,
    'BEST_LOSS': float('inf'),
    'CURRENT_VALIDATION_LOSS': 0
}

model_file = run_training(args_train)

Downloading config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

Downloading (…)tencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

max length of tokens for < xlm-roberta-base > is: < 512 >
TRAIN Dataset: (7000, 5)
TEST Dataset: (500, 5)


Downloading model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]


Now training on Epoch 0.
Testing on 6999 data points (around 875 iterations)


0it [00:00, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
1it [00:00,  5.10it/s]

Epoch: 0, Training Loss:  0.7227253913879395


201it [00:35,  5.60it/s]

Epoch: 0, Training Loss:  0.23435835540294647


401it [01:11,  5.57it/s]

Epoch: 0, Training Loss:  0.2802368104457855


601it [01:47,  5.56it/s]

Epoch: 0, Training Loss:  0.2693515717983246


801it [02:23,  5.56it/s]

Epoch: 0, Training Loss:  0.2652474045753479


875it [02:36,  5.59it/s]


Accumulated Training Loss after Epoch 0 is: 236.19778442382812

Now validating on Epoch 0.
Validating on 504 data points (around 63 iterations)


3it [00:00, 21.15it/s]

Epoch: 0, Training Loss:  0.2521989345550537


63it [00:03, 18.87it/s]


Accumulated Validation Loss after Epoch 0 is: 14.92094898223877


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 1.
Testing on 6999 data points (around 875 iterations)


1it [00:00,  1.98it/s]

Epoch: 1, Training Loss:  0.22549903392791748


201it [00:36,  5.55it/s]

Epoch: 1, Training Loss:  0.1729382574558258


401it [01:12,  5.54it/s]

Epoch: 1, Training Loss:  0.20894300937652588


601it [01:48,  5.58it/s]

Epoch: 1, Training Loss:  0.20800527930259705


801it [02:24,  5.55it/s]

Epoch: 1, Training Loss:  0.2513335943222046


875it [02:37,  5.55it/s]


Accumulated Training Loss after Epoch 1 is: 203.90008544921875

Now validating on Epoch 1.
Validating on 504 data points (around 63 iterations)


3it [00:00, 20.75it/s]

Epoch: 1, Training Loss:  0.25143009424209595


63it [00:03, 18.86it/s]


Accumulated Validation Loss after Epoch 1 is: 14.571717262268066


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 2.
Testing on 6999 data points (around 875 iterations)


1it [00:00,  1.98it/s]

Epoch: 2, Training Loss:  0.22406335175037384


201it [00:36,  5.57it/s]

Epoch: 2, Training Loss:  0.23353950679302216


401it [01:12,  5.56it/s]

Epoch: 2, Training Loss:  0.26549744606018066


601it [01:48,  5.38it/s]

Epoch: 2, Training Loss:  0.21081149578094482


801it [02:25,  5.35it/s]

Epoch: 2, Training Loss:  0.14100949466228485


875it [02:38,  5.51it/s]


Accumulated Training Loss after Epoch 2 is: 199.7682342529297

Now validating on Epoch 2.
Validating on 504 data points (around 63 iterations)


2it [00:00, 19.53it/s]

Epoch: 2, Training Loss:  0.255021333694458


63it [00:03, 18.88it/s]


Accumulated Validation Loss after Epoch 2 is: 14.353126525878906


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 3.
Testing on 6999 data points (around 875 iterations)


1it [00:00,  2.11it/s]

Epoch: 3, Training Loss:  0.1511322259902954


201it [00:36,  5.27it/s]

Epoch: 3, Training Loss:  0.23527321219444275


401it [01:12,  5.60it/s]

Epoch: 3, Training Loss:  0.2530156672000885


601it [01:48,  5.62it/s]

Epoch: 3, Training Loss:  0.19035103917121887


801it [02:23,  5.61it/s]

Epoch: 3, Training Loss:  0.24796320497989655


875it [02:36,  5.58it/s]


Accumulated Training Loss after Epoch 3 is: 192.7353973388672

Now validating on Epoch 3.
Validating on 504 data points (around 63 iterations)


3it [00:00, 20.67it/s]

Epoch: 3, Training Loss:  0.25359097123146057


63it [00:03, 19.11it/s]


Accumulated Validation Loss after Epoch 3 is: 13.806134223937988


 --- Currently saving model! --- 



 --- Currently saving model! --- 


Now training on Epoch 4.
Testing on 6999 data points (around 875 iterations)


1it [00:00,  2.12it/s]

Epoch: 4, Training Loss:  0.17935985326766968


201it [00:35,  5.63it/s]

Epoch: 4, Training Loss:  0.20921297371387482


401it [01:11,  5.61it/s]

Epoch: 4, Training Loss:  0.19051088392734528


601it [01:47,  5.61it/s]

Epoch: 4, Training Loss:  0.1933344006538391


801it [02:22,  5.61it/s]

Epoch: 4, Training Loss:  0.25753507018089294


875it [02:35,  5.62it/s]


Accumulated Training Loss after Epoch 4 is: 180.2266082763672

Now validating on Epoch 4.
Validating on 504 data points (around 63 iterations)


3it [00:00, 20.70it/s]

Epoch: 4, Training Loss:  0.23572345077991486


63it [00:03, 19.12it/s]


Accumulated Validation Loss after Epoch 4 is: 13.673762321472168


 --- Currently saving model! --- 



 --- Currently saving model! --- 

Done.


# Evaluation

In [35]:
def test(model, testing_loader, only_test):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.eval()
    fin_targets = []
    fin_outputs = []
    with torch.no_grad():
        for _, data in tqdm(enumerate(testing_loader, 0)):
            ids = data['ids'].to(device, dtype=torch.long)
            mask = data['mask'].to(device, dtype=torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype=torch.long)

            # if there are no targets available
            if not only_test:
                targets = data['targets'].to(device, dtype=torch.float)
                fin_targets.extend(targets.cpu().detach().numpy().tolist())

            outputs = model(ids, mask, token_type_ids)
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets


def run_tests(model, testing_loader, evaluation_threshold, only_test, lang, results_list):
    outputs, targets = test(model, testing_loader, only_test)
    outputs = np.array(outputs) >= evaluation_threshold

    if targets:
        accuracy = metrics.accuracy_score(targets, outputs)
        f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
        f1_score_macro = metrics.f1_score(targets, outputs, average='macro')
        print(f"Accuracy Score = {accuracy}")
        print(f"F1 Score (Micro) = {f1_score_micro}")
        print(f"F1 Score (Macro) = {f1_score_macro}")
        results_list.append([lang, accuracy, f1_score_micro, f1_score_macro])
        print(results_list)

    return outputs, results_list

def run_eval(args):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = torch.load(args["MODEL_FILE_NAME"])
    model.to(device)

    test_df, mappings = load_dataframe(args)
    language_counts = 1 # hard code to one since just testing on English
    language = args['LANGUAGES']

    results_list = []
    
    testing_loader, filtered_test_df = create_dataloader(args, test_df, args['LANGUAGES'], language_counts)

    outputs, results_list = run_tests(model, testing_loader, args['EVALUATION_THRESHOLD'], args['ONLY_TEST'], language, results_list)

    #path = f'results/{args["RESULT_DIR_NAME"]}/{language}.txt'

    df_results = pd.DataFrame(results_list, columns=['language', 'accuracy', 'F1_micro', 'F1_macro'])
    df_results['model'] = args['MODEL_TITLE']
    return df_results


In [37]:
#models names: roberta-large, roberta-base, bert-base-uncased, bert-base-multilingual-cased, xlm-roberta-base and xlm-roberta-large

args_eval = {
    'MODEL_FILE_NAME': path, # path to saved model
    'RESULT_DIR_NAME': '',                                  # name of subdirectory within 'results' folder where to store the txt result files (for submission), e.g., 'distilbert_base_threshold_0_4'
    'EVALUATION_THRESHOLD': 0.5,                            # select the confidence threshold for when to assign a label. Default is 0.5
    'MODEL_NAME': 'bert-base-uncased',
    'MODEL_TITLE': 'title',
    'ONLY_TEST': False,                                      # set to `True` ONLY IF running on test set (not dev set or anything else)
    'EVALUATION_SET': './testdata/s1_val.csv',       # path to csv with evaluation set
    'LANGUAGES': ['en'],    # select which languages to evaluate, packaged in a list. # e.g. ['en', 'fr', 'ge', 'it', 'po', 'ru', 'ka', 'gr', 'es'],

    # test params
    'BATCH_SIZE': 32,
    'SHUFFLE': False,  
    'NUM_WORKERS': 0
}

#run_eval(args_eval)

In [39]:
path = r'change this to where the models are saved'
models_path = os.listdir(r'change this to where the models are saved')

models = [i for i in models_path if 'weights' not in i]
print(models)

['bert_base.pth', 'mbert.pth', 'roberta_base.pth', 'roberta_large.pth', 'xlm_roberta_base.pth', 'xlm_roberta_large.pth']


In [42]:
results_df = pd.DataFrame()

for i in models:
    args_eval['MODEL_FILE_NAME'] = os.path.join(path, i)
    args_eval['MODEL_TITLE'] = i.split('.')[0]
    results = run_eval(args_eval)
    results_df = pd.concat([results_df, results])
    results_df

Encoding techniques into one-hot-encoded lists!

max length of tokens for < 512 > is: < 512 >


0it [00:00, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
16it [00:03,  4.64it/s]


Accuracy Score = 0.214
F1 Score (Micro) = 0.291015625
F1 Score (Macro) = 0.10118576656955787
[[['en'], 0.214, 0.291015625, 0.10118576656955787]]
Encoding techniques into one-hot-encoded lists!

max length of tokens for < 512 > is: < 512 >


0it [00:00, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
16it [00:03,  4.68it/s]


Accuracy Score = 0.21
F1 Score (Micro) = 0.1218961625282167
F1 Score (Macro) = 0.028919578560726886
[[['en'], 0.21, 0.1218961625282167, 0.028919578560726886]]
Encoding techniques into one-hot-encoded lists!

max length of tokens for < 512 > is: < 512 >


0it [00:00, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
16it [00:03,  4.82it/s]


Accuracy Score = 0.136
F1 Score (Micro) = 0.15894039735099338
F1 Score (Macro) = 0.03899035700807065
[[['en'], 0.136, 0.15894039735099338, 0.03899035700807065]]
Encoding techniques into one-hot-encoded lists!

max length of tokens for < 512 > is: < 512 >


0it [00:00, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
16it [00:10,  1.59it/s]


Accuracy Score = 0.116
F1 Score (Micro) = 0.13050847457627118
F1 Score (Macro) = 0.033793371329977485
[[['en'], 0.116, 0.13050847457627118, 0.033793371329977485]]
Encoding techniques into one-hot-encoded lists!

max length of tokens for < 512 > is: < 512 >


0it [00:00, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
16it [00:03,  4.92it/s]


Accuracy Score = 0.2
F1 Score (Micro) = 0.0731995277449823
F1 Score (Macro) = 0.026169902367859946
[[['en'], 0.2, 0.0731995277449823, 0.026169902367859946]]
Encoding techniques into one-hot-encoded lists!

max length of tokens for < 512 > is: < 512 >


0it [00:00, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
16it [00:03,  4.89it/s]

Accuracy Score = 0.188
F1 Score (Micro) = 0.010217113665389528
F1 Score (Macro) = 0.0029651593773165306
[[['en'], 0.188, 0.010217113665389528, 0.0029651593773165306]]





In [44]:
results_df

Unnamed: 0,language,accuracy,F1_micro,F1_macro,model
0,[en],0.214,0.291016,0.101186,bert_base
0,[en],0.21,0.121896,0.02892,mbert
0,[en],0.136,0.15894,0.03899,roberta_base
0,[en],0.116,0.130508,0.033793,roberta_large
0,[en],0.2,0.0732,0.02617,xlm_roberta_base
0,[en],0.188,0.010217,0.002965,xlm_roberta_large
