# Test "Supervised" BERT Models

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import loader
from const import ExpDataset

import os
import json
from tqdm import tqdm
import numpy as np
import pandas as pd
from train_common import TAU
from collections import defaultdict

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# device selection: prefer the second GPU (cuda:1) when at least two GPUs are
# visible, otherwise fall back to the first GPU (cuda:0); use the CPU when CUDA
# is unavailable. Unconditionally picking cuda:1 fails on single-GPU hosts.
if not torch.cuda.is_available():
    device = "cpu"
elif torch.cuda.device_count() > 1:
    device = "cuda:1"
else:
    device = "cuda:0"

## Test functions
The following snippet contains all the functions that, given a model and a set of (or a single) data loaders, will return the labels and predictions.

In [4]:
def run_test_multi_label_per_document(model, data_loaders):
    """Run multi-label inference separately on every document.

    Args:
        model: model callable as ``model(input_ids=..., attention_mask=...)``
            whose output exposes ``.logits``.
        data_loaders: mapping ``document name -> iterable of batches``; every
            batch is indexed with ``"input_ids"``, ``"attention_mask"`` and
            ``"labels"``.

    Returns:
        dict mapping each document name to ``{"labels": ..., "preds": ...}``.
        Both values are integer arrays reshaped to a single row, obtained by
        summing the per-row vectors of the document over axis 0 and clipping
        the result to ``[0, 1]``.
    """
    model.to(device)
    model.eval()

    docs = {}

    for doc_name, data_loader in tqdm(data_loaders.items()):
        all_labels = []
        all_preds = []

        with torch.no_grad():
            for batch in data_loader:
                # Labels are only accumulated, never fed to the model, so they
                # stay on the host instead of being copied to `device` and back.
                all_labels.extend(torch.as_tensor(batch["labels"]).cpu().numpy())

                input_ids = torch.as_tensor(batch["input_ids"]).to(device)
                attn_mask = torch.as_tensor(batch["attention_mask"]).to(device)
                out = model(input_ids=input_ids, attention_mask=attn_mask)

                # Independent sigmoid per class, binarised with the TAU threshold.
                probs = out.logits.sigmoid()
                preds = torch.where(probs > TAU, 1.0, 0.0)
                all_preds.extend(preds.cpu().numpy())

        # Collapse all rows of the document into one vector: an entry becomes 1
        # as soon as any row has a positive value there.
        all_labels = np.clip(np.sum(all_labels, axis=0, dtype=int), 0, 1).reshape(1, -1)
        all_preds = np.clip(np.sum(all_preds, axis=0, dtype=int), 0, 1).reshape(1, -1)

        docs[doc_name] = {"labels": all_labels, "preds": all_preds}

    return docs


def run_test_multi_label_per_sentence(model, data_loader):
    """Run multi-label inference over a single data loader, row by row.

    Args:
        model: model callable as ``model(input_ids=..., attention_mask=...)``
            whose output exposes ``.logits``.
        data_loader: iterable of batches; every batch is indexed with
            ``"input_ids"``, ``"attention_mask"`` and ``"labels"``.

    Returns:
        ``{"labels": [...], "preds": [...]}`` where each list holds one numpy
        vector per row of the batches, in iteration order. Predictions are
        ``1.0`` where the sigmoid probability exceeds ``TAU`` and ``0.0``
        elsewhere.
    """
    model.to(device)
    model.eval()

    all_labels = []
    all_preds = []

    with torch.no_grad():
        for batch in tqdm(data_loader):
            # Labels are only accumulated, never fed to the model, so they stay
            # on the host instead of being copied to `device` and back.
            all_labels.extend(torch.as_tensor(batch["labels"]).cpu().numpy())

            input_ids = torch.as_tensor(batch["input_ids"]).to(device)
            attn_mask = torch.as_tensor(batch["attention_mask"]).to(device)
            out = model(input_ids=input_ids, attention_mask=attn_mask)

            # Independent sigmoid per class, binarised with the TAU threshold.
            probs = out.logits.sigmoid()
            preds = torch.where(probs > TAU, 1.0, 0.0)
            all_preds.extend(preds.cpu().numpy())

    docs = {"labels": all_labels, "preds": all_preds}

    return docs


def run_test_single_label_per_sentence(model, data_loader):
    """Placeholder for the single-label, per-sentence evaluation.

    Raises:
        NotImplementedError: always; single-label evaluation is currently
            disabled in this notebook.
    """
    raise NotImplementedError(
        "single-label per-sentence evaluation is not implemented"
    )


def run_test_single_label_per_document(model, data_loaders):
    """Placeholder for the single-label, per-document evaluation.

    Raises:
        NotImplementedError: always; single-label evaluation is currently
            disabled in this notebook.
    """
    raise NotImplementedError(
        "single-label per-document evaluation is not implemented"
    )


# def run_test_single_label_per_document(model, data_loaders):
#     model.to(device)
#     model.eval()
#     docs = {}

#     for doc_name, data_loader in tqdm(data_loaders):
#         all_labels = []
#         all_preds = []

#         with torch.no_grad():
#             for batch in data_loader:

#                 input_ids = torch.as_tensor(batch["input_ids"]).to(device)
#                 attn_mask = torch.as_tensor(batch["attention_mask"]).to(device)
#                 labels = torch.as_tensor(batch["labels"]).to(device)
#                 all_labels.extend(labels.cpu().numpy())
#                 out = model(input_ids=input_ids, attention_mask=attn_mask)
#                 probs = torch.softmax(out.logits, dim=1)
#                 max_indices = torch.argmax(probs, dim=1, keepdim=True)
#                 preds = torch.zeros_like(probs)
#                 # Scatter 1's at the max indices
#                 preds.scatter_(1, max_indices, 1)
#                 all_preds.extend(preds.cpu().numpy())

#         all_labels = np.clip(np.sum(all_labels, axis=0, dtype=int), 0, 1).reshape(1, -1)
#         all_preds = np.clip(np.sum(all_preds, axis=0, dtype=int), 0, 1).reshape(1, -1)

#         docs[doc_name] = {
#             "labels": all_labels,
#             "preds": all_preds
#         }

#     return docs


# def run_test_single_label_per_sentence(model, data_loader):
#     model.to(device)
#     model.eval()

#     all_labels = []
#     all_preds = []

#     with torch.no_grad():
#         for batch in tqdm(data_loader):

#             input_ids = torch.as_tensor(batch["input_ids"]).to(device)
#             attn_mask = torch.as_tensor(batch["attention_mask"]).to(device)
#             labels = torch.as_tensor(batch["labels"]).to(device)
#             all_labels.extend(labels.cpu().numpy())
#             out = model(input_ids=input_ids, attention_mask=attn_mask)
#             probs = torch.softmax(out.logits, dim=1)
#             max_indices = torch.argmax(probs, dim=1, keepdim=True)
#             preds = torch.zeros_like(probs)
#             # Scatter 1's at the max indices
#             preds.scatter_(1, max_indices, 1)
#             all_preds.extend(preds.cpu().numpy())

#     docs = {
#         "all": {
#             "labels": all_labels,
#             "preds": all_preds
#         }
#     }

#     return docs

## Test on single experimental setup
Each setup is saved inside a folder named `"fine_tuned/{problem_type}/{conf_id}_{dataset}_{model_name}"`.
Select the folder by its name and run the tests!

In [5]:
from test_common import calc_results_per_document, calc_results_per_sentence


def run_test(model_dir, model_name, dataset_name, per_document=True):
    """Evaluate one fine-tuned model and return its results table.

    Args:
        model_dir: folder passed to ``loader.load_finetuned_model``.
        model_name: model identifier; only echoed in the printed summary.
        dataset_name: dataset identifier passed to ``loader.load_datasets``; it
            also decides between the single-label and multi-label test routine.
        per_document: when truthy, evaluate per document, otherwise per sentence.

    Returns:
        The value produced by ``calc_results_per_document`` (per-document mode)
        or ``calc_results_per_sentence`` (per-sentence mode).
    """
    model, tokenizer = loader.load_finetuned_model(model_dir)
    _, _, eval_loaders = loader.load_datasets(
        dataset_name, 16, tokenizer, per_document=per_document
    )

    # Only these two dataset variants are treated as single-label problems.
    single_label_datasets = (
        ExpDataset.BOSCH_TECHNIQUES_SL.value,
        ExpDataset.TRAM_TECHNIQUES_SL.value,
    )
    is_single_label = dataset_name in single_label_datasets

    # (per_document, is_single_label) -> test routine
    runners = {
        (True, True): run_test_single_label_per_document,
        (True, False): run_test_multi_label_per_document,
        (False, True): run_test_single_label_per_sentence,
        (False, False): run_test_multi_label_per_sentence,
    }
    test_f = runners[(bool(per_document), is_single_label)]

    print("Chosen experimental setup:")
    print(
        f"\tdataset_name={dataset_name}\n\tmodel_name={model_name}\n\tis_single_label={is_single_label}\n\tper_document={per_document}"
    )
    print(f"\ttest_function={test_f}")

    results = test_f(model, eval_loaders)

    # The data loader also carries the dataset labels as presented to the model
    # during training; they could be read from it instead of the model config:
    # target_names = data_loaders[list(data_loaders.keys())[0]].dataset.labels

    if per_document:
        return calc_results_per_document(results, model.config.id2label)
    return calc_results_per_sentence(results)

## Run a single test

### Per document

In [6]:
# Experimental setup evaluated by the single-test cells below.
model_dir = "fine_tuned/tram_swipe/28_tram_FacebookAI-roberta-large"
model_name = "FacebookAI/roberta-large"
dataset_name = "tram"

out_df = run_test(model_dir, model_name, dataset_name)
# Scale to a percentage before rounding: multiplying an already rounded float by
# 100 can reintroduce floating-point noise (e.g. 58.209999999999994).
print(f"Mean F1-score is: {round(out_df.f1.mean() * 100, 2)}")

Chosen experimental setup:
	dataset_name=tram
	model_name=FacebookAI/roberta-large
	is_single_label=False
	per_document=True
	test_function=<function run_test_multi_label_per_document at 0x7feb7e6ea050>


100%|██████████| 31/31 [02:47<00:00,  5.40s/it]


Mean F1-score is: 70.55


### Per sentence

In [8]:
# Re-run the same setup (model_dir / model_name / dataset_name defined in an
# earlier cell) in per-sentence mode. NOTE: this overwrites `out_df`.
out_df = run_test(model_dir, model_name, dataset_name, per_document=False)
out_df

Chosen experimental setup:
	dataset_name=tram
	model_name=FacebookAI/roberta-large
	is_single_label=False
	per_document=False
	test_function=<function run_test_multi_label_per_sentence at 0x7feb7e6e9090>


100%|██████████| 239/239 [02:45<00:00,  1.44it/s]


Unnamed: 0,doc_title,micro_f1,macro_f1,samples_f1,weighted_f1,micro_precision,macro_precision,samples_precision,weighted_precision,micro_recall,macro_recall,samples_recall,weighted_recall,accuracy
0,per_sentence,0.582196,0.416301,0.115575,0.547435,0.659653,0.467605,0.123342,0.613874,0.521017,0.416903,0.113578,0.521017,0.831152


## Test Baseline

In [None]:
# Baseline, per-document evaluation on the "tram" dataset using the model
# returned by `loader.load_untrained_model`.
# NOTE(review): this cell rebinds `model_name`, `model`, `tokenizer`,
# `data_loaders` and `out_df`; `model_name` is assigned but not used here.
model_name = "baseline_tram"
model, tokenizer = loader.load_untrained_model("scibert_multi_label_model", "tram")
_, _, data_loaders = loader.load_datasets("tram", 16, tokenizer, per_document=True)
results = run_test_multi_label_per_document(model, data_loaders)
out_df = calc_results_per_document(results, model.config.id2label)
# Unlike the single-test cell, the mean F1 is printed as a fraction, not a percentage.
print(f"baseline f1 is: {round(out_df.f1.mean(), 4)}")

In [7]:
# Persist the current `out_df` as CSV.
# NOTE(review): `out_df` is whatever the most recently executed cell assigned;
# presumably this is meant to run right after the baseline per-document cell — confirm.
out_df.to_csv("baseline.csv")

In [None]:
# Baseline, per-sentence evaluation: reuses `model` and `tokenizer` from the
# baseline cell above, so that cell must have been executed first.
# NOTE: this overwrites `results` and `out_df`.
_, _, test_loader = loader.load_datasets("tram", 16, tokenizer, per_document=False)
results = run_test_multi_label_per_sentence(model, test_loader)
out_df = calc_results_per_sentence(results)
out_df

## Run tests for all models

### Setups to test

In [7]:
data_augmentation = [
    "fine_tuned/data_augmentation/400_tram_artificial_FacebookAI-roberta-large", 
    "fine_tuned/data_augmentation/401_tram_ood_FacebookAI-roberta-large",
    "fine_tuned/data_augmentation/402_tram_ood_rebalanced_FacebookAI-roberta-large",
]
tram_champion = [
    "fine_tuned/tram_champion/21_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/22_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/23_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/24_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/25_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/26_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/27_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/28_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/29_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/30_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/31_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/32_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/33_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/34_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/35_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/0_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/10_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/11_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/12_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/13_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/14_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/15_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/16_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/17_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/18_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/19_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/1_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/20_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/2_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/36_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/37_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/38_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/39_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/3_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/40_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/41_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/42_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/43_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/44_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/45_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/46_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/47_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/48_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/49_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/4_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/50_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/51_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/54_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/55_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/56_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/57_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/58_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/59_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/5_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/60_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/61_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/62_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/63_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/64_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/65_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/66_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/67_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/68_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/69_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/6_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/70_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/71_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/7_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/8_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_champion/9_tram_FacebookAI-roberta-large",
]

tram_first_swipe = [
    "fine_tuned/tram_first_swipe/0_tram_10_bert-base-uncased",
    "fine_tuned/tram_first_swipe/1_tram_10_bert-base-uncased",
    "fine_tuned/tram_first_swipe/2_tram_25_bert-base-uncased",
    "fine_tuned/tram_first_swipe/3_tram_25_bert-base-uncased",
    "fine_tuned/tram_first_swipe/4_tram_bert-base-uncased",
    "fine_tuned/tram_first_swipe/5_tram_bert-base-uncased",
    "fine_tuned/tram_first_swipe/6_tram_10_bert-base-cased",
    "fine_tuned/tram_first_swipe/7_tram_10_bert-base-cased",
    "fine_tuned/tram_first_swipe/8_tram_25_bert-base-cased",
    "fine_tuned/tram_first_swipe/9_tram_25_bert-base-cased",
    "fine_tuned/tram_first_swipe/10_tram_bert-base-cased",
    "fine_tuned/tram_first_swipe/11_tram_bert-base-cased",
    "fine_tuned/tram_first_swipe/12_tram_10_FacebookAI-roberta-base",
    "fine_tuned/tram_first_swipe/13_tram_10_FacebookAI-roberta-base",
    "fine_tuned/tram_first_swipe/14_tram_25_FacebookAI-roberta-base",
    "fine_tuned/tram_first_swipe/15_tram_25_FacebookAI-roberta-base",
    "fine_tuned/tram_first_swipe/16_tram_FacebookAI-roberta-base",
    "fine_tuned/tram_first_swipe/17_tram_FacebookAI-roberta-base",
    "fine_tuned/tram_first_swipe/18_tram_10_FacebookAI-xlm-roberta-base",
    "fine_tuned/tram_first_swipe/19_tram_10_FacebookAI-xlm-roberta-base",
    "fine_tuned/tram_first_swipe/20_tram_25_FacebookAI-xlm-roberta-base",
    "fine_tuned/tram_first_swipe/21_tram_25_FacebookAI-xlm-roberta-base",
    "fine_tuned/tram_first_swipe/22_tram_FacebookAI-xlm-roberta-base",
    "fine_tuned/tram_first_swipe/23_tram_FacebookAI-xlm-roberta-base",
    "fine_tuned/tram_first_swipe/24_tram_10_FacebookAI-roberta-large",
    "fine_tuned/tram_first_swipe/25_tram_10_FacebookAI-roberta-large",
    "fine_tuned/tram_first_swipe/26_tram_25_FacebookAI-roberta-large",
    "fine_tuned/tram_first_swipe/27_tram_25_FacebookAI-roberta-large",
    "fine_tuned/tram_first_swipe/28_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_first_swipe/29_tram_FacebookAI-roberta-large",
    "fine_tuned/tram_first_swipe/30_tram_10_FacebookAI-xlm-roberta-large",
    "fine_tuned/tram_first_swipe/31_tram_10_FacebookAI-xlm-roberta-large",
    "fine_tuned/tram_first_swipe/32_tram_25_FacebookAI-xlm-roberta-large",
    "fine_tuned/tram_first_swipe/33_tram_25_FacebookAI-xlm-roberta-large",
    "fine_tuned/tram_first_swipe/34_tram_FacebookAI-xlm-roberta-large",
    "fine_tuned/tram_first_swipe/35_tram_FacebookAI-xlm-roberta-large",
    "fine_tuned/tram_first_swipe/36_tram_10_s2w-ai-DarkBERT",
    "fine_tuned/tram_first_swipe/37_tram_10_s2w-ai-DarkBERT",
    "fine_tuned/tram_first_swipe/38_tram_25_s2w-ai-DarkBERT",
    "fine_tuned/tram_first_swipe/39_tram_25_s2w-ai-DarkBERT",
    "fine_tuned/tram_first_swipe/40_tram_s2w-ai-DarkBERT",
    "fine_tuned/tram_first_swipe/41_tram_s2w-ai-DarkBERT",
    "fine_tuned/tram_first_swipe/42_tram_10_jackaduma-SecBERT",
    "fine_tuned/tram_first_swipe/43_tram_10_jackaduma-SecBERT",
    "fine_tuned/tram_first_swipe/44_tram_25_jackaduma-SecBERT",
    "fine_tuned/tram_first_swipe/45_tram_25_jackaduma-SecBERT",
    "fine_tuned/tram_first_swipe/46_tram_jackaduma-SecBERT",
    "fine_tuned/tram_first_swipe/47_tram_jackaduma-SecBERT",
    "fine_tuned/tram_first_swipe/48_tram_10_jackaduma-SecRoBERTa",
    "fine_tuned/tram_first_swipe/49_tram_10_jackaduma-SecRoBERTa",
    "fine_tuned/tram_first_swipe/50_tram_25_jackaduma-SecRoBERTa",
    "fine_tuned/tram_first_swipe/51_tram_25_jackaduma-SecRoBERTa",
    "fine_tuned/tram_first_swipe/52_tram_jackaduma-SecRoBERTa",
    "fine_tuned/tram_first_swipe/53_tram_jackaduma-SecRoBERTa",
    "fine_tuned/tram_first_swipe/54_tram_10_markusbayer-CySecBERT",
    "fine_tuned/tram_first_swipe/55_tram_10_markusbayer-CySecBERT",
    "fine_tuned/tram_first_swipe/56_tram_25_markusbayer-CySecBERT",
    "fine_tuned/tram_first_swipe/57_tram_25_markusbayer-CySecBERT",
    "fine_tuned/tram_first_swipe/58_tram_markusbayer-CySecBERT",
    "fine_tuned/tram_first_swipe/59_tram_markusbayer-CySecBERT",
    "fine_tuned/tram_first_swipe/60_tram_10_allenai-scibert_scivocab_cased",
    "fine_tuned/tram_first_swipe/61_tram_10_allenai-scibert_scivocab_cased",
    "fine_tuned/tram_first_swipe/62_tram_25_allenai-scibert_scivocab_cased",
    "fine_tuned/tram_first_swipe/63_tram_25_allenai-scibert_scivocab_cased",
    "fine_tuned/tram_first_swipe/64_tram_allenai-scibert_scivocab_cased",
    "fine_tuned/tram_first_swipe/65_tram_allenai-scibert_scivocab_cased",
    "fine_tuned/tram_first_swipe/66_tram_10_allenai-scibert_scivocab_uncased",
    "fine_tuned/tram_first_swipe/67_tram_10_allenai-scibert_scivocab_uncased",
    "fine_tuned/tram_first_swipe/68_tram_25_allenai-scibert_scivocab_uncased",
    "fine_tuned/tram_first_swipe/69_tram_25_allenai-scibert_scivocab_uncased",
    "fine_tuned/tram_first_swipe/70_tram_allenai-scibert_scivocab_uncased",
    "fine_tuned/tram_first_swipe/71_tram_allenai-scibert_scivocab_uncased",
    "fine_tuned/tram_first_swipe/72_tram_10_priyankaranade-cybert",
    "fine_tuned/tram_first_swipe/73_tram_10_priyankaranade-cybert",
    "fine_tuned/tram_first_swipe/74_tram_25_priyankaranade-cybert",
    "fine_tuned/tram_first_swipe/75_tram_25_priyankaranade-cybert",
    "fine_tuned/tram_first_swipe/76_tram_priyankaranade-cybert",
    "fine_tuned/tram_first_swipe/77_tram_priyankaranade-cybert",
    "fine_tuned/tram_first_swipe/78_tram_10_tram_multi_label_model",
    "fine_tuned/tram_first_swipe/79_tram_10_tram_multi_label_model",
    "fine_tuned/tram_first_swipe/80_tram_25_tram_multi_label_model",
    "fine_tuned/tram_first_swipe/81_tram_25_tram_multi_label_model",
    "fine_tuned/tram_first_swipe/82_tram_tram_multi_label_model",
    "fine_tuned/tram_first_swipe/83_tram_tram_multi_label_model",
    "fine_tuned/tram_first_swipe/84_tram_10_ehsanaghaei-SecureBERT",
    "fine_tuned/tram_first_swipe/85_tram_10_ehsanaghaei-SecureBERT",
    "fine_tuned/tram_first_swipe/86_tram_25_ehsanaghaei-SecureBERT",
    "fine_tuned/tram_first_swipe/87_tram_25_ehsanaghaei-SecureBERT",
    "fine_tuned/tram_first_swipe/88_tram_ehsanaghaei-SecureBERT",
    "fine_tuned/tram_first_swipe/89_tram_ehsanaghaei-SecureBERT",
]

pos_weight_0_first_swipe = [
    "fine_tuned/pos_weight_0_first_swipe/0_bosch_t10_bert-base-uncased",
    "fine_tuned/pos_weight_0_first_swipe/10_bosch_t10_FacebookAI-roberta-base",
    "fine_tuned/pos_weight_0_first_swipe/11_bosch_t25_FacebookAI-roberta-base",
    "fine_tuned/pos_weight_0_first_swipe/12_bosch_t_FacebookAI-roberta-base",
    "fine_tuned/pos_weight_0_first_swipe/13_tram_10_FacebookAI-roberta-base",
    "fine_tuned/pos_weight_0_first_swipe/14_tram_FacebookAI-roberta-base",
    "fine_tuned/pos_weight_0_first_swipe/15_bosch_t10_FacebookAI-xlm-roberta-base",
    "fine_tuned/pos_weight_0_first_swipe/16_bosch_t25_FacebookAI-xlm-roberta-base",
    "fine_tuned/pos_weight_0_first_swipe/17_bosch_t_FacebookAI-xlm-roberta-base",
    "fine_tuned/pos_weight_0_first_swipe/18_tram_10_FacebookAI-xlm-roberta-base",
    "fine_tuned/pos_weight_0_first_swipe/19_tram_FacebookAI-xlm-roberta-base",
    "fine_tuned/pos_weight_0_first_swipe/1_bosch_t25_bert-base-uncased",
    "fine_tuned/pos_weight_0_first_swipe/20_bosch_t10_FacebookAI-roberta-large",
    "fine_tuned/pos_weight_0_first_swipe/21_bosch_t25_FacebookAI-roberta-large",
    "fine_tuned/pos_weight_0_first_swipe/22_bosch_t_FacebookAI-roberta-large",
    "fine_tuned/pos_weight_0_first_swipe/23_tram_10_FacebookAI-roberta-large",
    "fine_tuned/pos_weight_0_first_swipe/24_tram_FacebookAI-roberta-large",
    "fine_tuned/pos_weight_0_first_swipe/25_bosch_t10_FacebookAI-xlm-roberta-large",
    "fine_tuned/pos_weight_0_first_swipe/26_bosch_t25_FacebookAI-xlm-roberta-large",
    "fine_tuned/pos_weight_0_first_swipe/27_bosch_t_FacebookAI-xlm-roberta-large",
    "fine_tuned/pos_weight_0_first_swipe/28_tram_10_FacebookAI-xlm-roberta-large",
    "fine_tuned/pos_weight_0_first_swipe/29_tram_FacebookAI-xlm-roberta-large",
    "fine_tuned/pos_weight_0_first_swipe/2_bosch_t_bert-base-uncased",
    "fine_tuned/pos_weight_0_first_swipe/30_bosch_t10_s2w-ai-DarkBERT",
    "fine_tuned/pos_weight_0_first_swipe/31_bosch_t25_s2w-ai-DarkBERT",
    "fine_tuned/pos_weight_0_first_swipe/32_bosch_t_s2w-ai-DarkBERT",
    "fine_tuned/pos_weight_0_first_swipe/33_tram_10_s2w-ai-DarkBERT",
    "fine_tuned/pos_weight_0_first_swipe/34_tram_s2w-ai-DarkBERT",
    "fine_tuned/pos_weight_0_first_swipe/35_bosch_t10_jackaduma-SecBERT",
    "fine_tuned/pos_weight_0_first_swipe/36_bosch_t25_jackaduma-SecBERT",
    "fine_tuned/pos_weight_0_first_swipe/37_bosch_t_jackaduma-SecBERT",
    "fine_tuned/pos_weight_0_first_swipe/38_tram_10_jackaduma-SecBERT",
    "fine_tuned/pos_weight_0_first_swipe/39_tram_jackaduma-SecBERT",
    "fine_tuned/pos_weight_0_first_swipe/3_tram_10_bert-base-uncased",
    "fine_tuned/pos_weight_0_first_swipe/40_bosch_t10_markusbayer-CySecBERT",
    "fine_tuned/pos_weight_0_first_swipe/41_bosch_t25_markusbayer-CySecBERT",
    "fine_tuned/pos_weight_0_first_swipe/42_bosch_t_markusbayer-CySecBERT",
    "fine_tuned/pos_weight_0_first_swipe/43_tram_10_markusbayer-CySecBERT",
    "fine_tuned/pos_weight_0_first_swipe/44_tram_markusbayer-CySecBERT",
    "fine_tuned/pos_weight_0_first_swipe/45_bosch_t10_allenai-scibert_scivocab_cased",
    "fine_tuned/pos_weight_0_first_swipe/46_bosch_t25_allenai-scibert_scivocab_cased",
    "fine_tuned/pos_weight_0_first_swipe/47_bosch_t_allenai-scibert_scivocab_cased",
    "fine_tuned/pos_weight_0_first_swipe/48_tram_10_allenai-scibert_scivocab_cased",
    "fine_tuned/pos_weight_0_first_swipe/49_tram_allenai-scibert_scivocab_cased",
    "fine_tuned/pos_weight_0_first_swipe/4_tram_bert-base-uncased",
    "fine_tuned/pos_weight_0_first_swipe/50_bosch_t10_allenai-scibert_scivocab_uncased",
    "fine_tuned/pos_weight_0_first_swipe/51_bosch_t25_allenai-scibert_scivocab_uncased",
    "fine_tuned/pos_weight_0_first_swipe/52_bosch_t_allenai-scibert_scivocab_uncased",
    "fine_tuned/pos_weight_0_first_swipe/53_tram_10_allenai-scibert_scivocab_uncased",
    "fine_tuned/pos_weight_0_first_swipe/54_tram_allenai-scibert_scivocab_uncased",
    "fine_tuned/pos_weight_0_first_swipe/55_bosch_t10_priyankaranade-cybert",
    "fine_tuned/pos_weight_0_first_swipe/56_bosch_t25_priyankaranade-cybert",
    "fine_tuned/pos_weight_0_first_swipe/57_bosch_t_priyankaranade-cybert",
    "fine_tuned/pos_weight_0_first_swipe/58_tram_10_priyankaranade-cybert",
    "fine_tuned/pos_weight_0_first_swipe/59_tram_priyankaranade-cybert",
    "fine_tuned/pos_weight_0_first_swipe/5_bosch_t10_bert-base-cased",
    "fine_tuned/pos_weight_0_first_swipe/65_bosch_t10_ehsanaghaei-SecureBERT",
    "fine_tuned/pos_weight_0_first_swipe/66_bosch_t25_ehsanaghaei-SecureBERT",
    "fine_tuned/pos_weight_0_first_swipe/67_bosch_t_ehsanaghaei-SecureBERT",
    "fine_tuned/pos_weight_0_first_swipe/68_tram_10_ehsanaghaei-SecureBERT",
    "fine_tuned/pos_weight_0_first_swipe/69_tram_ehsanaghaei-SecureBERT",
    "fine_tuned/pos_weight_0_first_swipe/6_bosch_t25_bert-base-cased",
    "fine_tuned/pos_weight_0_first_swipe/70_bosch_t10_jackaduma-SecRoBERTa",
    "fine_tuned/pos_weight_0_first_swipe/71_bosch_t25_jackaduma-SecRoBERTa",
    "fine_tuned/pos_weight_0_first_swipe/72_bosch_t_jackaduma-SecRoBERTa",
    "fine_tuned/pos_weight_0_first_swipe/73_tram_10_jackaduma-SecRoBERTa",
    "fine_tuned/pos_weight_0_first_swipe/74_tram_jackaduma-SecRoBERTa",
    "fine_tuned/pos_weight_0_first_swipe/75_bosch_t10_tram_multi_label_model",
    "fine_tuned/pos_weight_0_first_swipe/76_bosch_t25_tram_multi_label_model",
    "fine_tuned/pos_weight_0_first_swipe/77_bosch_t_tram_multi_label_model",
    "fine_tuned/pos_weight_0_first_swipe/78_tram_10_tram_multi_label_model",
    "fine_tuned/pos_weight_0_first_swipe/79_tram_tram_multi_label_model",
    "fine_tuned/pos_weight_0_first_swipe/7_bosch_t_bert-base-cased",
    "fine_tuned/pos_weight_0_first_swipe/8_tram_10_bert-base-cased",
    "fine_tuned/pos_weight_0_first_swipe/9_tram_bert-base-cased",
]

pos_weight_20_first_swipe = [
    "fine_tuned/pos_weight_20_first_swipe/200_bosch_t10_bert-base-uncased",
    "fine_tuned/pos_weight_20_first_swipe/201_bosch_t25_bert-base-uncased",
    "fine_tuned/pos_weight_20_first_swipe/202_bosch_t_bert-base-uncased",
    "fine_tuned/pos_weight_20_first_swipe/203_tram_10_bert-base-uncased",
    "fine_tuned/pos_weight_20_first_swipe/204_tram_bert-base-uncased",
    "fine_tuned/pos_weight_20_first_swipe/205_bosch_t10_bert-base-cased",
    "fine_tuned/pos_weight_20_first_swipe/206_bosch_t25_bert-base-cased",
    "fine_tuned/pos_weight_20_first_swipe/207_bosch_t_bert-base-cased",
    "fine_tuned/pos_weight_20_first_swipe/208_tram_10_bert-base-cased",
    "fine_tuned/pos_weight_20_first_swipe/209_tram_bert-base-cased",
    "fine_tuned/pos_weight_20_first_swipe/210_bosch_t10_FacebookAI-roberta-base",
    "fine_tuned/pos_weight_20_first_swipe/211_bosch_t25_FacebookAI-roberta-base",
    "fine_tuned/pos_weight_20_first_swipe/212_bosch_t_FacebookAI-roberta-base",
    "fine_tuned/pos_weight_20_first_swipe/213_tram_10_FacebookAI-roberta-base",
    "fine_tuned/pos_weight_20_first_swipe/214_tram_FacebookAI-roberta-base",
    "fine_tuned/pos_weight_20_first_swipe/215_bosch_t10_FacebookAI-xlm-roberta-base",
    "fine_tuned/pos_weight_20_first_swipe/216_bosch_t25_FacebookAI-xlm-roberta-base",
    "fine_tuned/pos_weight_20_first_swipe/217_bosch_t_FacebookAI-xlm-roberta-base",
    "fine_tuned/pos_weight_20_first_swipe/218_tram_10_FacebookAI-xlm-roberta-base",
    "fine_tuned/pos_weight_20_first_swipe/219_tram_FacebookAI-xlm-roberta-base",
    "fine_tuned/pos_weight_20_first_swipe/220_bosch_t10_FacebookAI-roberta-large",
    "fine_tuned/pos_weight_20_first_swipe/221_bosch_t25_FacebookAI-roberta-large",
    "fine_tuned/pos_weight_20_first_swipe/222_bosch_t_FacebookAI-roberta-large",
    "fine_tuned/pos_weight_20_first_swipe/223_tram_10_FacebookAI-roberta-large",
    "fine_tuned/pos_weight_20_first_swipe/224_tram_FacebookAI-roberta-large",
    "fine_tuned/pos_weight_20_first_swipe/225_bosch_t10_FacebookAI-xlm-roberta-large",
    "fine_tuned/pos_weight_20_first_swipe/226_bosch_t25_FacebookAI-xlm-roberta-large",
    "fine_tuned/pos_weight_20_first_swipe/227_bosch_t_FacebookAI-xlm-roberta-large",
    "fine_tuned/pos_weight_20_first_swipe/228_tram_10_FacebookAI-xlm-roberta-large",
    "fine_tuned/pos_weight_20_first_swipe/229_tram_FacebookAI-xlm-roberta-large",
    "fine_tuned/pos_weight_20_first_swipe/230_bosch_t10_s2w-ai-DarkBERT",
    "fine_tuned/pos_weight_20_first_swipe/231_bosch_t25_s2w-ai-DarkBERT",
    "fine_tuned/pos_weight_20_first_swipe/232_bosch_t_s2w-ai-DarkBERT",
    "fine_tuned/pos_weight_20_first_swipe/233_tram_10_s2w-ai-DarkBERT",
    "fine_tuned/pos_weight_20_first_swipe/234_tram_s2w-ai-DarkBERT",
    "fine_tuned/pos_weight_20_first_swipe/235_bosch_t10_jackaduma-SecBERT",
    "fine_tuned/pos_weight_20_first_swipe/236_bosch_t25_jackaduma-SecBERT",
    "fine_tuned/pos_weight_20_first_swipe/237_bosch_t_jackaduma-SecBERT",
    "fine_tuned/pos_weight_20_first_swipe/238_tram_10_jackaduma-SecBERT",
    "fine_tuned/pos_weight_20_first_swipe/239_tram_jackaduma-SecBERT",
    "fine_tuned/pos_weight_20_first_swipe/240_bosch_t10_jackaduma-SecRoBERTa",
    "fine_tuned/pos_weight_20_first_swipe/241_bosch_t25_jackaduma-SecRoBERTa",
    "fine_tuned/pos_weight_20_first_swipe/242_bosch_t_jackaduma-SecRoBERTa",
    "fine_tuned/pos_weight_20_first_swipe/243_tram_10_jackaduma-SecRoBERTa",
    "fine_tuned/pos_weight_20_first_swipe/244_tram_jackaduma-SecRoBERTa",
    "fine_tuned/pos_weight_20_first_swipe/245_bosch_t10_markusbayer-CySecBERT",
    "fine_tuned/pos_weight_20_first_swipe/246_bosch_t25_markusbayer-CySecBERT",
    "fine_tuned/pos_weight_20_first_swipe/247_bosch_t_markusbayer-CySecBERT",
    "fine_tuned/pos_weight_20_first_swipe/248_tram_10_markusbayer-CySecBERT",
    "fine_tuned/pos_weight_20_first_swipe/249_tram_markusbayer-CySecBERT",
    "fine_tuned/pos_weight_20_first_swipe/250_bosch_t10_allenai-scibert_scivocab_cased",
    "fine_tuned/pos_weight_20_first_swipe/251_bosch_t25_allenai-scibert_scivocab_cased",
    "fine_tuned/pos_weight_20_first_swipe/252_bosch_t_allenai-scibert_scivocab_cased",
    "fine_tuned/pos_weight_20_first_swipe/253_tram_10_allenai-scibert_scivocab_cased",
    "fine_tuned/pos_weight_20_first_swipe/254_tram_allenai-scibert_scivocab_cased",
    "fine_tuned/pos_weight_20_first_swipe/255_bosch_t10_allenai-scibert_scivocab_uncased",
    "fine_tuned/pos_weight_20_first_swipe/256_bosch_t25_allenai-scibert_scivocab_uncased",
    "fine_tuned/pos_weight_20_first_swipe/257_bosch_t_allenai-scibert_scivocab_uncased",
    "fine_tuned/pos_weight_20_first_swipe/258_tram_10_allenai-scibert_scivocab_uncased",
    "fine_tuned/pos_weight_20_first_swipe/259_tram_allenai-scibert_scivocab_uncased",
    "fine_tuned/pos_weight_20_first_swipe/260_bosch_t10_priyankaranade-cybert",
    "fine_tuned/pos_weight_20_first_swipe/261_bosch_t25_priyankaranade-cybert",
    "fine_tuned/pos_weight_20_first_swipe/262_bosch_t_priyankaranade-cybert",
    "fine_tuned/pos_weight_20_first_swipe/263_tram_10_priyankaranade-cybert",
    "fine_tuned/pos_weight_20_first_swipe/264_tram_priyankaranade-cybert",
    "fine_tuned/pos_weight_20_first_swipe/265_bosch_t10_tram_multi_label_model",
    "fine_tuned/pos_weight_20_first_swipe/266_bosch_t25_tram_multi_label_model",
    "fine_tuned/pos_weight_20_first_swipe/267_bosch_t_tram_multi_label_model",
    "fine_tuned/pos_weight_20_first_swipe/268_tram_10_tram_multi_label_model",
    "fine_tuned/pos_weight_20_first_swipe/269_tram_tram_multi_label_model",
    "fine_tuned/pos_weight_20_first_swipe/270_bosch_t10_ehsanaghaei-SecureBERT",
    "fine_tuned/pos_weight_20_first_swipe/271_bosch_t25_ehsanaghaei-SecureBERT",
    "fine_tuned/pos_weight_20_first_swipe/272_bosch_t_ehsanaghaei-SecureBERT",
    "fine_tuned/pos_weight_20_first_swipe/273_tram_10_ehsanaghaei-SecureBERT",
    "fine_tuned/pos_weight_20_first_swipe/274_tram_ehsanaghaei-SecureBERT",
]

extra_patience_models = [
    "fine_tuned/extra_patience/666_bosch_t_FacebookAI-roberta-large",
    "fine_tuned/extra_patience/667_bosch_t_FacebookAI-xlm-roberta-large",
    "fine_tuned/extra_patience/668_tram_FacebookAI-xlm-roberta-large",
]

# Model folders stored under fine_tuned/pos_weight_0_20_first_swipe_t25_t50.
# Run ids start at 1000 and are handed out in blocks of four per base model, in
# the order listed below: two "bosch_t50" runs followed by two "tram_25" runs.
# The two "large" base models (ids 1016-1023) are intentionally left out of this
# list; their folders are collected in `missing_pos_weight_0_20` instead.
pos_weight_0_20_first_swipe_t25_t50 = [
    f"fine_tuned/pos_weight_0_20_first_swipe_t25_t50/{1000 + 4 * block + offset}_{dataset}_{base_model}"
    for block, base_model in enumerate(
        (
            "bert-base-uncased",
            "bert-base-cased",
            "FacebookAI-roberta-base",
            "FacebookAI-xlm-roberta-base",
            "FacebookAI-roberta-large",
            "FacebookAI-xlm-roberta-large",
            "s2w-ai-DarkBERT",
            "jackaduma-SecBERT",
            "jackaduma-SecRoBERTa",
            "markusbayer-CySecBERT",
            "allenai-scibert_scivocab_cased",
            "allenai-scibert_scivocab_uncased",
            "priyankaranade-cybert",
            "tram_multi_label_model",
            "ehsanaghaei-SecureBERT",
        )
    )
    # The excluded models still occupy their id block, so the numbering of the
    # following models is unaffected.
    if base_model not in ("FacebookAI-roberta-large", "FacebookAI-xlm-roberta-large")
    for offset, dataset in enumerate(("bosch_t50", "bosch_t50", "tram_25", "tram_25"))
]

# Model folders stored under fine_tuned/bosch_champion. Every folder follows the
# pattern "<run id>_bosch_t_markusbayer-CySecBERT"; the run ids are not fully
# contiguous, so they are spelled out as inclusive (first, last) ranges.
cysecbert_bosch = [
    f"fine_tuned/bosch_champion/{run_id}_bosch_t_markusbayer-CySecBERT"
    for first_id, last_id in (
        (300, 318),
        (320, 343),
        (360, 367),
        (380, 395),
        (400, 414),
        (420, 434),
    )
    for run_id in range(first_id, last_id + 1)
]


# The roberta-large / xlm-roberta-large folders (ids 1016-1023) that are commented
# out of `pos_weight_0_20_first_swipe_t25_t50`. Each base model owns four
# consecutive run ids: two "bosch_t50" runs followed by two "tram_25" runs.
missing_pos_weight_0_20 = [
    f"fine_tuned/pos_weight_0_20_first_swipe_t25_t50/{run_id}_{dataset}_{base_model}"
    for first_id, base_model in (
        (1016, "FacebookAI-roberta-large"),
        (1020, "FacebookAI-xlm-roberta-large"),
    )
    for run_id, dataset in enumerate(
        ("bosch_t50", "bosch_t50", "tram_25", "tram_25"), start=first_id
    )
]



### Retrieve fine-tuned models from folders

Select the set of models to evaluate and the CSV file where the results will be saved.

In [None]:
print("Select list of models from the fine_tuned directory and the output file")
# NOTE(review): `stop` is not defined anywhere in the visible part of the notebook,
# so this bare expression presumably raises NameError on purpose, halting a
# "Run All" until the selection below has been reviewed -- confirm, and consider
# an explicit `raise` with a message instead.
stop,

# The model folders to evaluate and the CSV file the final table is written to.
selected_models = data_augmentation
outfile = "test_results/data_augmentation.csv"

In [9]:
# Display the selected model folders as a sanity check before running the tests.
selected_models

['fine_tuned/data_augmentation/400_tram_artificial_FacebookAI-roberta-large',
 'fine_tuned/data_augmentation/401_tram_ood_FacebookAI-roberta-large',
 'fine_tuned/data_augmentation/402_tram_ood_rebalanced_FacebookAI-roberta-large']

In [10]:
def retrieve_setup_from_folder_name(folder):
    """Recover the experimental setup of a fine-tuned model from its folder.

    The dataset name is parsed from the folder's base name, which is expected to
    look like ``<run id>_<dataset>_<base model>`` (for example
    ``401_tram_ood_FacebookAI-roberta-large``). The hyper-parameters are read from
    ``model_params.json`` and the base model name from the ``_name_or_path`` entry
    of ``config.json``, both located inside ``folder``.

    Args:
        folder: Path of the fine-tuned model directory. A trailing path separator
            is tolerated.

    Returns:
        A dict with the keys ``model_name``, ``dataset``, ``batch_size``,
        ``freeze_layers``, ``learning_rate``, ``pos_weight``, ``end_factor`` and
        ``is_single_label``. Hyper-parameters absent from ``model_params.json``
        are reported as ``None``. If either JSON file cannot be read or parsed,
        or ``config.json`` has no ``_name_or_path`` entry, a message is printed
        and ``None`` is returned.
    """
    import json
    import os

    single_label = "single_label" in folder

    # normpath drops a trailing separator, which would otherwise make basename
    # return "" and the dataset name come out empty.
    name_parts = os.path.basename(os.path.normpath(folder)).split("_")
    dataset = "_".join(name_parts[1:3])

    # Dataset names span one to three "_"-separated tokens, so the two-token
    # guess above has to be corrected for the other cases.
    if "_".join(name_parts[1:4]) == "tram_ood_rebalanced":
        dataset = "tram_ood_rebalanced"
    elif dataset.startswith("tram") and dataset not in [
        "tram",
        "tram_10",
        "tram_25",
        "tram_sl",
        "tram_artificial",
        "tram_ood",
        "tram_ood_rebalanced",
    ]:
        # The second token belongs to the base model name: plain "tram" dataset.
        dataset = "tram"

    try:
        with open(os.path.join(folder, "model_params.json"), "r") as f:
            params = json.load(f)

        with open(os.path.join(folder, "config.json"), "r") as f:
            model_name = json.load(f)["_name_or_path"]
    except (OSError, ValueError, KeyError):
        # Missing/unreadable file, malformed JSON (JSONDecodeError is a
        # ValueError) or missing "_name_or_path". Anything else, including
        # KeyboardInterrupt, is deliberately left to propagate.
        print("missing config for: %s" % folder)
        return None

    return {
        "model_name": model_name,
        "dataset": dataset,
        "batch_size": params.get("batch_size"),
        "freeze_layers": params.get("freeze_layers"),
        "learning_rate": params.get("learning_rate"),
        # Experiments run with an older version of the training code did not
        # store pos_weight (it was generated inside the training function; per
        # the original author its value was 25). For those runs this is None.
        # NOTE(review): consider defaulting to 25 here once that is confirmed.
        "pos_weight": params.get("pos_weight"),
        "end_factor": params.get("end_factor"),
        "is_single_label": single_label,
    }


# Map every selected model folder to its recovered setup (None when the
# configuration files of that folder could not be read).
setups = {
    model_folder: retrieve_setup_from_folder_name(model_folder)
    for model_folder in selected_models
}

missing config for: fine_tuned/data_augmentation/402_tram_ood_rebalanced_FacebookAI-roberta-large


In [11]:
# Display the recovered setups; a value of None marks a folder whose
# configuration could not be retrieved.
setups

{'fine_tuned/data_augmentation/400_tram_artificial_FacebookAI-roberta-large': {'model_name': 'FacebookAI/roberta-large',
  'dataset': 'tram_artificial',
  'batch_size': 16,
  'freeze_layers': 0,
  'learning_rate': 2e-05,
  'pos_weight': 0,
  'end_factor': 0.7,
  'is_single_label': False},
 'fine_tuned/data_augmentation/401_tram_ood_FacebookAI-roberta-large': {'model_name': 'FacebookAI/roberta-large',
  'dataset': 'tram_ood',
  'batch_size': 16,
  'freeze_layers': 0,
  'learning_rate': 2e-05,
  'pos_weight': 0,
  'end_factor': 0.7,
  'is_single_label': False},
 'fine_tuned/data_augmentation/402_tram_ood_rebalanced_FacebookAI-roberta-large': None}

### Collect test results per document

In [13]:
# Summary table stored column-wise: one entry per successfully tested model,
# holding its setup and the column means of the metrics returned by run_test.
final_results = {
    "model": [],
    "dataset": [],
    "freeze_layers": [],
    "batch_size": [],
    "learning_rate": [],
    "pos_weight": [],
    "end_factor": [],
    "accuracy_mean": [],
    "precision_mean": [],
    "recall_mean": [],
    "f1_mean": [],
}

for s in setups:
    model_dir = s
    setup = setups[s]

    # Folders whose configuration could not be retrieved carry no setup at all.
    if setup is None:
        print(f"Skipping {model_dir}: no setup could be retrieved")
        continue

    try:
        model_name = setup["model_name"]
        dataset_name = setup["dataset"]
        freeze_layers = setup["freeze_layers"]
        pos_weight = setup["pos_weight"]
        end_factor = setup["end_factor"]
        learning_rate = setup["learning_rate"]
        batch_size = setup["batch_size"]
        result_df = run_test(model_dir, model_name, dataset_name)

        f1_mean = result_df["f1"].mean()
        accuracy_mean = result_df["accuracy"].mean()
        precision_mean = result_df["precision"].mean()
        recall_mean = result_df["recall"].mean()
    except Exception as err:
        # Best effort: one failing model must not abort the whole sweep, but say
        # which model failed and why. KeyboardInterrupt is not an Exception, so
        # a long evaluation can still be interrupted.
        print(f"Skipping {model_dir}: {err!r}")
        continue

    # Append only once every value is available, so that all columns keep the
    # same length.
    final_results["model"].append(model_name)
    final_results["dataset"].append(dataset_name)
    final_results["freeze_layers"].append(freeze_layers)
    final_results["pos_weight"].append(pos_weight)
    final_results["end_factor"].append(end_factor)
    final_results["learning_rate"].append(learning_rate)
    final_results["batch_size"].append(batch_size)
    final_results["f1_mean"].append(f1_mean)
    final_results["accuracy_mean"].append(accuracy_mean)
    final_results["precision_mean"].append(precision_mean)
    final_results["recall_mean"].append(recall_mean)
    print(f"{model_name}: {f1_mean}")


Chosen experimental setup:
	dataset_name=tram_artificial
	model_name=FacebookAI/roberta-large
	is_single_label=False
	per_document=True
	test_function=<function run_test_multi_label_per_document at 0x7f297e7dd090>


100%|██████████| 31/31 [00:56<00:00,  1.82s/it]


FacebookAI/roberta-large: 0.6541670078154059
Chosen experimental setup:
	dataset_name=tram_ood
	model_name=FacebookAI/roberta-large
	is_single_label=False
	per_document=True
	test_function=<function run_test_multi_label_per_document at 0x7f297e7dd090>


100%|██████████| 31/31 [00:59<00:00,  1.91s/it]


FacebookAI/roberta-large: 0.7140720738602963
Skipping...


In [14]:
# Order the summary by dataset, then model, then mean F1.
final_df = pd.DataFrame(final_results).sort_values(
    by=[
        "dataset",
        "model",
        "f1_mean",
    ]
)

# The display option only matters for rendering, so only `display` is scoped to it.
with pd.option_context("display.max_rows", None):
    display(final_df)

# Create the output directory if needed, so the results of a long evaluation
# are not lost to a missing folder.
outdir = os.path.dirname(outfile)
if outdir:
    os.makedirs(outdir, exist_ok=True)
final_df.to_csv(outfile)

Unnamed: 0,model,dataset,freeze_layers,batch_size,learning_rate,pos_weight,end_factor,accuracy_mean,precision_mean,recall_mean,f1_mean
0,FacebookAI/roberta-large,tram_artificial,0,16,2e-05,0,0.7,0.535728,0.719613,0.631232,0.654167
1,FacebookAI/roberta-large,tram_ood,0,16,2e-05,0,0.7,0.593687,0.727605,0.745318,0.714072
