# Assessor Architecture Comparison

This notebook does a rudimentary comparison of different architectures to use for assessors.

In [1]:
# Re-import modified modules automatically before each cell executes, so edits
# to local source files (e.g. the `lass` package) take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from dataclasses import dataclass

import lass.train
from lass.log_handling import LoaderArgs

In [3]:
# Enumerate GPUs in PCI bus order and expose only device 0 to CUDA.
# NOTE(review): the training log in this notebook reports "Instantaneous batch
# size per device = 32" together with "Total train batch size ... = 64" and a
# single gradient-accumulation step, which suggests two GPUs were in use.
# These variables may need to be set before `lass.train` is imported — confirm.
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=0


In [4]:
@dataclass
class Architecture:
    """Per-model training settings for one candidate assessor architecture.

    Each instance is consumed by the training loop in this notebook, which
    forwards every field to `lass.train.train`.
    """

    # Model identifier, passed to `lass.train.train` as `model_name`.
    name: str
    # Abbreviated label, passed as `model_name_short`.
    name_short: str
    # Passed as `batch_size`.
    batch_size: int
    # Passed as `gradient_accumulation_steps`.
    gradient_accumulation_steps: int

# Candidate architectures to compare, one per line. Commented-out entries are
# skipped by the training loop; uncomment a line to include that model.
# In every entry batch_size * gradient_accumulation_steps equals 32.
ARCHITECTURES = [
    # Architecture(name="albert-base-v2", name_short="albert", batch_size=32, gradient_accumulation_steps=1),
    # Architecture(name="bert-base-cased", name_short="bert", batch_size=32, gradient_accumulation_steps=1),
    Architecture(name="roberta-base", name_short="roberta", batch_size=32, gradient_accumulation_steps=1),
    Architecture(name="microsoft/deberta-v3-base", name_short="deberta", batch_size=16, gradient_accumulation_steps=2),
    # Architecture(name="gpt2", name_short="gpt2", batch_size=8, gradient_accumulation_steps=4),
]

In [5]:
# Train one assessor per entry in ARCHITECTURES. Only the model name and the
# batch-size / gradient-accumulation settings differ between runs; the data
# selection, split, epoch count and optimiser overrides are the same for all.
for architecture in ARCHITECTURES:
    # Selection of logged results used as training data; built anew for each run.
    loader_args = LoaderArgs(
        logdir="../artifacts/logs",
        tasks="paper-full",
        model_families=["BIG-G T=0"],
        model_sizes=["128b"],
        shots=[0],
        query_types=["multiple_choice"],
    )

    # Extra training arguments handed through to `lass.train.train`.
    training_overrides = {
        "warmup_steps": 3000,
        "learning_rate": 2e-5,
    }

    lass.train.train(
        data_args=loader_args,
        group="architecture-selection",
        split="instance",
        model_name=architecture.name,
        model_name_short=architecture.name_short,
        batch_size=architecture.batch_size,
        gradient_accumulation_steps=architecture.gradient_accumulation_steps,
        include_model_in_input=False,
        include_n_targets_in_input=False,
        output_dir="architectures",
        n_epochs=12,
        extra_training_args=training_overrides,
        # is_test_run=True,
    )

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, 'correct'] = df['correct'].astype(int)


{'text': "In what follows, we provide short narratives, each of which illustrates a common proverb. \nNarrative: A father brought his son to the local airport for a show. They got up close to an plane in the show. The pilot arrives and turns on the engine. The plane makes a considerable roar, scaring the child. The child cowers behind the father as the father reassures the child that it is just a loud noise and it won't hurt him.\nThis narrative is a good illustration of the following proverb: ", 'label': 0}




  0%|          | 0/44 [00:00<?, ?ba/s]

  0%|          | 0/11 [00:00<?, ?ba/s]

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


[34m[1mwandb[0m: Currently logged in as: [33mwschella[0m. Use [1m`wandb login --relogin`[0m to force relogin






Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


***** Running training *****


  Num examples = 43246


  Num Epochs = 12


  Instantaneous batch size per device = 32


  Total train batch size (w. parallel, distributed & accumulation) = 64


  Gradient Accumulation steps = 1


  Total optimization steps = 8112


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"




Step,Training Loss,Validation Loss,Conf Distribution Accuracy,Conf Distribution Precision,Conf Distribution Recall,Conf Distribution F1,Conf Distribution Roc Auc,Conf Distribution Bs,Conf Distribution Bs Mcb,Conf Distribution Bs Dsc,Conf Distribution Bs Unc,Conf Distribution Balanced Accuracy,Conf Absolute Accuracy,Conf Absolute Precision,Conf Absolute Recall,Conf Absolute F1,Conf Absolute Roc Auc,Conf Absolute Bs,Conf Absolute Bs Mcb,Conf Absolute Bs Dsc,Conf Absolute Bs Unc,Conf Absolute Balanced Accuracy,Conf Normalized Accuracy,Conf Normalized Precision,Conf Normalized Recall,Conf Normalized F1,Conf Normalized Roc Auc,Conf Normalized Bs,Conf Normalized Bs Mcb,Conf Normalized Bs Dsc,Conf Normalized Bs Unc,Conf Normalized Balanced Accuracy,Accuracy,Precision,Recall,F1,Roc Auc,Bs,Bs Mcb,Bs Dsc,Bs Unc,Balanced Accuracy
500,0.6256,0.583249,0.689279,0.58255,0.303154,0.398784,0.716353,0.19563,0.0,0.028746,0.224376,0.59564,0.655823,0.406504,0.027189,0.050968,0.486579,0.306495,0.082836,0.000717,0.224376,0.503373,0.514233,0.399027,0.847743,0.542638,0.612411,0.314735,0.101476,0.011118,0.224376,0.595112,0.681146,0.557402,0.300979,0.39089,0.704983,0.200198,0.002284,0.026462,0.224376,0.588952
1000,0.5797,0.572676,0.689279,0.58255,0.303154,0.398784,0.716353,0.19563,0.0,0.028746,0.224376,0.59564,0.655823,0.406504,0.027189,0.050968,0.486579,0.306495,0.082836,0.000717,0.224376,0.503373,0.514233,0.399027,0.847743,0.542638,0.612411,0.314735,0.101476,0.011118,0.224376,0.595112,0.689187,0.593361,0.272159,0.373159,0.716154,0.196355,0.001662,0.029683,0.224376,0.588054
1500,0.5698,0.564061,0.689279,0.58255,0.303154,0.398784,0.716353,0.19563,0.0,0.028746,0.224376,0.59564,0.655823,0.406504,0.027189,0.050968,0.486579,0.306495,0.082836,0.000717,0.224376,0.503373,0.514233,0.399027,0.847743,0.542638,0.612411,0.314735,0.101476,0.011118,0.224376,0.595112,0.689279,0.546416,0.50571,0.525275,0.730538,0.193271,0.002235,0.03334,0.224376,0.644762
2000,0.5638,0.55818,0.689279,0.58255,0.303154,0.398784,0.716353,0.19563,0.0,0.028746,0.224376,0.59564,0.655823,0.406504,0.027189,0.050968,0.486579,0.306495,0.082836,0.000717,0.224376,0.503373,0.514233,0.399027,0.847743,0.542638,0.612411,0.314735,0.101476,0.011118,0.224376,0.595112,0.704898,0.580753,0.474171,0.522078,0.740984,0.189522,0.002699,0.037554,0.224376,0.648945
2500,0.5403,0.540062,0.689279,0.58255,0.303154,0.398784,0.716353,0.19563,0.0,0.028746,0.224376,0.59564,0.655823,0.406504,0.027189,0.050968,0.486579,0.306495,0.082836,0.000717,0.224376,0.503373,0.514233,0.399027,0.847743,0.542638,0.612411,0.314735,0.101476,0.011118,0.224376,0.595112,0.719778,0.637799,0.406471,0.496513,0.760511,0.182271,0.002368,0.044473,0.224376,0.643798
3000,0.5237,0.546879,0.689279,0.58255,0.303154,0.398784,0.716353,0.19563,0.0,0.028746,0.224376,0.59564,0.655823,0.406504,0.027189,0.050968,0.486579,0.306495,0.082836,0.000717,0.224376,0.503373,0.514233,0.399027,0.847743,0.542638,0.612411,0.314735,0.101476,0.011118,0.224376,0.595112,0.712384,0.579584,0.560359,0.569809,0.755404,0.18568,0.004933,0.043629,0.224376,0.675517
3500,0.5052,0.548722,0.689279,0.58255,0.303154,0.398784,0.716353,0.19563,0.0,0.028746,0.224376,0.59564,0.655823,0.406504,0.027189,0.050968,0.486579,0.306495,0.082836,0.000717,0.224376,0.503373,0.514233,0.399027,0.847743,0.542638,0.612411,0.314735,0.101476,0.011118,0.224376,0.595112,0.721349,0.600181,0.539967,0.568484,0.768749,0.18534,0.008636,0.047673,0.224376,0.677363
4000,0.4823,0.535537,0.689279,0.58255,0.303154,0.398784,0.716353,0.19563,0.0,0.028746,0.224376,0.59564,0.655823,0.406504,0.027189,0.050968,0.486579,0.306495,0.082836,0.000717,0.224376,0.503373,0.514233,0.399027,0.847743,0.542638,0.612411,0.314735,0.101476,0.011118,0.224376,0.595112,0.723937,0.619097,0.488309,0.54598,0.774259,0.179747,0.005921,0.05055,0.224376,0.666795
4500,0.448,0.593124,0.689279,0.58255,0.303154,0.398784,0.716353,0.19563,0.0,0.028746,0.224376,0.59564,0.655823,0.406504,0.027189,0.050968,0.486579,0.306495,0.082836,0.000717,0.224376,0.503373,0.514233,0.399027,0.847743,0.542638,0.612411,0.314735,0.101476,0.011118,0.224376,0.595112,0.713494,0.592214,0.504622,0.544921,0.75901,0.193686,0.015269,0.04596,0.224376,0.66284
5000,0.4063,0.657539,0.689279,0.58255,0.303154,0.398784,0.716353,0.19563,0.0,0.028746,0.224376,0.59564,0.655823,0.406504,0.027189,0.050968,0.486579,0.306495,0.082836,0.000717,0.224376,0.503373,0.514233,0.399027,0.847743,0.542638,0.612411,0.314735,0.101476,0.011118,0.224376,0.595112,0.7122,0.594949,0.480424,0.531588,0.747307,0.205078,0.023675,0.042973,0.224376,0.655992


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 64


Saving model checkpoint to architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-500


Configuration saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-500/config.json


Model weights saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-500/pytorch_model.bin




The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 64


Saving model checkpoint to architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-1000


Configuration saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-1000/config.json


Model weights saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-1000/pytorch_model.bin


Deleting older checkpoint [architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-500] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 64


Saving model checkpoint to architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-1500


Configuration saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-1500/config.json


Model weights saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-1500/pytorch_model.bin


Deleting older checkpoint [architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-1000] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 64


Saving model checkpoint to architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-2000


Configuration saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-2000/config.json


Model weights saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-2000/pytorch_model.bin


Deleting older checkpoint [architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-1500] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 64


Saving model checkpoint to architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-2500


Configuration saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-2500/config.json


Model weights saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-2500/pytorch_model.bin


Deleting older checkpoint [architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-2000] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 64


Saving model checkpoint to architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-3000


Configuration saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-3000/config.json


Model weights saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-3000/pytorch_model.bin




The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 64


Saving model checkpoint to architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-3500


Configuration saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-3500/config.json


Model weights saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-3500/pytorch_model.bin


Deleting older checkpoint [architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-3000] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 64


Saving model checkpoint to architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-4000


Configuration saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-4000/config.json


Model weights saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-4000/pytorch_model.bin


Deleting older checkpoint [architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-2500] due to args.save_total_limit


Deleting older checkpoint [architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-3500] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 64


Saving model checkpoint to architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-4500


Configuration saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-4500/config.json


Model weights saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-4500/pytorch_model.bin




The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 64


Saving model checkpoint to architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-5000


Configuration saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-5000/config.json


Model weights saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-5000/pytorch_model.bin


Deleting older checkpoint [architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-4500] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 64


Saving model checkpoint to architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-5500


Configuration saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-5500/config.json


Model weights saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-5500/pytorch_model.bin


Deleting older checkpoint [architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-5000] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 64


Saving model checkpoint to architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-6000


Configuration saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-6000/config.json


Model weights saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-6000/pytorch_model.bin


Deleting older checkpoint [architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-5500] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 64


Saving model checkpoint to architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-6500


Configuration saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-6500/config.json


Model weights saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-6500/pytorch_model.bin


Deleting older checkpoint [architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-6000] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 64


Saving model checkpoint to architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-7000


Configuration saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-7000/config.json


Model weights saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-7000/pytorch_model.bin


Deleting older checkpoint [architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-6500] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 64


Saving model checkpoint to architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-7500


Configuration saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-7500/config.json


Model weights saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-7500/pytorch_model.bin


Deleting older checkpoint [architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-7000] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 64


Saving model checkpoint to architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-8000


Configuration saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-8000/config.json


Model weights saved in architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-8000/pytorch_model.bin


Deleting older checkpoint [architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-7500] due to args.save_total_limit






Training completed. Do not forget to share your model on huggingface.co/models =)




Loading best model from architectures/roberta-bs32-0sh-instance-split-07291003/checkpoint-4000 (score: 0.5355374813079834).


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/accuracy,▁▂▂▅▇▆██▆▆▇▄▆▆▅▅
eval/balanced_accuracy,▁▁▅▆▅██▇▇▆▆█▇▇▇▇
eval/bs,▄▃▃▂▁▂▂▁▃▄▅▆▆▇██
eval/bs_dsc,▁▂▃▄▆▆▇█▇▆▆▆▅▅▅▅
eval/bs_mcb,▁▁▁▁▁▂▂▂▃▄▅▇▆▇██
eval/bs_unc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/conf_absolute_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/conf_absolute_balanced_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/conf_absolute_bs,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/conf_absolute_bs_dsc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.7036
eval/balanced_accuracy,0.67091
eval/bs,0.23218
eval/bs_dsc,0.03934
eval/bs_mcb,0.04714
eval/bs_unc,0.22438
eval/conf_absolute_accuracy,0.65582
eval/conf_absolute_balanced_accuracy,0.50337
eval/conf_absolute_bs,0.30649
eval/conf_absolute_bs_dsc,0.00072


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, 'correct'] = df['correct'].astype(int)


{'text': "In what follows, we provide short narratives, each of which illustrates a common proverb. \nNarrative: A father brought his son to the local airport for a show. They got up close to an plane in the show. The pilot arrives and turns on the engine. The plane makes a considerable roar, scaring the child. The child cowers behind the father as the father reassures the child that it is just a loud noise and it won't hurt him.\nThis narrative is a good illustration of the following proverb: ", 'label': 0}


loading configuration file https://huggingface.co/microsoft/deberta-v3-base/resolve/main/config.json from cache at /home/wout/.cache/huggingface/transformers/e6f9db57345f0f60c9f837fa97bcb27b1ed31e99feb33d732d7d8c80cb8f8459.de97182a9f32a68819030ba8f3f6ff2ba47276be3864425925523202f54cc79c


Model config DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.20.1",
  "type_vocab_size": 0,
  "vocab_size": 128100
}



loading file https://huggingface.co/microsoft/deberta-v3-base/resolve/main/spm.model from cache at /home/wout/.cache/huggingface/transformers/ec748fd4f03d0e5a2d5d56dff01e6dd733f23c67105cd54a9910f9d711870253.0abaeacf7287ee8ba758fec15ddfb4bb6c697bb1a8db272725f8aa633501787a


loading file https://huggingface.co/microsoft/deberta-v3-base/resolve/main/tokenizer.json from cache at None


loading file https://huggingface.co/microsoft/deberta-v3-base/resolve/main/added_tokens.json from cache at None


loading file https://huggingface.co/microsoft/deberta-v3-base/resolve/main/special_tokens_map.json from cache at None


loading file https://huggingface.co/microsoft/deberta-v3-base/resolve/main/tokenizer_config.json from cache at /home/wout/.cache/huggingface/transformers/967a4d63eb35950cfd24a9e335906419009f32940fa2ba1b73e7ba032628c38d.df5a7f41459442f66bec27ac9352bba694cde109855024b3ae61be2f5734ee9a


loading configuration file https://huggingface.co/microsoft/deberta-v3-base/resolve/main/config.json from cache at /home/wout/.cache/huggingface/transformers/e6f9db57345f0f60c9f837fa97bcb27b1ed31e99feb33d732d7d8c80cb8f8459.de97182a9f32a68819030ba8f3f6ff2ba47276be3864425925523202f54cc79c


Model config DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.20.1",
  "type_vocab_size": 0,
  "vocab_size": 128100
}



Adding [MASK] to the vocabulary


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


loading configuration file https://huggingface.co/microsoft/deberta-v3-base/resolve/main/config.json from cache at /home/wout/.cache/huggingface/transformers/e6f9db57345f0f60c9f837fa97bcb27b1ed31e99feb33d732d7d8c80cb8f8459.de97182a9f32a68819030ba8f3f6ff2ba47276be3864425925523202f54cc79c


Model config DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.20.1",
  "type_vocab_size": 0,
  "vocab_size": 128100
}





Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


  0%|          | 0/44 [00:00<?, ?ba/s]

  0%|          | 0/11 [00:00<?, ?ba/s]



loading configuration file https://huggingface.co/microsoft/deberta-v3-base/resolve/main/config.json from cache at /home/wout/.cache/huggingface/transformers/e6f9db57345f0f60c9f837fa97bcb27b1ed31e99feb33d732d7d8c80cb8f8459.de97182a9f32a68819030ba8f3f6ff2ba47276be3864425925523202f54cc79c


Model config DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.20.1",
  "type_vocab_size": 0,
  "vocab_size": 128100
}



loading weights file https://huggingface.co/microsoft/deberta-v3-base/resolve/main/pytorch_model.bin from cache at /home/wout/.cache/huggingface/transformers/322954555e18d37b9f98196875590497a8b33244551ec195e00fcdb831878224.f80cec19a933132f8b9636fbb3b736b2f4b9c8deaa45dfe4dd6e40a4d9970f44


Some weights of the model checkpoint at microsoft/deberta-v3-base were not used when initializing DebertaV2ForSequenceClassification: ['mask_predictions.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'mask_predictions.classifier.bias', 'mask_predictions.LayerNorm.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias', 'mask_predictions.classifier.weight', 'lm_predictions.lm_head.LayerNorm.bias', 'mask_predictions.dense.weight', 'mask_predictions.LayerNorm.weight', 'lm_predictions.lm_head.dense.bias']
- This IS expected if you are initializing DebertaV2ForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2ForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['pooler.dense.weight', 'pooler.dense.bias', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


using `logging_steps` to initialize `eval_steps` to 500


PyTorch: setting up devices


The following columns in the training set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running training *****


  Num examples = 43246


  Num Epochs = 12


  Instantaneous batch size per device = 16


  Total train batch size (w. parallel, distributed & accumulation) = 64


  Gradient Accumulation steps = 2


  Total optimization steps = 8112


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"




Step,Training Loss,Validation Loss,Conf Distribution Accuracy,Conf Distribution Precision,Conf Distribution Recall,Conf Distribution F1,Conf Distribution Roc Auc,Conf Distribution Bs,Conf Distribution Bs Mcb,Conf Distribution Bs Dsc,Conf Distribution Bs Unc,Conf Distribution Balanced Accuracy,Conf Absolute Accuracy,Conf Absolute Precision,Conf Absolute Recall,Conf Absolute F1,Conf Absolute Roc Auc,Conf Absolute Bs,Conf Absolute Bs Mcb,Conf Absolute Bs Dsc,Conf Absolute Bs Unc,Conf Absolute Balanced Accuracy,Conf Normalized Accuracy,Conf Normalized Precision,Conf Normalized Recall,Conf Normalized F1,Conf Normalized Roc Auc,Conf Normalized Bs,Conf Normalized Bs Mcb,Conf Normalized Bs Dsc,Conf Normalized Bs Unc,Conf Normalized Balanced Accuracy,Accuracy,Precision,Recall,F1,Roc Auc,Bs,Bs Mcb,Bs Dsc,Bs Unc,Balanced Accuracy
500,0.6201,0.581782,0.689279,0.58255,0.303154,0.398784,0.716353,0.19563,0.0,0.028746,0.224376,0.59564,0.655823,0.406504,0.027189,0.050968,0.486579,0.306495,0.082836,0.000717,0.224376,0.503373,0.514233,0.399027,0.847743,0.542638,0.612411,0.314735,0.101476,0.011118,0.224376,0.595112,0.688447,0.562171,0.377379,0.451602,0.709858,0.199028,0.002694,0.028042,0.224376,0.61301
1000,0.5755,0.567378,0.689279,0.58255,0.303154,0.398784,0.716353,0.19563,0.0,0.028746,0.224376,0.59564,0.655823,0.406504,0.027189,0.050968,0.486579,0.306495,0.082836,0.000717,0.224376,0.503373,0.514233,0.399027,0.847743,0.542638,0.612411,0.314735,0.101476,0.011118,0.224376,0.595112,0.69159,0.582968,0.325721,0.417931,0.725471,0.194135,0.00207,0.032312,0.224376,0.602863
1500,0.5572,0.556022,0.689279,0.58255,0.303154,0.398784,0.716353,0.19563,0.0,0.028746,0.224376,0.59564,0.655823,0.406504,0.027189,0.050968,0.486579,0.306495,0.082836,0.000717,0.224376,0.503373,0.514233,0.399027,0.847743,0.542638,0.612411,0.314735,0.101476,0.011118,0.224376,0.595112,0.694177,0.546392,0.59081,0.567734,0.747526,0.189998,0.004729,0.039107,0.224376,0.66911
2000,0.5405,0.538272,0.689279,0.58255,0.303154,0.398784,0.716353,0.19563,0.0,0.028746,0.224376,0.59564,0.655823,0.406504,0.027189,0.050968,0.486579,0.306495,0.082836,0.000717,0.224376,0.503373,0.514233,0.399027,0.847743,0.542638,0.612411,0.314735,0.101476,0.011118,0.224376,0.595112,0.716543,0.586421,0.563622,0.574796,0.764394,0.182845,0.005214,0.046744,0.224376,0.679458
2500,0.5127,0.528503,0.689279,0.58255,0.303154,0.398784,0.716353,0.19563,0.0,0.028746,0.224376,0.59564,0.655823,0.406504,0.027189,0.050968,0.486579,0.306495,0.082836,0.000717,0.224376,0.503373,0.514233,0.399027,0.847743,0.542638,0.612411,0.314735,0.101476,0.011118,0.224376,0.595112,0.726525,0.661718,0.399946,0.49856,0.772712,0.177951,0.00276,0.049185,0.224376,0.647326
3000,0.4919,0.526664,0.689279,0.58255,0.303154,0.398784,0.716353,0.19563,0.0,0.028746,0.224376,0.59564,0.655823,0.406504,0.027189,0.050968,0.486579,0.306495,0.082836,0.000717,0.224376,0.503373,0.514233,0.399027,0.847743,0.542638,0.612411,0.314735,0.101476,0.011118,0.224376,0.595112,0.721165,0.595936,0.558184,0.576443,0.774014,0.178843,0.004729,0.050263,0.224376,0.68164
3500,0.4641,0.599032,0.689279,0.58255,0.303154,0.398784,0.716353,0.19563,0.0,0.028746,0.224376,0.59564,0.655823,0.406504,0.027189,0.050968,0.486579,0.306495,0.082836,0.000717,0.224376,0.503373,0.514233,0.399027,0.847743,0.542638,0.612411,0.314735,0.101476,0.011118,0.224376,0.595112,0.707763,0.564922,0.610386,0.586775,0.763033,0.199564,0.022995,0.047808,0.224376,0.684149
4000,0.4179,0.584644,0.689279,0.58255,0.303154,0.398784,0.716353,0.19563,0.0,0.028746,0.224376,0.59564,0.655823,0.406504,0.027189,0.050968,0.486579,0.306495,0.082836,0.000717,0.224376,0.503373,0.514233,0.399027,0.847743,0.542638,0.612411,0.314735,0.101476,0.011118,0.224376,0.595112,0.71451,0.579702,0.582382,0.581039,0.768849,0.192609,0.017441,0.049208,0.224376,0.682468
4500,0.3619,0.656968,0.689279,0.58255,0.303154,0.398784,0.716353,0.19563,0.0,0.028746,0.224376,0.59564,0.655823,0.406504,0.027189,0.050968,0.486579,0.306495,0.082836,0.000717,0.224376,0.503373,0.514233,0.399027,0.847743,0.542638,0.612411,0.314735,0.101476,0.011118,0.224376,0.595112,0.707856,0.578262,0.519304,0.5472,0.74974,0.206349,0.026347,0.044374,0.224376,0.66213
5000,0.3262,0.697693,0.689279,0.58255,0.303154,0.398784,0.716353,0.19563,0.0,0.028746,0.224376,0.59564,0.655823,0.406504,0.027189,0.050968,0.486579,0.306495,0.082836,0.000717,0.224376,0.503373,0.514233,0.399027,0.847743,0.542638,0.612411,0.314735,0.101476,0.011118,0.224376,0.595112,0.710074,0.581846,0.522838,0.550766,0.750913,0.210653,0.030592,0.044315,0.224376,0.664668


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 32


Saving model checkpoint to architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-500


Configuration saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-500/config.json


Model weights saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-500/pytorch_model.bin




The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 32


Saving model checkpoint to architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-1000


Configuration saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-1000/config.json


Model weights saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-1000/pytorch_model.bin


Deleting older checkpoint [architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-500] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 32


Saving model checkpoint to architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-1500


Configuration saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-1500/config.json


Model weights saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-1500/pytorch_model.bin


Deleting older checkpoint [architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-1000] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 32


Saving model checkpoint to architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-2000


Configuration saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-2000/config.json


Model weights saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-2000/pytorch_model.bin


Deleting older checkpoint [architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-1500] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 32


Saving model checkpoint to architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-2500


Configuration saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-2500/config.json


Model weights saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-2500/pytorch_model.bin


Deleting older checkpoint [architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-2000] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 32


Saving model checkpoint to architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-3000


Configuration saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-3000/config.json


Model weights saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-3000/pytorch_model.bin


Deleting older checkpoint [architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-2500] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 32


Saving model checkpoint to architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-3500


Configuration saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-3500/config.json


Model weights saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-3500/pytorch_model.bin




The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 32


Saving model checkpoint to architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-4000


Configuration saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-4000/config.json


Model weights saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-4000/pytorch_model.bin


Deleting older checkpoint [architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-3500] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 32


Saving model checkpoint to architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-4500


Configuration saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-4500/config.json


Model weights saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-4500/pytorch_model.bin


Deleting older checkpoint [architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-4000] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 32


Saving model checkpoint to architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-5000


Configuration saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-5000/config.json


Model weights saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-5000/pytorch_model.bin


Deleting older checkpoint [architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-4500] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 32


Saving model checkpoint to architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-5500


Configuration saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-5500/config.json


Model weights saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-5500/pytorch_model.bin


Deleting older checkpoint [architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-5000] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 32


Saving model checkpoint to architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-6000


Configuration saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-6000/config.json


Model weights saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-6000/pytorch_model.bin


Deleting older checkpoint [architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-5500] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 32


Saving model checkpoint to architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-6500


Configuration saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-6500/config.json


Model weights saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-6500/pytorch_model.bin


Deleting older checkpoint [architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-6000] due to args.save_total_limit




wandb: Network error (ReadTimeout), entering retry loop.


wandb: Network error (ReadTimeout), entering retry loop.


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 32


Saving model checkpoint to architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-7000


Configuration saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-7000/config.json


Model weights saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-7000/pytorch_model.bin


Deleting older checkpoint [architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-6500] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 32


Saving model checkpoint to architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-7500


Configuration saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-7500/config.json


Model weights saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-7500/pytorch_model.bin


Deleting older checkpoint [architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-7000] due to args.save_total_limit




The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10820


  Batch size = 32


Saving model checkpoint to architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-8000


Configuration saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-8000/config.json


Model weights saved in architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-8000/pytorch_model.bin


Deleting older checkpoint [architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-7500] due to args.save_total_limit






Training completed. Do not forget to share your model on huggingface.co/models =)




Loading best model from architectures/deberta-bs16*2-0sh-instance-split-07291344/checkpoint-3000 (score: 0.5266638398170471).


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/accuracy,▁▂▂▆█▇▅▆▅▅▅▄▅▅▃▄
eval/balanced_accuracy,▂▁▇█▅███▆▆▆▇▇▇▇▇
eval/bs,▃▃▂▁▁▁▃▂▄▄▅▅▆▇██
eval/bs_dsc,▁▂▄▇██▇█▆▆▅▆▅▅▄▅
eval/bs_mcb,▁▁▁▁▁▁▃▃▄▄▅▅▆▇██
eval/bs_unc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/conf_absolute_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/conf_absolute_balanced_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/conf_absolute_bs,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/conf_absolute_bs_dsc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.70333
eval/balanced_accuracy,0.67736
eval/bs,0.24482
eval/bs_dsc,0.03926
eval/bs_mcb,0.05971
eval/bs_unc,0.22438
eval/conf_absolute_accuracy,0.65582
eval/conf_absolute_balanced_accuracy,0.50337
eval/conf_absolute_bs,0.30649
eval/conf_absolute_bs_dsc,0.00072


In [6]:
# NOTE(review): this whole cell is disabled -- every statement below is
# commented out, so it has no effect when the notebook runs.
#
# What the disabled code would do, per the lines below:
#   1. Declare an `Assessor` record (name + checkpoint path) and build one
#      tokenizer per architecture (bert, roberta, albert, gpt2).
#   2. For each assessor, load the checkpoint with
#      AutoModelForSequenceClassification, tokenize `dataset` with that
#      architecture's tokenizer, and call `Trainer.predict` on the "test" split.
#   3. Keep accuracy, brier_score and roc_auc (read from the "test_"-prefixed
#      metric keys) and append them, together with the model name, to `results`.
#
# Before re-enabling -- TODO confirm each point:
#   - `Path`, `transformers`, `AutoTokenizer`,
#     `AutoModelForSequenceClassification`, `Trainer`, `lass.metrics` and
#     `dataset` do not appear to be imported/defined in the cells above this one.
#   - The checkpoint paths point at "*-task-split" runs under
#     ../artifacts/assessors/, whereas the training cell above uses
#     split="instance" and its logs show checkpoints saved under
#     "architectures/...".
#   - There is no "deberta" tokenizer/assessor entry, although deberta is one of
#     the architectures trained above.

# @dataclass
# class Assessor():
#   name: str
#   path: Path

# BASE = Path("../artifacts/assessors/")
# transformers.logging.set_verbosity_error() # type: ignore
# tokenizers = {
#   "bert": AutoTokenizer.from_pretrained("bert-base-cased"),
#   "roberta": AutoTokenizer.from_pretrained("roberta-base"),
#   "albert": AutoTokenizer.from_pretrained("albert-base-v2"),
#   "gpt2": AutoTokenizer.from_pretrained("gpt2"),
# }
# (presumably needed because the gpt2 tokenizer has no pad token of its own,
#  and padding="max_length" is requested below -- verify)
# tokenizers["gpt2"].pad_token = tokenizers["gpt2"].eos_token

# assessors = [
#   Assessor("gpt2", BASE / "gpt2-bs8-0sh-task-split/checkpoint-5500"),
#   Assessor("bert", BASE / "bert-bs32-0sh-task-split/checkpoint-1500"),
#   Assessor("roberta", BASE / "roberta-bs32-0sh-task-split/checkpoint-1500"),
#   Assessor("albert", BASE / "albert-bs32-0sh-task-split/checkpoint-1500"),
# ]

# compute_metrics = lass.metrics.hf.get_metric_computer([
#   "accuracy",
#   "precision",
#   "recall",
#   "f1",
#   "roc_auc",
#   "brier_score",
# ])

# results = []
# for assessor in assessors:
#   transformers.logging.set_verbosity_error() # type: ignore
#   model = AutoModelForSequenceClassification.from_pretrained(assessor.path)

#   # Tokenize according to specific model tokenizer
#   tokenizer = tokenizers[assessor.name]
#   def tokenize(examples):
#     return tokenizer(examples["text"], padding="max_length", truncation=True, return_tensors="np")
#   tokenized_datasets = dataset.map(tokenize, batched=True)

#   # Dummy Trainer for easy batched predictions
#   dummy_trainer = Trainer(model=model, compute_metrics=compute_metrics)

#   predictions, labels, metrics = dummy_trainer.predict(tokenized_datasets['test']) #type: ignore
#   print(metrics)
#   assert metrics is not None

#   metrics = {key: metrics[f"test_{key}"] for key in ["accuracy", "brier_score", "roc_auc"]}
#   results.append({"model": assessor.name, **metrics})