In [1]:
# Load IPython's autoreload extension; mode 2 re-imports all modules before each
# cell executes, so edits to the local `lass` package are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2
# Set CUDA-related environment variables for this kernel.
# NOTE(review): presumably this pins the run to the first GPU in PCI bus order — confirm on the target machine.
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=0


In [2]:
from typing import *
from dataclasses import dataclass
import shutil

import pandas as pd

import lass.train
import lass.test
import lass.datasets
from lass.log_handling import PaperTasks, LogIssues, LogLoaderArgs, LogLoader

import transformers

# Set the verbosity of the transformers library's logger to the WARNING level.
transformers.logging.set_verbosity_warning() # type: ignore

## Find non-empty tasks


In [3]:
# Log selection used to find out which tasks have any data at all:
# BIG-G T=0, size 128b, 0 shots, multiple-choice queries only.
loader_args = LogLoaderArgs(
    logdir="../artifacts/logs",
    tasks="paper-full",
    model_families=["BIG-G T=0"],
    model_sizes=["128b"],
    shots=[0],
    query_types=["multiple_choice"],
)
loader = LogLoader(loader_args)
data = lass.datasets.to_dataframe(loader)

# Every task name that occurs at least once in the loaded frame.
nonempty_tasks = data.task.unique().tolist()

# Walk PaperTasks.full() so the result keeps that ordering, dropping tasks
# for which no rows were loaded.
non_empty_tasks = list(filter(nonempty_tasks.__contains__, PaperTasks.full()))

# Sanity check: this task must not be among the tasks kept above.
assert "ascii_word_recognition" not in non_empty_tasks

In [4]:
@dataclass
class Architecture:
    """Settings of the assessor architecture that is fine-tuned in this notebook."""

    # Model identifier passed to the training/testing helpers as `model_name`.
    name: str
    # Short label used when composing the run name (`model_name_short`).
    name_short: str
    # Batch size forwarded to training.
    batch_size: int
    # Number of gradient accumulation steps forwarded to training.
    gradient_accumulation_steps: int

# The single assessor architecture used for every run in this notebook.
architecture = Architecture(
    name="microsoft/deberta-v3-base",
    name_short="deberta",
    batch_size=16,
    gradient_accumulation_steps=2,
)

# Size label (as used in `model_sizes`) -> integer size value.
# NOTE(review): the values look like exact parameter counts — confirm against the BIG-G model cards.
MODEL_SIZES = {
    # "2m": 2098048,
    "16m": 16_780_288,
    "53m": 56_629_632,
    "125m": 134_228_992,
    "244m": 262_161_280,
    "422m": 453_009_408,
    "1b": 1_073_784_832,
    "2b": 2_097_218_560,
    "4b": 3_623_973_888,
    "8b": 8_590_102_528,
    "27b": 28_991_404_032,
    "128b": 137_440_272_384,
}

## Train an Assessor Model for Each BIG-G Model Size

In [5]:
# Directory that receives the training output and the per-size CSV reports.
OUTPUT_DIR = "scaling"


def _evaluate(test_args, model_loc):
    """Evaluate a trained assessor on the logs selected by ``test_args``.

    Args:
        test_args: ``LogLoaderArgs`` describing which logs to evaluate on.
        model_loc: Value returned by ``lass.train.train`` for the assessor under test.

    Returns:
        The ``metrics`` dict returned by ``lass.test.test``, extended in place
        with a ``count`` entry holding ``len()`` of the returned ``test`` set.
    """
    outcome = lass.test.test(
        data_args=test_args,
        split="instance",
        model_loc=model_loc,
        model_name=architecture.name,
        max_sequence_length=512,
    )
    metrics = outcome["metrics"]
    metrics["count"] = len(outcome["test"])
    return metrics


# Train one assessor per BIG-G size, then evaluate it on everything and per task.
for size in MODEL_SIZES:
    # 3-shot, multiple-choice logs of the BIG-G model of the current size.
    data_args = LogLoaderArgs(
        logdir="../artifacts/logs",
        tasks="paper-full",
        model_families=["BIG-G T=0"],
        model_sizes=[size],
        shots=[3],
        query_types=["multiple_choice"],
    )

    model = lass.train.train(
        data_args=data_args,
        group="scale-relation",
        split="instance",
        model_name=architecture.name,
        model_name_short=f"{architecture.name_short}-for-{size}",
        batch_size=architecture.batch_size,
        gradient_accumulation_steps=architecture.gradient_accumulation_steps,
        include_model_in_input=False,
        include_n_targets_in_input=False,
        output_dir=OUTPUT_DIR,
        n_epochs=8,
        extra_training_args={
            "warmup_steps": 3000,
            "learning_rate": 2e-5,
        },
        # is_test_run=True,
    )

    # Metrics over all tasks together.
    results = {"_total": _evaluate(data_args, model)}
    print("Tested on everything")

    # Metrics per task.
    # NOTE(review): non_empty_tasks was derived from the 128b / 0-shot logs; a task
    # could still be empty for this size / shot setting — confirm lass.test.test copes.
    for task in non_empty_tasks:
        # Restrict the *current* size's data selection to a single task. Deriving
        # this from `loader_args` instead would evaluate every assessor on the
        # 128b 0-shot logs rather than on the population it was trained for.
        task_data_args = LogLoaderArgs(**(data_args.__dict__ | {"tasks": [task]}))
        results[task] = _evaluate(task_data_args, model)

        # Rewrite the CSV after every task so partial results survive an interruption.
        df = pd.DataFrame.from_dict(results, orient="index")
        df.to_csv(f"{OUTPUT_DIR}/{size}.csv")
        print(f"Tested on {task}")

    print("Tested on all tasks")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, 'correct'] = df['correct'].astype(int)


{'text': 'In what follows, we provide short narratives, each of which illustrates a common proverb. \nNarrative: The anxiety about getting the injection made her unable to sleep that night, but the actual experience wasn\'t as bad as she thought it would be.\nThis narrative is a good illustration of the following proverb: Don\'t meet troubles half-way\n\nNarrative: Inmate 76853 lay on the execution gurney ready to utter his last words before lethal drugs ended his life.  He looked through the mirror into the witness room and said to his family and his victim\'s family: "I killed Victoria.  It was wrong.  I can\'t take it back.  I earned my punishment, and I accept it."  Inmate 76853 turned his head back to stare at the stark ceiling and paid his debt with his life.\nThis narrative is a good illustration of the following proverb: The wages of sin is death\n\nNarrative: "Oil change? Why would I want that Tim told the auto tech. Tim was convinced the mechanic was just trying to figure out

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.




  0%|          | 0/44 [00:00<?, ?ba/s]

  0%|          | 0/11 [00:00<?, ?ba/s]

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


[34m[1mwandb[0m: Currently logged in as: [33mwschella[0m. Use [1m`wandb login --relogin`[0m to force relogin






Some weights of the model checkpoint at microsoft/deberta-v3-base were not used when initializing DebertaV2ForSequenceClassification: ['mask_predictions.classifier.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'mask_predictions.dense.bias', 'mask_predictions.LayerNorm.bias', 'mask_predictions.classifier.weight', 'lm_predictions.lm_head.dense.bias', 'mask_predictions.LayerNorm.weight', 'mask_predictions.dense.weight', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.dense.weight']
- This IS expected if you are initializing DebertaV2ForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2ForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.weight', 'classifier.bias', 'pooler.dense.weight', 'pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


The following columns in the training set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running training *****


  Num examples = 43236


  Num Epochs = 8


  Instantaneous batch size per device = 16


  Total train batch size (w. parallel, distributed & accumulation) = 32


  Gradient Accumulation steps = 2


  Total optimization steps = 10808


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


Step,Training Loss,Validation Loss,Conf Distribution Accuracy,Conf Distribution Precision,Conf Distribution Recall,Conf Distribution F1,Conf Distribution Roc Auc,Conf Distribution Bs,Conf Distribution Bs Mcb,Conf Distribution Bs Dsc,Conf Distribution Bs Unc,Conf Distribution Balanced Accuracy,Conf Absolute Accuracy,Conf Absolute Precision,Conf Absolute Recall,Conf Absolute F1,Conf Absolute Roc Auc,Conf Absolute Bs,Conf Absolute Bs Mcb,Conf Absolute Bs Dsc,Conf Absolute Bs Unc,Conf Absolute Balanced Accuracy,Conf Normalized Accuracy,Conf Normalized Precision,Conf Normalized Recall,Conf Normalized F1,Conf Normalized Roc Auc,Conf Normalized Bs,Conf Normalized Bs Mcb,Conf Normalized Bs Dsc,Conf Normalized Bs Unc,Conf Normalized Balanced Accuracy,Accuracy,Precision,Recall,F1,Roc Auc,Bs,Bs Mcb,Bs Dsc,Bs Unc,Balanced Accuracy
500,0.6248,0.587989,0.695138,0.558756,0.276985,0.37037,0.715627,0.191582,0.0,0.027343,0.218925,0.586141,0.671289,0.482692,0.21502,0.297511,0.585515,0.258289,0.046963,0.0076,0.218925,0.552357,0.49584,0.379178,0.874643,0.529016,0.631065,0.344322,0.137149,0.011753,0.218925,0.59458,0.67628,0.0,0.0,0.0,0.682198,0.200811,0.002915,0.021029,0.218925,0.5
1000,0.5805,0.578786,0.695138,0.558756,0.276985,0.37037,0.715627,0.191582,0.0,0.027343,0.218925,0.586141,0.671289,0.482692,0.21502,0.297511,0.585515,0.258289,0.046963,0.0076,0.218925,0.552357,0.49584,0.379178,0.874643,0.529016,0.631065,0.344322,0.137149,0.011753,0.218925,0.59458,0.674986,0.498058,0.512564,0.505207,0.697789,0.19811,0.002932,0.023748,0.218925,0.632649
1500,0.5761,0.575302,0.695138,0.558756,0.276985,0.37037,0.715627,0.191582,0.0,0.027343,0.218925,0.586141,0.671289,0.482692,0.21502,0.297511,0.585515,0.258289,0.046963,0.0076,0.218925,0.552357,0.49584,0.379178,0.874643,0.529016,0.631065,0.344322,0.137149,0.011753,0.218925,0.59458,0.680163,0.510682,0.286693,0.367228,0.698215,0.196428,0.001938,0.024435,0.218925,0.5776
2000,0.5753,0.57262,0.695138,0.558756,0.276985,0.37037,0.715627,0.191582,0.0,0.027343,0.218925,0.586141,0.671289,0.482692,0.21502,0.297511,0.585515,0.258289,0.046963,0.0076,0.218925,0.552357,0.49584,0.379178,0.874643,0.529016,0.631065,0.344322,0.137149,0.011753,0.218925,0.59458,0.67665,0.500633,0.451742,0.474932,0.701915,0.195855,0.002635,0.025705,0.218925,0.618025
2500,0.5697,0.582962,0.695138,0.558756,0.276985,0.37037,0.715627,0.191582,0.0,0.027343,0.218925,0.586141,0.671289,0.482692,0.21502,0.297511,0.585515,0.258289,0.046963,0.0076,0.218925,0.552357,0.49584,0.379178,0.874643,0.529016,0.631065,0.344322,0.137149,0.011753,0.218925,0.59458,0.684507,0.582868,0.089377,0.154989,0.701874,0.199268,0.00566,0.025317,0.218925,0.52938
3000,0.5763,0.567766,0.695138,0.558756,0.276985,0.37037,0.715627,0.191582,0.0,0.027343,0.218925,0.586141,0.671289,0.482692,0.21502,0.297511,0.585515,0.258289,0.046963,0.0076,0.218925,0.552357,0.49584,0.379178,0.874643,0.529016,0.631065,0.344322,0.137149,0.011753,0.218925,0.59458,0.6846,0.519363,0.34466,0.414349,0.706048,0.193715,0.001238,0.026449,0.218925,0.595991
3500,0.5663,0.571959,0.695138,0.558756,0.276985,0.37037,0.715627,0.191582,0.0,0.027343,0.218925,0.586141,0.671289,0.482692,0.21502,0.297511,0.585515,0.258289,0.046963,0.0076,0.218925,0.552357,0.49584,0.379178,0.874643,0.529016,0.631065,0.344322,0.137149,0.011753,0.218925,0.59458,0.679053,0.505102,0.424043,0.461037,0.703463,0.194533,0.001769,0.026161,0.218925,0.612582
4000,0.5672,0.572652,0.695138,0.558756,0.276985,0.37037,0.715627,0.191582,0.0,0.027343,0.218925,0.586141,0.671289,0.482692,0.21502,0.297511,0.585515,0.258289,0.046963,0.0076,0.218925,0.552357,0.49584,0.379178,0.874643,0.529016,0.631065,0.344322,0.137149,0.011753,0.218925,0.59458,0.682104,0.508675,0.527413,0.517875,0.708943,0.194911,0.002684,0.026699,0.218925,0.641782
4500,0.56,0.571785,0.695138,0.558756,0.276985,0.37037,0.715627,0.191582,0.0,0.027343,0.218925,0.586141,0.671289,0.482692,0.21502,0.297511,0.585515,0.258289,0.046963,0.0076,0.218925,0.552357,0.49584,0.379178,0.874643,0.529016,0.631065,0.344322,0.137149,0.011753,0.218925,0.59458,0.685339,0.523626,0.310109,0.389527,0.706044,0.195557,0.002762,0.026131,0.218925,0.587531
5000,0.5634,0.573841,0.695138,0.558756,0.276985,0.37037,0.715627,0.191582,0.0,0.027343,0.218925,0.586141,0.671289,0.482692,0.21502,0.297511,0.585515,0.258289,0.046963,0.0076,0.218925,0.552357,0.49584,0.379178,0.874643,0.529016,0.631065,0.344322,0.137149,0.011753,0.218925,0.59458,0.680163,0.505833,0.519989,0.512813,0.707956,0.195542,0.003315,0.026698,0.218925,0.638411


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10818


  Batch size = 16


  _warn_prf(average, modifier, msg_start, len(result))


Saving model checkpoint to scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-500


Configuration saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-500/config.json


Model weights saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-500/pytorch_model.bin


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10818


  Batch size = 16


Saving model checkpoint to scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-1000


Configuration saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-1000/config.json


Model weights saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-1000/pytorch_model.bin


Deleting older checkpoint [scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-500] due to args.save_total_limit


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10818


  Batch size = 16


Saving model checkpoint to scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-1500


Configuration saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-1500/config.json


Model weights saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-1500/pytorch_model.bin


Deleting older checkpoint [scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-1000] due to args.save_total_limit


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10818


  Batch size = 16


Saving model checkpoint to scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-2000


Configuration saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-2000/config.json


Model weights saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-2000/pytorch_model.bin


Deleting older checkpoint [scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-1500] due to args.save_total_limit


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10818


  Batch size = 16


Saving model checkpoint to scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-2500


Configuration saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-2500/config.json


Model weights saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-2500/pytorch_model.bin


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10818


  Batch size = 16


Saving model checkpoint to scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-3000


Configuration saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-3000/config.json


Model weights saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-3000/pytorch_model.bin


Deleting older checkpoint [scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-2000] due to args.save_total_limit


Deleting older checkpoint [scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-2500] due to args.save_total_limit


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10818


  Batch size = 16


Saving model checkpoint to scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-3500


Configuration saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-3500/config.json


Model weights saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-3500/pytorch_model.bin


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10818


  Batch size = 16


Saving model checkpoint to scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-4000


Configuration saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-4000/config.json


Model weights saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-4000/pytorch_model.bin


Deleting older checkpoint [scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-3500] due to args.save_total_limit


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10818


  Batch size = 16


Saving model checkpoint to scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-4500


Configuration saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-4500/config.json


Model weights saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-4500/pytorch_model.bin


Deleting older checkpoint [scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-4000] due to args.save_total_limit


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10818


  Batch size = 16


Saving model checkpoint to scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-5000


Configuration saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-5000/config.json


Model weights saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-5000/pytorch_model.bin


Deleting older checkpoint [scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-4500] due to args.save_total_limit


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10818


  Batch size = 16


Saving model checkpoint to scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-5500


Configuration saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-5500/config.json


Model weights saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-5500/pytorch_model.bin


Deleting older checkpoint [scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-5000] due to args.save_total_limit


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10818


  Batch size = 16


Saving model checkpoint to scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-6000


Configuration saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-6000/config.json


Model weights saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-6000/pytorch_model.bin


Deleting older checkpoint [scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-5500] due to args.save_total_limit


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10818


  Batch size = 16


Saving model checkpoint to scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-6500


Configuration saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-6500/config.json


Model weights saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-6500/pytorch_model.bin


Deleting older checkpoint [scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-6000] due to args.save_total_limit


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10818


  Batch size = 16


Saving model checkpoint to scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-7000


Configuration saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-7000/config.json


Model weights saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-7000/pytorch_model.bin


Deleting older checkpoint [scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-6500] due to args.save_total_limit


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10818


  Batch size = 16


Saving model checkpoint to scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-7500


Configuration saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-7500/config.json


Model weights saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-7500/pytorch_model.bin


Deleting older checkpoint [scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-7000] due to args.save_total_limit


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10818


  Batch size = 16


Saving model checkpoint to scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-8000


Configuration saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-8000/config.json


Model weights saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-8000/pytorch_model.bin


Deleting older checkpoint [scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-7500] due to args.save_total_limit


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10818


  Batch size = 16


Saving model checkpoint to scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-8500


Configuration saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-8500/config.json


Model weights saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-8500/pytorch_model.bin


Deleting older checkpoint [scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-8000] due to args.save_total_limit


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10818


  Batch size = 16


Saving model checkpoint to scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-9000


Configuration saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-9000/config.json


Model weights saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-9000/pytorch_model.bin


Deleting older checkpoint [scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-8500] due to args.save_total_limit


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10818


  Batch size = 16


Saving model checkpoint to scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-9500


Configuration saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-9500/config.json


Model weights saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-9500/pytorch_model.bin


Deleting older checkpoint [scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-9000] due to args.save_total_limit


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10818


  Batch size = 16


Saving model checkpoint to scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-10000


Configuration saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-10000/config.json


Model weights saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-10000/pytorch_model.bin


Deleting older checkpoint [scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-9500] due to args.save_total_limit


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


***** Running Evaluation *****


  Num examples = 10818


  Batch size = 16


Saving model checkpoint to scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-10500


Configuration saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-10500/config.json


Model weights saved in scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-10500/pytorch_model.bin


Deleting older checkpoint [scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-10000] due to args.save_total_limit




Training completed. Do not forget to share your model on huggingface.co/models =)




Loading best model from scaling/deberta-for-16m-bs16*2-3sh-instance-split-10191710/checkpoint-3000 (score: 0.5677661299705505).


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/accuracy,▅▄▆▅██▆▇█▆▆▅▆▁▁▅▃▂▂▃▁
eval/balanced_accuracy,▁█▅▇▂▆▇█▅█▇▇▇▆▇▆▆▆▆▆▆
eval/bs,▂▂▁▁▂▁▁▁▁▁▁▂▂▄▄▄▅▆▇██
eval/bs_dsc,▄▆▆▇▇█████▇▇▇▄▄▃▂▂▂▁▁
eval/bs_mcb,▁▁▁▁▂▁▁▁▁▁▁▂▂▄▄▄▅▅▇██
eval/bs_unc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/conf_absolute_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/conf_absolute_balanced_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/conf_absolute_bs,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/conf_absolute_bs_dsc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.66639
eval/balanced_accuracy,0.60709
eval/bs,0.23968
eval/bs_dsc,0.01618
eval/bs_mcb,0.03694
eval/bs_unc,0.21893
eval/conf_absolute_accuracy,0.67129
eval/conf_absolute_balanced_accuracy,0.55236
eval/conf_absolute_bs,0.25829
eval/conf_absolute_bs_dsc,0.0076


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, 'correct'] = df['correct'].astype(int)
