In [1]:
# Auto-reload imported modules before each cell executes (autoreload mode 2),
# so edits to the local project package are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2
# Number GPUs by PCI bus id and expose only device 1 to this kernel.
# NOTE(review): these only take effect if set before any CUDA-using library
# initialises the driver, so keep this cell first.
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=1

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=1


In [2]:
from typing import *
from dataclasses import dataclass
import shutil

import pandas as pd

import lass.train
import lass.test
import lass.datasets
from lass.log_handling import PaperTasks, LogIssues, LoaderArgs, LogLoader

import transformers

# Set the transformers library's logging verbosity to WARNING for this kernel.
transformers.logging.set_verbosity_warning() # type: ignore

In [3]:
@dataclass
class Architecture:
    """Settings describing one pretrained backbone for the assessor runs.

    The per-task loop in this notebook reads these fields and forwards them
    to ``lass.train.train`` / ``lass.test.test``.
    """

    # Forwarded as ``model_name``
    # (presumably a Hugging Face model identifier — TODO confirm).
    name: str
    # Short label; combined with the task name to form ``model_name_short``.
    name_short: str
    # Forwarded unchanged as ``batch_size``.
    batch_size: int
    # Forwarded unchanged as ``gradient_accumulation_steps``.
    gradient_accumulation_steps: int

# Alternative backbone, kept for reference; uncomment this block (and comment
# out the active one below) to switch the whole notebook to it.
# architecture = Architecture(
#     name="albert-base-v2",
#     name_short="albert",
#     batch_size=32,
#     gradient_accumulation_steps=1,
# )

# Active backbone used by every training/evaluation call in this notebook.
# batch_size * gradient_accumulation_steps is 16 * 2 = 32 here, the same
# product as the commented-out configuration above (32 * 1).
architecture = Architecture(
    name="microsoft/deberta-v3-base",
    name_short="deberta",
    batch_size=16,
    gradient_accumulation_steps=2,
)

In [4]:
# Notebook-wide log selection. The values are handed to LoaderArgs as-is;
# how each filter is interpreted is defined in lass.log_handling
# (NOTE(review): presumably one model family/size, 0-shot, multiple-choice
# queries only — confirm against LoaderArgs).
loader_args = LoaderArgs(
    logdir="../artifacts/logs",
    tasks='paper-full',
    model_families=["BIG-G T=0"],
    model_sizes=["128b"],
    shots=[0],
    query_types=["multiple_choice"],
)

# Load the selected logs once and collect the distinct `task` values present
# in the result; the per-task loop uses this list to skip tasks with no data.
loader = LogLoader(loader_args)
data = lass.datasets.to_dataframe(loader)  # presumably a pandas DataFrame — TODO confirm
nonempty_tasks = data.task.unique().tolist()

In [None]:
# Train and evaluate one assessor per task. The metrics table is rewritten to
# CSV after every task, so the file on disk always reflects the tasks finished
# so far.

# Tasks in the order yielded by PaperTasks.full(), restricted to those that
# actually appear in the loaded data.
tasks = [name for name in PaperTasks.full() if name in nonempty_tasks]
assert "ascii_word_recognition" not in tasks

# Three-line separator printed above and below each task header.
banner_rule = "----------------------------------------------------"
banner_block = "\n".join([banner_rule] * 3)

results: Dict[str, Dict[str, Any]] = {}  # Dict[task, Dict[metric, value]]
for position, task in enumerate(tasks, start=1):
    print(banner_block)
    print(f"Task: {task} ({position}/{len(tasks)})")
    print(banner_block)

    # Same loader settings as the notebook-wide ones, narrowed to this task.
    single_task_args = LoaderArgs(**{**loader_args.__dict__, "tasks": [task]})

    # Per-task output directory; removed again at the end of the iteration.
    checkpoint_dir = f"task-level-assessors/{task}"

    # Extra keyword settings handed to the trainer
    # (presumably Hugging Face TrainingArguments fields — TODO confirm).
    training_overrides = {
        "evaluation_strategy": "epoch",
        "save_strategy": "epoch",
        "logging_strategy": "epoch",
        "learning_rate": 2e-5,
    }

    model = lass.train.train(
        data_args=single_task_args,
        group="task-level-assessors",
        split="instance",
        model_name=architecture.name,
        model_name_short=f"{task}-{architecture.name_short}",
        batch_size=architecture.batch_size,
        gradient_accumulation_steps=architecture.gradient_accumulation_steps,
        include_model_in_input=False,
        include_n_targets_in_input=False,
        output_dir=checkpoint_dir,
        n_epochs=6,
        extra_training_args=training_overrides,
        # is_test_run=True,
    )

    # Evaluate whatever train() returned (passed on as `model_loc`) on the
    # same single-task selection.
    test_output = lass.test.test(
        data_args=single_task_args,
        split="instance",
        model_loc=model,
        model_name=architecture.name,
        max_sequence_length=512,
    )

    # Store the metrics dict for this task and add the test-set size to it.
    results[task] = test_output["metrics"]
    results[task]["count"] = len(test_output["test"])

    # One row per finished task; overwrite the CSV with the table so far.
    df = pd.DataFrame.from_dict(results, orient="index")
    df.to_csv("task-level-assessors.csv")

    # Delete this task's output directory now that its metrics are saved.
    shutil.rmtree(checkpoint_dir)