In [1]:
import argparse
import json
import math
import os
import random
import numpy as np

import nltk
import datasets
import evaluate
import torch
import torch.nn as nn
from accelerate import Accelerator
#from accelerate.logging import get_logger
from accelerate.utils import set_seed
from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
from typing import List, Optional, Union
from dataclasses import dataclass, field
import wandb

import transformers
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    SchedulerType,
    get_scheduler,
    set_seed,
    DataCollatorForSeq2Seq,
)
from peft import (
    PromptTuningConfig,
    PrefixTuningConfig,
    LoraConfig,
    IA3Config,
    get_peft_model,
    PromptTuningInit,
    PromptTuningConfig,
    TaskType,
)
from transformers.utils.versions import require_version
from loguru import logger

import sys
sys.path.append('./../scripts')
import data_utils 
import finetuning_seq2seq 

  from .autonotebook import tqdm as notebook_tqdm


[2023-09-20 14:12:34,075] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [None]:
# Start from the training script's parsed defaults, then pin the options this
# notebook experiments with (LoRA on the SuperGLUE BoolQ task, single beam).
args = finetuning_seq2seq.parse_args()
for option, value in (
    ("peft_method", "lora"),
    ("task_type", "classification"),
    ("dataset_name", "super_glue"),
    ("dataset_config_name", "boolq"),
    ("source_prefix", ""),
    ("num_beams", 1),
):
    setattr(args, option, value)

accelerator = Accelerator()
model, tokenizer, config = finetuning_seq2seq.get_model(args)

# preprocess_data receives the objects built above; every name on the left is
# rebound to whatever it returns, and the two dataloaders are created here.
(
    args,
    model,
    tokenizer,
    accelerator,
    logger,
    train_dataloader,
    eval_dataloader,
) = data_utils.preprocess_data(
    args=args,
    model=model,
    tokenizer=tokenizer,
    accelerator=accelerator,
    logger=logger,
)

In [3]:
# Smoke-test evaluation: generate predictions for the first few validation
# batches and score them with the metric registered for the dataset.
MAX_EVAL_BATCHES = 3  # number of batches to score; set to None for the whole dataloader

model.eval()
metric = evaluate.load(args.dataset_name, args.dataset_config_name)
gen_kwargs = {
    # Fall back to the training target length when no validation length is set.
    "max_length": args.val_max_target_length or args.max_target_length,
    "min_length": 1,
    "num_beams": args.num_beams,
}

# Unwrapping does not depend on the batch, so do it once rather than per step.
unwrapped_model = accelerator.unwrap_model(model)

for step_eval, batch in enumerate(eval_dataloader):
    print(step_eval)
    with torch.no_grad():
        generated_tokens = unwrapped_model.generate(
            input_ids=batch["input_ids"],
            attention_mask=batch["attention_mask"],
            **gen_kwargs,
        )

        # Generated sequences can differ in length between processes; pad
        # them to a common width before gathering.
        generated_tokens = accelerator.pad_across_processes(
            generated_tokens, dim=1, pad_index=tokenizer.pad_token_id
        )
        labels = batch["labels"]
        if not args.pad_to_max_length:
            # If we did not pad to max length, we need to pad the labels too
            labels = accelerator.pad_across_processes(labels, dim=1, pad_index=tokenizer.pad_token_id)

        generated_tokens, labels = accelerator.gather_for_metrics((generated_tokens, labels))
        generated_tokens = generated_tokens.cpu().numpy()
        labels = labels.cpu().numpy()

        if args.ignore_pad_token_for_loss:
            # Replace -100 in the labels as we can't decode them.
            labels = np.where(labels != -100, labels, tokenizer.pad_token_id)

        decoded_preds = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
        decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
        # NOTE(review): postprocess_classification is defined in data_utils and is
        # not visible here; presumably it maps the decoded strings to class ids.
        decoded_preds, decoded_labels = data_utils.postprocess_classification(
            decoded_preds, decoded_labels, args.dataset_config_name
        )

        metric.add_batch(
            predictions=decoded_preds,
            references=decoded_labels,
        )

    # Stop once the requested number of batches has been scored.
    if MAX_EVAL_BATCHES is not None and step_eval + 1 >= MAX_EVAL_BATCHES:
        break

result = metric.compute()
# Report every metric value as a percentage rounded to four decimals.
result = {k: round(v * 100, 4) for k, v in result.items()}
logger.info(result)

0
false
['false', 'true']
false
['false', 'true']
false
['false', 'true']
true
['false', 'true']
1
true
['false', 'true']
true
['false', 'true']
true
['false', 'true']
true
['false', 'true']
2


[32m2023-09-20 14:25:36.388[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m49[0m - [1m{'accuracy': 66.6667}[0m


false
['false', 'true']
true
['false', 'true']
false
['false', 'true']
false
['false', 'true']


In [None]:
# Inspect the post-processed labels left over from the last batch handled by
# the evaluation loop (depends on that cell having been run first).
decoded_labels

In [None]:
# Inspect the label token ids (NumPy array) of the last batch handled by the
# evaluation loop.
labels

In [None]:
# Inspect the generated token ids (NumPy array) of the last batch handled by
# the evaluation loop.
generated_tokens

In [None]:
# Decode the last batch's label and prediction ids again, this time without
# passing skip_special_tokens, and show both lists for side-by-side comparison.
str_labels, str_preds = (
    tokenizer.batch_decode(token_ids) for token_ids in (labels, generated_tokens)
)

print(str_labels, str_preds, sep="\n")

In [None]:
def postprocess_classification_(preds, labels, dataset_config_name=None):
    """Convert decoded prediction and label strings into class ids.

    Each prediction, and the label at the same position, is lower-cased and
    passed to ``string_label_to_class_id`` together with the label classes
    stored under ``label_names_mapping[dataset_config_name]``.

    NOTE(review): ``string_label_to_class_id`` and ``label_names_mapping`` are
    neither defined nor imported in the visible notebook; they must already be
    in scope when this function is called -- confirm where they come from.

    Args:
        preds: Decoded prediction strings.
        labels: Decoded reference strings, indexable by the positions of
            ``preds``.
        dataset_config_name: Key used to look up the label classes in
            ``label_names_mapping``.

    Returns:
        A ``(pred_ids, label_ids)`` tuple of lists, one entry per prediction.
    """

    def _to_class_id(text):
        # The class list is looked up on every call, so nothing is touched
        # when there are no predictions to convert.
        return string_label_to_class_id(
            string_label=text.lower(),
            label_classes=label_names_mapping[dataset_config_name],
        )

    pred_ids = []
    label_ids = []
    for position, raw_pred in enumerate(preds):
        predicted = _to_class_id(raw_pred)
        expected = _to_class_id(labels[position])
        pred_ids.append(predicted)
        label_ids.append(expected)

    return pred_ids, label_ids

In [None]:
# Post-process the strings decoded in the previous cell; this rebinds the
# decoded_preds / decoded_labels names that the evaluation loop also assigns.
decoded_preds, decoded_labels = data_utils.postprocess_classification(
    str_preds,
    str_labels,
    args.dataset_config_name,
)

In [None]:
# Inspect the labels returned by post-processing the re-decoded strings.
decoded_labels