# modeling11-nli-binaryclassifier-modeling-from-scratch-prediction
- Notebook prototype of the prediction script `binary_classifier_predict.py`

In [None]:
import os
os.environ['CUDA_VISIBLE_DEVICES']='1'

In [None]:
from accelerate import Accelerator
from accelerate.logging import get_logger
from accelerate.utils import set_seed
import heapq
import pathlib
import shutil

from pprint import pprint
from tqdm.auto import tqdm
from src.data import (
    BinaryCustomDatasetShuffle,
    BinarySentenceDataset,
    BinaryCustomDatasetDecisiveBinaryGold,
)

import json
import math
import os
import logging
import sys
import evaluate
from util import utils

import transformers
import torch
import numpy as np
import random
from torch import nn
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AutoModelForSequenceClassification,
    AutoModel,
    AutoConfig,
    AutoTokenizer,
    DataCollatorWithPadding,
    EvalPrediction,
    HfArgumentParser,
    set_seed,
    get_scheduler,
)
from util.arguments import ModelArguments, DataTrainingArguments, CustomTrainingArguments
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

NEW_LINE = "\n"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

DATASET_MAPPING = {
    "BinaryCustomDatasetShuffle" : BinaryCustomDatasetShuffle,
    "BinarySentenceDataset" : BinarySentenceDataset,
    'BinaryCustomDatasetDecisiveBinaryGold' : BinaryCustomDatasetDecisiveBinaryGold,
}

In [None]:
# #!/bin/bash

# #echo $SLURM_ARRAY_TASK_ID
# #export i=$SLURM_ARRAY_TASK_ID
# #echo $CUDA_VISIBLE_DEVICES
# export gpu_=$CUDA_VISIBLE_DEVICES

# # decisive_binary_gold_data
# CUDA_VISIBLE_DEVICES="$gpu_" python binary_classifier_predict.py \
# --do_train False \
# --do_eval True \
# --do_predict True \
# --per_device_eval_batch_size 128 \
# --prediction_model_name_or_path /data/philhoon-relevance/binary-classification/results/NQ-DEV-DPR/5-fold/1/roberta-decisive_binary_gold_data_trial1 \
# --prediction_model_step 380 \
# --max_seq_length 200 \
# --eval_file /data/philhoon-relevance/binary-classification/NQ-DEV-DPR/5-fold/1/decisive_binary_gold_data/binary_decisive_gold_ctx100id_split_dev_1.json \
# --dataset_class BinaryCustomDatasetDecisiveBinaryGold \
# --num_labels 2 \


In [None]:
parser = HfArgumentParser(
    (ModelArguments, DataTrainingArguments, CustomTrainingArguments)
)
model_args, data_args, train_args = parser.parse_args_into_dataclasses([])
# os.environ['WANDB_PROJECT'] = model_args.wandb_project

In [None]:
pprint(vars(model_args))

In [None]:
pprint(vars(data_args))

In [None]:
pprint(vars(train_args))

In [None]:
train_args.do_train = False
train_args.do_eval = True
train_args.do_predict = True
train_args.per_device_eval_batch_size = 256

In [None]:
model_args.prediction_model_name_or_path = '/data/philhoon-relevance/binary-classification/results/NQ-DEV-DPR/5-fold/1/roberta-decisive_binary_gold_data_trial1'
model_args.prediction_model_step = '320'
model_args.max_seq_length = 200
model_args.num_labels = 2

In [None]:
print(model_args.prediction_model_name_or_path)
print(model_args.prediction_model_step)
print(model_args.max_seq_length)
print(model_args.num_labels)

In [None]:
# Dev set prediction
data_args.intact_eval = False
data_args.eval_file = '/data/philhoon-relevance/binary-classification/NQ-DEV-DPR/5-fold/1/decisive_binary_gold_data/binary_decisive_gold_ctx100id_split_dev_1.json'

# Test set prediction (following input format)
# data_args.intact_eval = False
# data_args.eval_file = '/data/philhoon-relevance/binary-classification/NQ-TEST-DPR/binary_decisive_format_ctx100id_test.json'

# Whole Test Set prediction (Whole Naive set - DPR binary format)
# data_args.intact_eval = True
# data_args.eval_file = '/data/philhoon-relevance/binary-classification/NQ-TEST-DPR/binary_decisive_gold_ctx100id_test.json'

# data_args.dataset_class = 'BinarySentenceDataset'
data_args.dataset_class = 'BinaryCustomDatasetDecisiveBinaryGold'

In [None]:
print(data_args.eval_file)
print(data_args.dataset_class)

In [None]:
logger = get_logger(__name__)
accelerator = Accelerator()

In [None]:
logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )

In [None]:
logger.info(accelerator.state, main_process_only=False)
if accelerator.is_local_main_process:
    transformers.utils.logging.set_verbosity_info()
else:
    transformers.utils.logging.set_verbosity_error()

In [None]:
# Predictions are written below <model dir>/step_<N>/ in 'intact_prediction'
# when data_args.intact_eval is truthy, and in 'partial_prediction' otherwise.
train_args.output_dir = os.path.join(
    model_args.prediction_model_name_or_path,
    f'step_{model_args.prediction_model_step}',
    'intact_prediction' if data_args.intact_eval else 'partial_prediction',
)

In [None]:
print(train_args.output_dir)

In [None]:
if accelerator.is_main_process and train_args.output_dir is not None:
    os.makedirs(train_args.output_dir, exist_ok=True)
accelerator.wait_for_everyone()

In [None]:
print(model_args.prediction_model_name_or_path)

In [None]:
# Build the model config for the fine-tuned checkpoint directory.
# The label count is read from model_args because that is where this notebook
# sets it (`model_args.num_labels = 2`); reading it from data_args would ignore
# that assignment.
config = AutoConfig.from_pretrained(
    model_args.prediction_model_name_or_path,
    num_labels=model_args.num_labels,
)

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_args.prediction_model_name_or_path)

In [None]:
# pprint(vars(tokenizer))

In [None]:
pytorch_model_path = os.path.join(model_args.prediction_model_name_or_path, f'step_{model_args.prediction_model_step}')



In [None]:
pytorch_model_path

In [None]:
model = AutoModelForSequenceClassification.from_pretrained(
        pytorch_model_path,
        config=config,
)

In [None]:
eval_file = data_args.eval_file
eval_data = utils.open_json(eval_file)
DataSetClass = DATASET_MAPPING[data_args.dataset_class]
eval_dataset = DataSetClass(eval_data, tokenizer=tokenizer,
                            max_length=model_args.max_seq_length, shuffle=False)

In [None]:
# for index in random.sample(range(len(eval_dataset)), 5):
#     logger.info(f"Sample {index} of the eval set: {eval_dataset[index]}.")

In [None]:
data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8)

In [None]:
eval_dataloader = DataLoader(eval_dataset,
                             shuffle=False,
                             collate_fn=data_collator,
                             batch_size=train_args.per_device_eval_batch_size,
                             )

In [None]:
model, eval_dataloader = accelerator.prepare(
    model, eval_dataloader
)

In [None]:
metric_acc = evaluate.load("accuracy")
metric_pre = evaluate.load('precision')
metric_re = evaluate.load('recall')
metric_f1 = evaluate.load('f1')

In [None]:
# Summarise the evaluation run before it starts.
logger.info("***** Running evaluation *****")
logger.info(f"  Num examples = {len(eval_dataset)}")
logger.info(f"  Instantaneous batch size per device = {train_args.per_device_eval_batch_size}")
# len(eval_dataloader) is the number of batches the loop below iterates over
# (the same value that sizes the progress bar); the previous
# ceil(num_examples / batch_size) + 1 over-counted by one step.
logger.info(f"  Steps = {len(eval_dataloader)}")


In [None]:
# Saving model_args, data_args, train_args
train_dict = vars(train_args)
logger.info(f"  Saving training_args = {train_dict}")
with open(os.path.join(train_args.output_dir, "train_args.json"), "w") as f:
    json.dump(train_dict, f)

model_dict = vars(model_args)
logger.info(f"  Saving model_args = {model_dict}")
with open(os.path.join(train_args.output_dir, "model_args.json"), "w") as f:
    json.dump(model_dict, f)

data_dict = vars(data_args)
logger.info(f"  Saving data_args = {data_dict}")
with open(os.path.join(train_args.output_dir, "data_args.json"), "w") as f:
    json.dump(data_dict, f)

In [None]:
eval_progress_bar = tqdm(range(len(eval_dataloader)), disable=not accelerator.is_local_main_process)

In [None]:
eval_loss = 0
model.eval()
samples_seen = 0
prediction_lst = []
reference_lst = []

In [None]:
# Run the model over every evaluation batch, feed the gathered predictions to
# the four metrics and keep them (with their labels) for the prediction dump.
for step, batch in enumerate(eval_dataloader):
    with torch.no_grad():
        outputs = model(**batch)
    loss = outputs.loss
    if train_args.with_tracking:
        eval_loss += loss.detach().float()

    predictions, references = accelerator.gather(
        (outputs.logits.argmax(dim=-1), batch["labels"])
    )
    # If we are in a multiprocess environment, the last batch has duplicates
    if accelerator.num_processes > 1:
        if step != len(eval_dataloader) - 1:
            samples_seen += references.shape[0]
        else:
            remaining = len(eval_dataloader.dataset) - samples_seen
            predictions = predictions[:remaining]
            references = references[:remaining]

    for batch_metric in (metric_acc, metric_pre, metric_re, metric_f1):
        batch_metric.add_batch(
            predictions=predictions,
            references=references,
        )
    eval_progress_bar.update(1)
    prediction_lst.extend(predictions.detach().cpu().tolist())
    reference_lst.extend(references.detach().cpu().tolist())
#     print(f'predictions : {predictions}')
#     print(f'prediction_lst : {prediction_lst}')
#     print(f'references : {references}')
#     print(f'eval_loss : {eval_loss}')


In [None]:
# Finalise the four metrics accumulated during the evaluation loop and log them.
eval_metric = metric_acc.compute()
eval_metric_pre = metric_pre.compute()
eval_metric_re = metric_re.compute()
eval_metric_f1 = metric_f1.compute()

logger.info(f"Accuracy : {eval_metric['accuracy']} Precision : {eval_metric_pre['precision']}")
logger.info(f"Recall : {eval_metric_re['recall']} F1 : {eval_metric_f1['f1']}")
# eval_loss starts as the int 0 and only becomes a tensor when
# train_args.with_tracking is on. float() accepts both, whereas `.item()`
# raised AttributeError on the plain int. With tracking off this reports 0.0.
logger.info(f"Eval_loss : {float(eval_loss) / len(eval_dataloader)}")


In [None]:
# Collect the final evaluation numbers in one JSON-serialisable dict.
result_log = {
    "eval_accuracy": eval_metric['accuracy'],
    "eval_precision": eval_metric_pre['precision'],
    "eval_recall": eval_metric_re['recall'],
    "eval_f1": eval_metric_f1['f1'],
    # float() works for both the initial int 0 (loss tracking disabled) and the
    # accumulated tensor; `.item()` only exists on the tensor.
    "eval_loss": float(eval_loss) / len(eval_dataloader),
}

In [None]:
output_result_path = os.path.join(train_args.output_dir, 'result.json')
with open(output_result_path, "w") as f:
    json.dump(result_log, f)

In [None]:
output_result_path

In [None]:
prediction_np = np.array(prediction_lst)
reference_np = np.array(reference_lst)

In [None]:
for ins, p_, r_ in zip(eval_data, prediction_np, reference_np):
    if str(r_) != ins['em']:
        logger.info(f"Not Matching Instance")
    ins['binary_inference'] = str(p_)

In [None]:
predcition_output_path = os.path.join(train_args.output_dir, 'prediction.json')
with open(predcition_output_path, "w") as f:
    json.dump(eval_data, f)

In [None]:
predcition_output_path

## Get top-3 models

In [None]:
path = '/data/philhoon-relevance/binary-classification/results/NQ-DEV-DPR/5-fold/1/roberta-decisive_binary_gold_data_trial1'

In [None]:
import pathlib

In [None]:
path

In [None]:
path = pathlib.Path(path)

In [None]:
files = path.glob('*/*.json')
# pprint(list(files))

In [None]:
# Load every matched result file and remember which checkpoint directory it
# came from, so the rankings printed by sort_by can name the step.
step_result = []

for file in files:
    # Name of the directory that contains the JSON file (e.g. "step_320").
    step = file.parent.name
    result = utils.open_json(file)
    # Assumes open_json returns a dict - TODO confirm. setdefault keeps a 'step'
    # value already stored in the file instead of overwriting it.
    result.setdefault('step', step)
    step_result.append(result)

pprint(step_result)

In [None]:
def sort_by(result, key, top_k, reverse=None):
    """Print the ``top_k`` best entries of ``result`` ranked by ``key``.

    Args:
        result: list of dicts; every dict must contain ``key``. A ``'step'``
            entry is printed when present, otherwise ``'unknown'``.
        key: name of the metric to rank by (e.g. ``'eval_f1'``).
        top_k: number of entries to print.
        reverse: ``True`` ranks largest-first, ``False`` smallest-first. When
            left as ``None`` the direction is inferred from the key: keys
            ending in ``'loss'`` are ranked smallest-first (lower is better),
            everything else largest-first.

    Returns:
        None. The ranking is written to stdout.
    """
    if reverse is None:
        # A loss is better when it is smaller; every other metric used in this
        # notebook (accuracy, precision, recall, F1) is better when larger.
        reverse = not key.endswith('loss')
    newlist = sorted(result, key=lambda d: d[key], reverse=reverse)
    print(f'sorting by {key}')
    for dic_ in newlist[:top_k]:
        print(f"step : {dic_.get('step', 'unknown')}, key : {dic_[key]}")

In [None]:
sort_by(step_result, 'eval_accuracy', 5)

In [None]:
sort_by(step_result, 'eval_f1', 5)

In [None]:
sort_by(step_result, 'eval_precision', 5)

In [None]:
sort_by(step_result, 'eval_recall', 5)

In [None]:
sort_by(step_result, 'eval_loss', 5)

In [None]:
# 12/30/2022 03:18:32 - INFO - __main__ - Accuracy : 0.843587640142193 Precision : 0.9129472519365548
# 12/30/2022 03:18:32 - INFO - __main__ - Recall : 0.880469583778015 F1 : 0.896414342629482
# 12/30/2022 03:18:32 - INFO - __main__ - Eval_loss : 0.4741322724715523

In [None]:
from sklearn.metrics import confusion_matrix
import pandas as pd

y_actu = pd.Series(reference_np, name='Actual')
y_pred = pd.Series(prediction_np, name='Predicted')
df_confusion = pd.crosstab(y_actu, y_pred, rownames=['Actual'], colnames=['Predicted'], margins=True)
df_confusion

In [None]:
len(eval_data)

In [None]:
len(prediction_np)

In [None]:
for ins, p_, r_ in zip(eval_data, prediction_np, reference_np):
    if str(r_) != ins['em']:
        logger.info(f"Not Matching Instance")
        break
    ins['binary_inference'] = str(p_)

In [None]:
eval_data[2]

In [None]:
predcition_output_path = os.path.join(train_args.output_dir, 'prediction.json')

In [None]:
with open(predcition_output_path, "w") as f:
    json.dump(eval_data, f)

In [None]:
len(prediction_lst)

In [None]:
len(reference_lst)

In [None]:
len(eval_dataset)

In [None]:
eval_metric = metric_acc.compute()
eval_metric_pre = metric_pre.compute()
eval_metric_re = metric_re.compute()
eval_metric_f1 = metric_f1.compute()

logger.info(f"Accuracy : {eval_metric['accuracy']} Precision : {eval_metric_pre['precision']}")
logger.info(f"Recall : {eval_metric_re['recall']} F1 : {eval_metric_f1['f1']}")
logger.info(f"Eval_loss : {eval_loss.item() / len(eval_dataloader)}")

In [None]:
eval_dataloader

In [None]:
# Train & Eval
# NOTE(review): no `train` function is defined in the visible notebook cells, so
# this branch would raise NameError right after printing the message - it looks
# like a leftover from binary_classifier_predict.py; confirm before running.
if train_args.do_train:
    print('This script does not support train. Use binary_classifier.py for training')
    train(model_args, data_args, train_args)
    exit()
# Eval & Prediction only
# NOTE(review): no `eval` function is defined in the visible cells either, so the
# name presumably resolves to Python's builtin `eval`, which does not accept
# these argument objects - confirm where the script's own `eval` is defined.
if not train_args.do_train:
    eval(model_args, data_args, train_args)

In [None]:
from src.data import BinaryCustomDatasetShuffle

In [None]:
from accelerate import Accelerator
from pprint import pprint
from tqdm import tqdm

In [None]:
import json
import math
import os
import logging
import sys
import evaluate
from util import utils

import transformers
import torch
import numpy as np
import random
from torch import nn
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AutoModelForSequenceClassification, 
    AutoModel, 
    AutoConfig, 
    AutoTokenizer,
    DataCollatorWithPadding,
    EvalPrediction,
    HfArgumentParser,
    Trainer,
    TrainingArguments,
    set_seed,
    get_scheduler,
)
from util.arguments import ModelArguments, DataTrainingArguments 
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [None]:
class BinaryCustomDatasetShuffle(torch.utils.data.Dataset):
    """Question/passage pairs that are tokenized lazily, one item per access.

    Every instance is read as a mapping with a ``'question'`` string, a
    ``'ctx'`` mapping holding ``'title'`` and ``'text'`` strings, and an
    ``'em'`` value that ``int`` can convert into the label.
    """

    def __init__(self, instances, tokenizer, max_length, shuffle = False):
        # The shuffle happens in place, so the list handed in by the caller is
        # reordered as well and stays aligned with this dataset.
        if shuffle:
            random.shuffle(instances)
        self.max_length = max_length
        self.tokenizer = tokenizer
        self.sep_token = tokenizer.sep_token
        self.instances = instances

    def __len__(self):
        return len(self.instances)

    def __getitem__(self, idx):
        record = self.instances[idx]
        prompt = ''.join((
            'question: ', record['question'],
            ' title: ', record['ctx']['title'],
            ' context : ', record['ctx']['text'],
        ))
        # Tensor conversion and padding are left to the collator, so only
        # truncation and special tokens are requested here.
        encoded = self.tokenizer(
            prompt,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_length,
        )

        item = dict(encoded.items())
        item['labels'] = int(record['em'])
        return item

In [None]:
model_name_or_path = 'roberta-large'
num_labels = 2

In [None]:
config = AutoConfig.from_pretrained(model_name_or_path, num_labels=num_labels)

In [None]:
pprint(config)

In [None]:
print(config.num_labels)

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

In [None]:
ignore_mismatched_sizes = True

In [None]:
model = AutoModelForSequenceClassification.from_pretrained(
        model_name_or_path,
        config=config,
        ignore_mismatched_sizes=ignore_mismatched_sizes,
    )

In [None]:
train_file = '/data/philhoon-relevance/binary-classification/\
NQ-DEV-DPR/5-fold/1/binary_data/binary_ex_ctx100id_split_train_1_partial.json'
eval_file = '/data/philhoon-relevance/binary-classification/\
NQ-DEV-DPR/5-fold/1/binary_data/binary_ex_ctx100id_split_train_1_partial.json'

In [None]:
train_data = utils.open_json(train_file)
eval_data = utils.open_json(eval_file)

In [None]:
max_length = 200
shuffle = False

In [None]:
train_dataset = BinaryCustomDatasetShuffle(train_data, tokenizer = tokenizer, \
                                           max_length = max_length, shuffle = shuffle)

In [None]:
eval_dataset = BinaryCustomDatasetShuffle(eval_data, tokenizer = tokenizer, \
                                           max_length = max_length, shuffle = shuffle)

In [None]:
data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8)

In [None]:
batch_size = 8

In [None]:
train_dataloader = DataLoader(train_dataset,
                              shuffle = True,
                              collate_fn=data_collator,
                              batch_size=batch_size,
)

In [None]:
# Evaluation loader. Shuffling is disabled (as in the prediction loader earlier
# in this notebook) so batches follow dataset order and predictions can be
# matched back to their instances by position.
eval_dataloader = DataLoader(eval_dataset,
                              shuffle = False,
                              collate_fn=data_collator,
                              batch_size=batch_size,
)

In [None]:
no_decay = ["bias", "LayerNorm.weight"]

In [None]:
weight_decay = 0.0

In [None]:
optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
            "weight_decay": weight_decay,
        },
        {
            "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]

In [None]:
optimizer_grouped_parameters[0]["weight_decay"]

In [None]:
learning_rate=5e-5

In [None]:
optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=learning_rate)

In [None]:
lr_scheduler_type='linear'
num_warmup_steps = 0
# max_train_steps = 
num_train_epochs = 5
gradient_accumulation_steps = 1

In [None]:
num_update_steps_per_epoch = math.ceil(len(train_dataloader) / gradient_accumulation_steps)

In [None]:
max_train_steps = num_train_epochs * num_update_steps_per_epoch

In [None]:
max_train_steps

In [None]:
lr_scheduler = get_scheduler(
        name=lr_scheduler_type,
        optimizer=optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=max_train_steps,
)

In [None]:
# accelerator.prepare returns the wrapped objects rather than modifying its
# arguments, so the results must be bound back to the names the training loop
# uses; discarding them leaves the loop running on the unprepared objects.
model, optimizer, train_dataloader, eval_dataloader, lr_scheduler = accelerator.prepare(
    model, optimizer, train_dataloader, eval_dataloader, lr_scheduler
)

In [None]:
metric = evaluate.load("accuracy")

In [None]:
per_device_train_batch_size = 8

In [None]:
total_batch_size = per_device_train_batch_size * accelerator.num_processes * gradient_accumulation_steps

In [None]:
total_batch_size

In [None]:
progress_bar = tqdm(range(max_train_steps), disable=not accelerator.is_local_main_process)

In [None]:
num_train_epochs

In [None]:
starting_epoch = 0
with_tracking = True

In [None]:
checkpointing_steps = 50

In [None]:
# Fine-tune for `num_train_epochs` epochs. Every `checkpointing_steps` optimizer
# updates the accelerator state is saved, and after each epoch accuracy is
# measured on `eval_dataloader`.
#
# Fixes relative to the previous version of this cell:
#   * `completed_steps` is initialised (it was used before assignment);
#   * the undefined `args.*` lookups use the notebook-level variables instead;
#   * the evaluation loop's indentation is valid Python again;
#   * the metric is computed and logged once per epoch, after all evaluation
#     batches, instead of once per batch.
completed_steps = 0  # number of optimizer updates performed so far

for epoch in range(starting_epoch, num_train_epochs):
    model.train()
    if with_tracking:
        total_loss = 0
    for step, batch in enumerate(train_dataloader):
        outputs = model(**batch)
        loss = outputs.loss

        if with_tracking:
            # Track the unscaled loss for reporting.
            total_loss += loss.detach().float()

        # Scale so that accumulated gradients average over the micro-batches.
        loss = loss / gradient_accumulation_steps
        accelerator.backward(loss)

        if step % gradient_accumulation_steps == 0 or step == len(train_dataloader) - 1:
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
            progress_bar.update(1)
            completed_steps += 1

        if isinstance(checkpointing_steps, int):
            if completed_steps % checkpointing_steps == 0:
                # NOTE(review): the original read `args.output_dir`, but no `args`
                # namespace exists at this point; train_args.output_dir is the only
                # output directory configured in the visible cells - confirm that
                # this is where training checkpoints should go.
                output_dir = os.path.join(train_args.output_dir, f"step_{completed_steps}")
                accelerator.save_state(output_dir)
        if completed_steps >= max_train_steps:
            break

    model.eval()
    samples_seen = 0
    for step, batch in enumerate(eval_dataloader):
        with torch.no_grad():
            outputs = model(**batch)
        predictions = outputs.logits.argmax(dim=-1)
        predictions, references = accelerator.gather((predictions, batch["labels"]))

        # If we are in a multiprocess environment, the last batch has duplicates
        if accelerator.num_processes > 1:
            if step == len(eval_dataloader) - 1:
                predictions = predictions[: len(eval_dataloader.dataset) - samples_seen]
                references = references[: len(eval_dataloader.dataset) - samples_seen]
            else:
                samples_seen += references.shape[0]

        metric.add_batch(
            predictions=predictions,
            references=references,
        )

    # Computed after the loop so the score covers the whole evaluation set.
    eval_metric = metric.compute()
    logger.info(f"epoch {epoch}: {eval_metric}")

    if with_tracking:
        accelerator.log(
            {
                "accuracy" : eval_metric,
                "train_loss": total_loss.item() / len(train_dataloader),
                "epoch": epoch,
                "step": completed_steps,
            },
            step=completed_steps,
        )


In [None]:
metric_acc = evaluate.load("accuracy")
metric_pre = evaluate.load('precision')
metric_re = evaluate.load('recall')
metric_f1 = evaluate.load('f1')

In [None]:
metric_acc

In [None]:
accelerator.num_processes

In [None]:
parser = HfArgumentParser(
    (ModelArguments, DataTrainingArguments, TrainingArguments)
)
args = ["--model_name_or_path", 'allenai/longformer-large-4096', '--output_dir', './']
model_args, data_args, training_args = parser.parse_args_into_dataclasses(args)


In [None]:
type(model_args)

In [None]:
vars(training_args)

In [None]:
logger = logging.getLogger(__name__)

logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    handlers=[logging.StreamHandler(sys.stdout)],
)

log_level = training_args.get_process_log_level()
logger.setLevel(log_level)
transformers.utils.logging.set_verbosity(log_level)
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()

# Log on each process the small summary:
# The two message halves are joined with ", " so that the n_gpu value and
# "distributed training" no longer run together in the log line.
logger.warning(
    f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
    f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
)
logger.info(f"Training/evaluation parameters {training_args}")

In [None]:
# Detecting last checkpoint.
# get_last_checkpoint is not imported by any visible cell of this notebook, so it
# is imported here to keep the cell runnable on its own.
from transformers.trainer_utils import get_last_checkpoint

# Only look for a checkpoint when training into an existing output directory
# that the user did not ask to overwrite.
last_checkpoint = None
if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
    last_checkpoint = get_last_checkpoint(training_args.output_dir)
    if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
        # The directory holds files but no checkpoint: refuse to mix runs.
        raise ValueError(
            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
            "Use --overwrite_output_dir to overcome."
        )
    elif last_checkpoint is not None:
        logger.info(
            f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
            "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
        )


In [None]:
# Set seed before initializing model.
set_seed(training_args.seed)

In [None]:
config = AutoConfig.from_pretrained(
        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
        num_labels=model_args.num_labels,
    )
tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
    )
model = AutoModelForSequenceClassification.from_pretrained(
    model_args.model_name_or_path,
    config=config,
)

In [None]:
# Build the train/dev datasets (when training) and the test dataset (when
# evaluating).
# NOTE(review): `preprocessing_data` and `CustomDataset` are not defined or
# imported in the visible notebook cells - confirm where they come from.
if training_args.do_train:
    instances, cut_off, total_questions = preprocessing_data(
        data_args.train_file,
        data_args.sample_size,
        data_args.position)

    # The first `dev_size` instances are held out for development; the rest train.
    train_instance = instances[data_args.dev_size:]
    dev_instance = instances[:data_args.dev_size]

    train_dataset = CustomDataset(train_instance,
                               tokenizer,
                               model_args.max_seq_length)
    # Built from the held-out slice; it was previously built from train_instance,
    # which made the dev set a copy of the training set.
    dev_dataset = CustomDataset(dev_instance,
                               tokenizer,
                               model_args.max_seq_length)

    # Log a few random samples from the training set:
    for index in random.sample(range(len(train_dataset)), 3):
        logger.info(f"Sample {index} of the training set: {train_dataset[index]}.")

if training_args.do_eval:
    instances, cut_off, total_questions = preprocessing_data(
        data_args.test_file,
        data_args.sample_size,
        data_args.position)

    test_dataset = CustomDataset(instances,
                               tokenizer,
                               model_args.max_seq_length)
    
    


In [None]:
# Get the metric function
metric = evaluate.load("xnli")

def compute_metrics(p: EvalPrediction):
    """Score an evaluation pass with the module-level ``metric``.

    When ``p.predictions`` is a tuple only its first element is used. The
    predicted class is the arg-max along axis 1, and the result of
    ``metric.compute`` against ``p.label_ids`` is returned unchanged.
    """
    scores = p.predictions
    if isinstance(scores, tuple):
        scores = scores[0]
    predicted_classes = np.argmax(scores, axis=1)
    return metric.compute(predictions=predicted_classes, references=p.label_ids)


In [None]:
 # Initialize Trainer
data_collator = DataCollatorWithPadding(
    tokenizer, 
    pad_to_multiple_of=8,
)

# EarlyStoppingCallback is used below but is not imported by any visible cell.
from transformers import EarlyStoppingCallback

# Assemble the Trainer; each dataset is passed only when its phase is enabled.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset if training_args.do_train else None,
    # Gated on do_eval (previously do_train), so an evaluation-only run still
    # receives its dataset.
    eval_dataset=eval_dataset if training_args.do_eval else None,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
    data_collator=data_collator,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=30)]
)

# Training
if training_args.do_train:
    checkpoint = None
    if training_args.resume_from_checkpoint is not None:
        checkpoint = training_args.resume_from_checkpoint
    elif last_checkpoint is not None:
        checkpoint = last_checkpoint
    train_result = trainer.train(resume_from_checkpoint=checkpoint)
    metrics = train_result.metrics
    max_train_samples = (
        data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset)
    )
    metrics["train_samples"] = min(max_train_samples, len(train_dataset))

    trainer.save_model()  # Saves the tokenizer too for easy upload

    trainer.log_metrics("train", metrics)
    trainer.save_metrics("train", metrics)
    trainer.save_state()
    
# Evaluation
if training_args.do_eval:
    logger.info("*** Evaluate ***")
    metrics = trainer.evaluate(eval_dataset=eval_dataset)

    max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset)
    metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))

    trainer.log_metrics("eval", metrics)
    trainer.save_metrics("eval", metrics)

In [None]:
metric

In [None]:
data_args.dataset_name = a
    

In [None]:
data.max_seq_length

In [None]:
training_args.fp16

In [None]:
bb

In [None]:
def main():
    """Parse the command line into model, data and training argument objects.

    NOTE(review): only the argument parsing is visible here; the parsed values
    are not used or returned in this view - the function looks unfinished.
    """
    parser = HfArgumentParser(
        (ModelArguments, DataTrainingArguments, TrainingArguments)
    )

    model_args, data_args, training_args = parser.parse_args_into_dataclasses()
    
    