In [1]:
import torch
import evaluate
import numpy as np

from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments, DataCollatorForSeq2Seq

In [2]:
# Directory holding the previously fine-tuned checkpoint; the tokenizer is read from the same place.
model_name = './lora_trained/lora-flan-t5-squad/'
# NOTE(review): the warning captured in this cell's output lists `*.base_layer.weight` and
# `*.lora_A/lora_B.default.weight` keys as "not used", which suggests the checkpoint was
# saved from a LoRA-wrapped model and that its q/v weights are not restored by this plain
# `from_pretrained` call. Confirm before trusting any metric computed with this model.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the `squad_v2` dataset (all available splits).
dataset = load_dataset('squad_v2')

Some weights of the model checkpoint at ./lora_trained/lora-flan-t5-squad/ were not used when initializing T5ForConditionalGeneration: ['decoder.block.0.layer.0.SelfAttention.q.base_layer.weight', 'decoder.block.0.layer.0.SelfAttention.q.lora_A.default.weight', 'decoder.block.0.layer.0.SelfAttention.q.lora_B.default.weight', 'decoder.block.0.layer.0.SelfAttention.v.base_layer.weight', 'decoder.block.0.layer.0.SelfAttention.v.lora_A.default.weight', 'decoder.block.0.layer.0.SelfAttention.v.lora_B.default.weight', 'decoder.block.0.layer.1.EncDecAttention.q.base_layer.weight', 'decoder.block.0.layer.1.EncDecAttention.q.lora_A.default.weight', 'decoder.block.0.layer.1.EncDecAttention.q.lora_B.default.weight', 'decoder.block.0.layer.1.EncDecAttention.v.base_layer.weight', 'decoder.block.0.layer.1.EncDecAttention.v.lora_A.default.weight', 'decoder.block.0.layer.1.EncDecAttention.v.lora_B.default.weight', 'decoder.block.1.layer.0.SelfAttention.q.base_layer.weight', 'decoder.block.1.layer.0.Se

In [3]:
# Total number of parameters held by the loaded model.
total_params = sum(p.numel() for p in model.parameters())
# Size of the weights in bytes, using each tensor's real element size instead of
# assuming 4-byte float32 values (identical result when every parameter is float32).
model_size_bytes = sum(p.numel() * p.element_size() for p in model.parameters())
print(model_size_bytes)

# Iterating the loaded dataset yields its split names, so enumerate gives (index, split name).
for split_index, split_name in enumerate(dataset):
    print(split_index, split_name)

990311424
0 train
1 validation


In [4]:
def data_preprocessing(data, max_input_length=512, max_target_length=512):
    """Tokenize a batch of questions and their first reference answer.

    Relies on the module-level ``tokenizer``.

    Args:
        data: Batch mapping with a ``'question'`` list and an ``'answers'`` list whose
            items each carry a ``'text'`` list of answer strings.
        max_input_length: Truncation limit (in tokens) applied to the questions.
        max_target_length: Truncation limit (in tokens) applied to the answers.

    Returns:
        The tokenizer output for the questions, with an added ``'labels'`` entry
        holding the ``input_ids`` of the tokenized answers.
    """
    # NOTE(review): only the question text is encoded; if the dataset also provides a
    # context passage it is never shown to the model — confirm this is intended.
    questions = list(data['question'])

    # Use the first listed answer; examples without any answer text get '' as target.
    targets = [ans['text'][0] if len(ans['text']) > 0 else '' for ans in data['answers']]

    model_inputs = tokenizer(questions, max_length=max_input_length, truncation=True)

    # `text_target=` replaces the deprecated `as_target_tokenizer()` context manager.
    labels = tokenizer(text_target=targets, max_length=max_target_length, truncation=True)

    model_inputs['labels'] = labels['input_ids']
    return model_inputs

# Run the preprocessing function over the dataset in batches of 32 examples.
tokenized_dataset = dataset.map(
    data_preprocessing,
    batched=True,
    batch_size=32,
)

Map:   0%|          | 0/130319 [00:00<?, ? examples/s]



Map:   0%|          | 0/11873 [00:00<?, ? examples/s]

In [5]:

def compute_metrics(pred):
    """Compute position-wise token accuracy between predictions and labels.

    Args:
        pred: Evaluation output exposing ``predictions`` and ``label_ids``.
            ``predictions`` may be a 2-D array of token ids, a 3-D array of
            per-token scores (reduced with ``argmax`` over the last axis), or a
            tuple whose first element is one of those.

    Returns:
        dict: ``{"accuracy": value}`` computed over every position whose label
        is not ``-100``.
    """
    accuracy_metric = evaluate.load("accuracy")

    predictions = pred.predictions
    # Predictions can arrive as a tuple of arrays; the first entry is assumed to hold
    # the scores/ids to evaluate — TODO confirm for this model.
    if isinstance(predictions, tuple):
        predictions = predictions[0]

    # Convert before reading `.ndim` so plain lists are accepted as well.
    predictions = np.asarray(predictions)
    references = np.asarray(pred.label_ids)

    if predictions.ndim == 3:
        predictions = predictions.argmax(-1)

    # The two arrays may have different sequence lengths; compare only the shared prefix.
    width = min(predictions.shape[1], references.shape[1])
    predictions = predictions[:, :width]
    references = references[:, :width]

    # Positions labelled -100 are excluded from the score.
    keep = references != -100

    # NOTE(review): when predictions come from `generate()` on an encoder-decoder model
    # they usually begin with the decoder start token, which would shift them one
    # position relative to the labels — confirm whether the first column should be
    # dropped (or decoded text compared instead) before trusting this number.
    result = accuracy_metric.compute(
        predictions=predictions[keep].tolist(),
        references=references[keep].tolist(),
    )

    return {"accuracy": result['accuracy']}

In [6]:

# Preferred compute device; not referenced again within this cell.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Collator used by the trainer to assemble batches for the seq2seq model.
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)


# LoRA settings: rank 8, alpha 32, dropout 0.1, applied to the modules named "q" and "v".
# NOTE(review): no `task_type` is set; PEFT examples for encoder-decoder models pass
# task_type="SEQ_2_SEQ_LM" — confirm whether it should be added here.
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q", "v"],
    lora_dropout=0.1,
    bias="none"
)

# Apply LoRA to the model
lora_model = get_peft_model(model, lora_config)

# NOTE(review): `compute_metrics` loads its own accuracy metric, so this module-level
# handle is not used by it.
accuracy_metric = evaluate.load("accuracy")


# Training/evaluation settings: evaluate once per epoch, batches of 12 per device,
# 3 epochs, and `predict_with_generate=True` so evaluation scores generated sequences.
training_args = Seq2SeqTrainingArguments(
    output_dir = "./results",
    eval_strategy = "epoch",
    learning_rate = 1e-5,
    per_device_train_batch_size = 12,
    per_device_eval_batch_size = 12,
    weight_decay = 0.01,
    save_total_limit = 3,
    num_train_epochs =3,
    predict_with_generate =True
)


# Trainer over the LoRA-wrapped model, using the tokenized train/validation splits.
trainer = Seq2SeqTrainer(
    model=lora_model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['validation'],
    data_collator= data_collator,
    compute_metrics= compute_metrics
)

# Uncomment this to retrain the model.
# trainer.train()

In [7]:

# Evaluate on the trainer's eval dataset (the validation split) and show the metrics dict.
results = trainer.evaluate()
print(results)




  0%|          | 0/990 [00:00<?, ?it/s]

{'eval_loss': 7.995985984802246, 'eval_model_preparation_time': 0.006, 'eval_accuracy': 0.24964726288946706, 'eval_runtime': 324.4098, 'eval_samples_per_second': 36.599, 'eval_steps_per_second': 3.052}


In [8]:
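# NOTE(review): `get_peft_model` was applied to `model` earlier, so saving `model` directly
# appears to write LoRA-wrapped keys (`*.base_layer.weight`, `*.lora_A/lora_B.default.weight`),
# the same keys the load warning in cell 2 reports as unused. Confirm before re-enabling;
# saving the adapter via `lora_model.save_pretrained(...)` or saving a merged model is
# probably what is wanted.
# model.save_pretrained(model_name)
# tokenizer.save_pretrained(model_name)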