In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive/ so the dataset and model paths used
# further down (all under /content/drive/MyDrive/...) resolve.
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [2]:
!pip install datasets transformers rouge_score nltk evaluate rouge_score transformers[torch] peft sacrebleu -q bitsandbytes datasets accelerate loralib

In [3]:
from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer, DataCollatorForSeq2Seq, AutoTokenizer

# Base checkpoint shared by the model and its tokenizer.
model_checkpoint = "t5-small"
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# The collator's `model` argument must be the model object, not the checkpoint
# name: it is used to call `prepare_decoder_input_ids_from_labels` on the model.
# Passing the string silently disabled that step.
# NOTE(review): `max_length` appears to only take effect together with
# padding="max_length"; with the default padding the batch is padded to its
# longest sequence -- confirm whether a fixed length of 200 was intended.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model, max_length=200)

In [4]:
import torch
import torch.nn as nn
# Freeze every weight of the base model; only adapter weights added later train.
for base_weight in model.parameters():
  base_weight.requires_grad = False
  is_one_dimensional = base_weight.ndim == 1
  if is_one_dimensional:
    # 1-D parameters (e.g. layernorm) are stored as fp32 for stability.
    base_weight.data = base_weight.data.to(torch.float32)

# Store fewer activations by recomputing them during the backward pass.
model.gradient_checkpointing_enable()
# Let gradients flow from the model inputs even though the weights are frozen.
model.enable_input_require_grads()

class CastOutputToFloat(nn.Sequential):
  """Sequential container whose forward result is always returned as float32."""

  def forward(self, x):
    # Run the wrapped modules as usual, then upcast whatever dtype they produced.
    wrapped_output = super().forward(x)
    return wrapped_output.to(torch.float32)
# Wrap the LM head so the logits it produces are returned as float32.
# Re-running this cell wraps the head again (wrapper inside wrapper).
model.lm_head = CastOutputToFloat(model.lm_head)

In [5]:
import evaluate

# ROUGE metric object; `compute_metrics` calls `rouge.compute(...)` on decoded text.
rouge = evaluate.load("rouge")

In [6]:
import numpy as np


def compute_metrics(eval_pred):
    """Decode generated token ids and score them against the references with ROUGE.

    Args:
        eval_pred: A ``(predictions, labels)`` pair of token-id arrays.
            ``predictions`` may also be a tuple whose first element holds the
            generated ids. Either array may use ``-100`` as a padding marker.

    Returns:
        dict mapping each metric name returned by ``rouge.compute`` (plus
        ``"gen_len"``, the mean number of non-pad tokens per prediction) to
        its value rounded to 4 decimals.
    """
    predictions, labels = eval_pred
    if isinstance(predictions, tuple):
        # Some model outputs arrive as (generated_ids, ...); keep the ids only.
        predictions = predictions[0]

    pad_id = tokenizer.pad_token_id
    # -100 is not a real vocabulary id: swap it for the pad id before decoding.
    # This is done for predictions as well as labels, because gathered
    # generations of different lengths can be padded with -100 too; leaving
    # them in breaks decoding and counts padding as generated tokens.
    predictions = np.where(predictions != -100, predictions, pad_id)
    labels = np.where(labels != -100, labels, pad_id)

    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    result = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)

    # Generated length = number of non-pad tokens in each prediction.
    prediction_lens = [np.count_nonzero(pred != pad_id) for pred in predictions]
    result["gen_len"] = np.mean(prediction_lens)

    return {k: round(v, 4) for k, v in result.items()}

In [7]:
def print_trainable_parameters(model):
    """Print how many of ``model``'s parameters are trainable.

    Walks ``model.named_parameters()`` once and reports the number of elements
    with ``requires_grad`` set, the total number of elements, and the
    trainable share as a percentage.
    """
    element_counts = [
        (tensor.numel(), tensor.requires_grad)
        for _name, tensor in model.named_parameters()
    ]
    all_param = sum(count for count, _ in element_counts)
    trainable_params = sum(count for count, wants_grad in element_counts if wants_grad)
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [8]:
from peft import get_peft_config, get_peft_model, get_peft_model_state_dict, LoraConfig, TaskType

# LoRA adapter configuration for a sequence-to-sequence language model.
peft_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
)

# Wrap the base model with the adapters. `model` is rebound to the wrapped
# model, so re-running this cell wraps an already wrapped model.
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 294,912 || all params: 60,801,536 || trainable%: 0.4850403779272945


In [9]:
from huggingface_hub import notebook_login

# Interactive Hugging Face Hub login widget. Credentials are needed because
# the training arguments below set push_to_hub=True.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [10]:
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments
from transformers import T5ForConditionalGeneration, T5Tokenizer
from datasets import load_from_disk
from torch.utils.tensorboard import SummaryWriter

# Load the pre-tokenized train and test splits saved on Google Drive.
# Requires the Drive mount from the first cell.
tokenized_dataset_train = load_from_disk("/content/drive/MyDrive/wikihow_data/T5small_train_tokenized")
tokenized_dataset_test = load_from_disk("/content/drive/MyDrive/wikihow_data/T5small_test_tokenized")

# Single source of truth for where checkpoints and TensorBoard event files go.
OUTPUT_DIR = "/content/drive/MyDrive/Models/wikihow_t5small_LoRA_fine_tuned"
TENSORBOARD_LOG_DIR = f"{OUTPUT_DIR}/tensorboard_logs"

# No manual SummaryWriter is created here: the Trainer writes its own event
# files to `logging_dir`. The previous hand-made writer was opened and closed
# without ever logging a value.

# Training arguments
training_args = Seq2SeqTrainingArguments(
    output_dir=OUTPUT_DIR,
    evaluation_strategy="epoch",  # evaluate once at the end of every epoch
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    weight_decay=0.01,
    save_total_limit=2,
    num_train_epochs=2,
    predict_with_generate=True,  # compute_metrics receives generated token ids
    fp16=True,
    gradient_checkpointing=True,
    push_to_hub=True,
    logging_dir=TENSORBOARD_LOG_DIR,  # Set the directory for TensorBoard logs
)

# Trainer setup
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset_train,
    eval_dataset=tokenized_dataset_test,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# NOTE(review): the recorded run below reports Rouge* = 0.0 and Gen Len = 0.0
# for both epochs, i.e. every generated token was counted as padding. The cause
# is not visible from this notebook (fp16 numerics with T5 is one candidate --
# unconfirmed); investigate generation before trusting the pushed checkpoint.

# Train and log metrics to TensorBoard
model.config.use_cache = False  # turn the decoder cache off for the training run
trainer.train()

You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len
1,0.4494,0.268441,0.0,0.0,0.0,0.0,0.0




Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len
1,0.4494,0.268441,0.0,0.0,0.0,0.0,0.0
2,0.2821,0.255748,0.0,0.0,0.0,0.0,0.0




In [11]:
# Save the trained model to the Drive output directory. The recorded output
# shows this call also uploading files (the run was configured with
# push_to_hub=True).
trainer.save_model("/content/drive/MyDrive/Models/wikihow_t5small_LoRA_fine_tuned")

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/1.19M [00:00<?, ?B/s]

events.out.tfevents.1702179490.cda89b4bc3f9.20360.1:   0%|          | 0.00/7.50k [00:00<?, ?B/s]

In [13]:
# Push the trainer's output to the Hugging Face Hub; the cell output is the repo URL.
trainer.push_to_hub()

'https://huggingface.co/rohitmacherla3/wikihow_t5small_LoRA_fine_tuned/tree/main/'

In [None]:
# Reload the tokenized test split (same path as in the training cell).
# `load_from_disk` is imported in the training cell, so on a fresh kernel this
# cell only works after that import has run.
tokenized_dataset_test = load_from_disk("/content/drive/MyDrive/wikihow_data/T5small_test_tokenized")

In [None]:
# Show the source text of the first test example. Row-first indexing fetches a
# single row instead of materialising the whole 'text' column.
tokenized_dataset_test[0]['text']

In [None]:
# Use a pipeline as a high-level helper
from transformers import pipeline

# NOTE(review): this repo id differs from the repo this notebook pushed to
# (".../wikihow_t5small_LoRA_fine_tuned"); confirm it is the intended checkpoint.
summarizer = pipeline("summarization", model="rohitmacherla3/wikihow_t5small_LoRA")
# Summarise the first test example. Row-first indexing avoids loading the whole
# 'text' column just to read one entry.
summarizer(tokenized_dataset_test[0]['text'])

In [None]:
from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer, DataCollatorForSeq2Seq

# Load a previously published checkpoint from the Hub.
# NOTE(review): this rebinds `model_checkpoint` and `model`, replacing the
# objects used for training above; nothing in the visible cells uses the new
# `model` afterwards.
model_checkpoint = "rohitmacherla3/T5fine_tuned_LoRA_model"
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)

In [None]:
# Use a pipeline as a high-level helper
from transformers import pipeline

# Second summarisation pipeline, built from a different Hub checkpoint, run on
# the same test example for a side-by-side comparison.
summarizer_2 = pipeline("summarization", model="Prashanth-1998/wikihow_t5small_model")
# Row-first indexing avoids loading the whole 'text' column for one entry.
summarizer_2(tokenized_dataset_test[0]['text'])

In [None]:
# Reference headline of the first test example, for comparison with the
# generated summaries. Row-first indexing fetches one row rather than the
# whole 'headline' column.
tokenized_dataset_test[0]['headline']