# In [None]:
import numpy as np
import torch
import wandb
from datasets import Dataset, load_metric
from peft import LoftQConfig, LoraConfig, get_peft_model
from transformers import (
    AutoTokenizer,
    DataCollatorForSeq2Seq,
    EvalPrediction,
    T5ForConditionalGeneration,
    T5Tokenizer,
    Trainer,
    TrainerCallback,
    TrainingArguments,
)

  

# NOTE: do not call wandb.init() here. The run for this script is started once,
# further down, for the "t5-finetuning-peft" project. An extra init at this
# point targeted a different project under the placeholder entity
# "your_username", which either fails or leaves a stray, empty run behind.

 

# Trainer callback that mirrors log records to Weights & Biases

class LoggingCallback(TrainerCallback):
    """Forward each log record emitted by the Trainer to ``wandb.log``."""

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Send ``logs`` to W&B; do nothing when no log dict was supplied.

        Only ``logs`` is used; the remaining arguments are accepted to match
        the callback hook signature and are ignored.
        """
        if logs is None:
            return
        wandb.log(logs)

  

# Start the Weights & Biases run for this fine-tuning session
WANDB_PROJECT = "t5-finetuning-peft"
wandb.init(project=WANDB_PROJECT)

  

# Sample data
# Each record pairs an "article" with its reference "summary".
# Replace these toy splits with your own data.
train_data = [
    {
        "article": "Fine-tuning language models can significantly improve their performance on specialized tasks. By training the model on task-specific data, it can learn the nuances of the task and provide better results.",
        "summary": "Fine-tuning improves model performance on specialized tasks.",
    },
    {
        "article": "Quantized methods like QLoRA are becoming popular for efficient fine-tuning of large language models. These methods reduce the computational resources required for training while maintaining accuracy.",
        "summary": "QLoRA enables efficient fine-tuning of large models.",
    },
    {
        "article": "Weight and Biases (W&B) is a powerful tool for tracking and visualizing machine learning experiments. It helps data scientists monitor the training process and gain insights into model performance.",
        "summary": "W&B aids in tracking and visualizing ML experiments.",
    },
    {
        "article": "Using pre-trained models as a starting point for fine-tuning can save time and resources. Pre-trained models have already learned a vast amount of information from large datasets.",
        "summary": "Pre-trained models save time and resources.",
    },
    {
        "article": "During fine-tuning, it is essential to monitor the model's loss and metrics to ensure it is learning correctly. Visualization tools can help in understanding the training process.",
        "summary": "Monitoring loss and metrics is crucial during fine-tuning.",
    },
    {
        "article": "Data augmentation techniques can help improve the robustness of machine learning models. By artificially increasing the diversity of the training data, models can generalize better to new, unseen data.",
        "summary": "Data augmentation improves model robustness.",
    },
    {
        "article": "Transfer learning involves using a pre-trained model on a new, but related task. This approach can accelerate the training process and improve model performance.",
        "summary": "Transfer learning accelerates training and improves performance.",
    },
    {
        "article": "Hyperparameter tuning is a critical step in optimizing model performance. Techniques like grid search and random search are commonly used to find the best hyperparameter values.",
        "summary": "Hyperparameter tuning optimizes model performance.",
    },
    {
        "article": "Regularization methods like dropout and weight decay help prevent overfitting in neural networks. These techniques ensure that the model generalizes well to new data.",
        "summary": "Regularization prevents overfitting in neural networks.",
    },
    {
        "article": "Cross-validation is a robust method for assessing the performance of machine learning models. It provides a more accurate estimate of model performance than a single train-test split.",
        "summary": "Cross-validation provides accurate performance estimates.",
    },
]

test_data = [
    {
        "article": "Active learning can reduce the amount of labeled data needed for training. By selectively choosing the most informative samples, models can learn more efficiently.",
        "summary": "Active learning reduces the need for labeled data.",
    },
    {
        "article": "Ensemble methods combine the predictions of multiple models to improve accuracy. Techniques like bagging and boosting are popular ensemble methods.",
        "summary": "Ensemble methods improve prediction accuracy.",
    },
    {
        "article": "Gradient descent is a fundamental optimization algorithm used to train machine learning models. Variants like stochastic gradient descent (SGD) offer improvements in convergence speed.",
        "summary": "Gradient descent and its variants optimize model training.",
    },
]

evaluation_data = [
    {
        "article": "Feature engineering is the process of using domain knowledge to create features that make machine learning algorithms work better. It is a crucial step in building effective models.",
        "summary": "Feature engineering enhances model effectiveness.",
    },
    {
        "article": "Batch normalization is a technique used to improve the training of deep neural networks. It normalizes the input of each layer, allowing for faster and more stable training.",
        "summary": "Batch normalization stabilizes and speeds up training.",
    },
    {
        "article": "Model interpretability is important for understanding how machine learning models make decisions. Techniques like SHAP and LIME provide insights into model predictions.",
        "summary": "Model interpretability techniques provide insights into predictions.",
    },
]

# Wrap each split's list of records in a Hugging Face Dataset
train_dataset, test_dataset, eval_dataset = (
    Dataset.from_list(records)
    for records in (train_data, test_data, evaluation_data)
)

  


# Load the T5 tokenizer and model
# NOTE(review): an earlier note here quoted ~60 million parameters and a
# 987 MB footprint for 't5-small'; those figures are not verified by this code.
model_name = 't5-small'

tokenizer = T5Tokenizer.from_pretrained(model_name)

# The base model is loaded un-quantized on purpose: with LoftQ the PEFT library
# quantizes the backbone itself while initializing the LoRA layers.
base_model = T5ForConditionalGeneration.from_pretrained(model_name)

# Apply PEFT using LoRA with LoftQ initialization
loftq_config = LoftQConfig(loftq_bits=4)  # 4-bit quantization for LoftQ

lora_config = LoraConfig(
    # T5 is an encoder-decoder model; declaring the task lets get_peft_model
    # pick the seq2seq-specific PEFT wrapper.
    task_type="SEQ_2_SEQ_LM",
    # Without init_lora_weights="loftq" the loftq_config below is ignored, so
    # no LoftQ initialization would actually take place.
    # NOTE(review): LoftQ init has extra runtime requirements (scipy, and
    # depending on the PEFT version possibly bitsandbytes/CUDA) -- confirm for
    # the target environment.
    init_lora_weights="loftq",
    loftq_config=loftq_config,
    r=6,  # Rank of the adaptation matrices
    # The LoRA update is scaled by lora_alpha / r before it is added to the
    # original weights.
    lora_alpha=16,
    target_modules=["k", "q", "v"],  # Attention projections to adapt
    lora_dropout=0.1,  # Dropout applied inside the LoRA layers
    bias="none",  # Do not adapt bias terms
)

peft_model = get_peft_model(base_model, lora_config)

peft_model.print_trainable_parameters()


 

# Preprocessing the datasets

def preprocess_function(examples):
    """Tokenize articles and summaries into model inputs with masked labels.

    Args:
        examples: Mapping with "article" and "summary" entries. Each entry is
            either a list of strings (as passed by ``Dataset.map`` with
            ``batched=True``) or a single string.

    Returns:
        The tokenizer output for the articles (padded/truncated to 512
        tokens) with an added "labels" entry holding the summary token ids
        (padded/truncated to 128 tokens). Padding positions in the labels are
        set to -100 so the loss ignores them.
    """
    inputs = tokenizer(examples["article"], padding="max_length", truncation=True, max_length=512)
    targets = tokenizer(examples["summary"], padding="max_length", truncation=True, max_length=128)

    pad_id = tokenizer.pad_token_id

    def _mask_padding(token_ids):
        # -100 is the label value the loss skips; without it the model is
        # also trained to predict the padding that fills up to max_length.
        return [-100 if token == pad_id else token for token in token_ids]

    label_ids = targets["input_ids"]
    if isinstance(examples["summary"], str):
        # Single (non-batched) example: input_ids is one flat list.
        inputs["labels"] = _mask_padding(label_ids)
    else:
        inputs["labels"] = [_mask_padding(sequence) for sequence in label_ids]

    return inputs

  

# Tokenize every split in batches with preprocess_function
train_dataset, test_dataset, eval_dataset = (
    split.map(preprocess_function, batched=True)
    for split in (train_dataset, test_dataset, eval_dataset)
)

 # Define the compute_metrics function 

def compute_metrics(pred: EvalPrediction):
    """Compute the ROUGE-1 F-measure for one evaluation pass.

    Args:
        pred: Predictions and label ids collected by the Trainer.
            ``pred.predictions`` may be token ids, raw logits, or a tuple
            whose first element is the logits; ``pred.label_ids`` may contain
            -100 at positions that are ignored by the loss.

    Returns:
        A dict with the single key "rouge1" (mid F-measure).
    """
    # NOTE(review): load_metric is deprecated in newer `datasets` releases;
    # confirm the pinned version still provides it.
    metric = load_metric("rouge")

    predictions = pred.predictions
    # A plain Trainer hands over the model outputs rather than generated ids;
    # when several outputs are returned the logits come first.
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    predictions = np.asarray(predictions)
    if predictions.ndim == 3:
        # (batch, sequence, vocab) logits -> most likely token id per position
        predictions = predictions.argmax(axis=-1)

    # -100 is not a valid token id, so swap it for the pad token before
    # decoding; skip_special_tokens then drops the padding again.
    pad_id = tokenizer.pad_token_id
    pred_ids = np.where(predictions == -100, pad_id, predictions)
    labels_ids = np.asarray(pred.label_ids)
    labels_ids = np.where(labels_ids == -100, pad_id, labels_ids)

    # Decode the predicted and true labels
    decoded_preds = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)

    # Only ROUGE-1 (unigram overlap) is reported, so no sentence-level
    # newline formatting is required; surrounding whitespace is trimmed.
    decoded_preds = [text.strip() for text in decoded_preds]
    decoded_labels = [text.strip() for text in decoded_labels]

    result = metric.compute(predictions=decoded_preds, references=decoded_labels)

    return {
        "rouge1": result["rouge1"].mid.fmeasure
    }

 

# Training configuration, gathered in one mapping and then handed to
# TrainingArguments
_training_kwargs = {
    "do_eval": True,
    "output_dir": "./results",
    "eval_strategy": "steps",  # Evaluate on a step schedule
    "eval_steps": 10,  # Evaluate every 10 steps
    "logging_dir": './logs',  # Directory for storing logs
    "logging_steps": 20,  # Log every 20 steps
    "learning_rate": 2e-5,
    "per_device_train_batch_size": 20,
    "per_device_eval_batch_size": 20,
    "num_train_epochs": 30,
    "weight_decay": 0.01,
    "report_to": "wandb",
}

training_args = TrainingArguments(**_training_kwargs)

  

# Build the Trainer from the model, arguments, datasets and metric function
trainer = Trainer(
    args=training_args,
    model=peft_model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    # Custom callback that forwards Trainer log records to W&B
    callbacks=[LoggingCallback()],
)

  

# Fine-tune the model, then run a final evaluation pass on eval_dataset
trainer.train()

eval_results = trainer.evaluate()
print(eval_results)  # surface the final metrics instead of discarding them

# Save the LoRA adapter in its own directory. It must not share a directory
# with the merged model: a directory containing adapter files next to full
# model weights is ambiguous to load.
# NOTE(review): when an adapter config is present, from_pretrained may resolve
# the base model named in it rather than the weights stored alongside --
# confirm against the installed transformers/peft versions.
trainer.save_model("./t5-finetuned-adapter")

# Fold the LoRA weights into the base model and drop the PEFT wrappers
merged_model = peft_model.merge_and_unload()

# Save the merged model together with its tokenizer so the directory is
# loadable on its own
merged_model.save_pretrained("./t5-finetuned")
tokenizer.save_pretrained("./t5-finetuned")

# Finish the W&B run
wandb.finish()