In [None]:
from huggingface_hub import login

# Interactive Hugging Face Hub login (recommended for Colab or a local notebook).
# Called without arguments, so no access token is written into the notebook source.
login()

In [None]:
!pip install bitsandbytes==0.41.1
!pip install transformers==4.38.1
!pip install accelerate==0.27.2

In [None]:
# NOTE(review): `-U` upgrades bitsandbytes, accelerate and transformers to their
# latest releases, overriding the exact versions pinned in the previous cell.
# The training cell passes `evaluation_strategy` to TrainingArguments and
# `tokenizer` / `dataset_text_field` to SFTTrainer; newer transformers/trl
# releases may not accept these keywords — confirm which versions are intended
# and pin them here as well.
!pip install -U bitsandbytes
!pip install -U accelerate transformers datasets peft trl

In [None]:
# 🛠️ Imports for QLoRA fine-tuning (the libraries themselves are installed in the cells above).
# One-line install equivalent: !pip install -q bitsandbytes accelerate datasets peft transformers trl

import torch
from datasets import load_dataset, Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments
)
from peft import (
    prepare_model_for_kbit_training,
    LoraConfig,
    get_peft_model
)
from trl import SFTTrainer
from transformers import BitsAndBytesConfig
import warnings
# Suppress every Python warning from here on.
# NOTE(review): this blanket filter also hides deprecation warnings raised by the
# libraries imported above — consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [None]:
# Load the "knkarthick/samsum" dataset; its rows expose the `dialogue` and
# `summary` fields that the preprocessing below relies on.
dataset = load_dataset(path="knkarthick/samsum")

def format_data(example):
    """Attach the supervised fine-tuning fields to one dataset record.

    Args:
        example: Mapping with a ``dialogue`` string and a ``summary`` string.

    Returns:
        The same mapping, updated in place with:

        ``text``
            The instruction prompt followed by the reference summary. This is the
            column the trainer is pointed at (``dataset_text_field="text"``), so it
            has to contain the target: a prompt that stops at "Summary:" gives a
            causal language model nothing to learn the summary from.
        ``labels``
            The reference summary on its own.
    """
    prompt = f"Summarize the following conversation:\n\n{example['dialogue']}\n\nSummary:"
    # Prompt and target are joined with a single space, mirroring how a generated
    # continuation follows "Summary:" at inference time.
    example["text"] = f"{prompt} {example['summary']}"
    example["labels"] = example["summary"]
    return example

# Run format_data over the dataset one record at a time (map's default mode)
dataset = dataset.map(function=format_data)

In [None]:
# Load the tokenizer that belongs to the base checkpoint

# Hub id of the base model; reused below when the model weights are loaded.
model_id = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the end-of-sequence token as the padding token so padded batches can be built.
# NOTE(review): padding and EOS then share one token — confirm that loss masking of
# padding does not also hide the EOS the model should learn to emit.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
#quantization + lora config

# 4-bit loading settings handed to `from_pretrained` via `quantization_config`:
# double quantization is enabled and computation runs in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    # NOTE(review): the quant-type override below is disabled, so the library default
    # applies (documented as "fp4"); QLoRA setups usually select "nf4" — confirm intent.
   # bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)


# LoRA adapter settings, grouped by what they control.
lora_config = LoraConfig(
    # what is being adapted
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    # adapter size and scaling
    r=16,
    lora_alpha=32,
    # regularisation / bias handling
    lora_dropout=0.05,
    bias="none",
)

In [None]:
# Load the Mistral base model in 4-bit and prepare it for QLoRA

# `trust_remote_code` is deliberately left at its default: the checkpoint is handled
# by a model class that ships with transformers, so there is no reason to allow
# Python code from the Hub repository to be executed locally.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)

# First make the quantized model suitable for k-bit training, then attach the
# LoRA adapters described by `lora_config`.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)

In [None]:
# Tokenize the dataset

def tokenize(example, max_length=512):
    """Tokenize the ``text`` field and build labels aligned with ``input_ids``.

    A causal-LM loss compares logits and labels position by position, so the
    labels must have exactly the same length as ``input_ids``. They are therefore
    a copy of ``input_ids`` in which padding positions (attention mask 0) are
    replaced by -100, the index ignored by the cross-entropy loss. Anything the
    model should learn to produce (e.g. the summary) consequently has to be part
    of ``example["text"]``.

    Args:
        example: A single record or a batch (as passed by ``map(batched=True)``)
            containing a ``text`` entry.
        max_length: Length every sequence is padded/truncated to.

    Returns:
        The tokenizer output with an added ``labels`` entry.
    """
    encoded = tokenizer(
        example["text"], padding="max_length", truncation=True, max_length=max_length
    )

    def _mask_padding(token_ids, attention):
        # Keep real tokens, blank out padding so it does not contribute to the loss.
        return [tok if keep else -100 for tok, keep in zip(token_ids, attention)]

    ids, mask = encoded["input_ids"], encoded["attention_mask"]
    if isinstance(example["text"], str):
        # Un-batched call: one flat sequence.
        encoded["labels"] = _mask_padding(ids, mask)
    else:
        encoded["labels"] = [_mask_padding(t, a) for t, a in zip(ids, mask)]
    return encoded

# Batched map: `tokenize` receives lists of values rather than single records
tokenized_dataset = dataset.map(function=tokenize, batched=True)

In [None]:
# Hyperparameters for the supervised fine-tuning run

training_args = TrainingArguments(
    # where checkpoints and logs are written
    output_dir="./mistral_qlora_samsum",
    logging_dir="./logs",
    # optimisation
    learning_rate=2e-4,
    num_train_epochs=2,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    bf16=True,
    # evaluate and checkpoint once per epoch
    evaluation_strategy="epoch",
    save_strategy="epoch",
    # keep everything local: no Hub upload, no experiment tracker
    push_to_hub=False,
    report_to="none",
)

# Build the SFT trainer around the LoRA-wrapped model and the tokenized splits.
# NOTE(review): `tokenized_dataset` already carries `input_ids`, while
# `dataset_text_field="text"` points the trainer at the raw text column — confirm
# which of the two the installed trl version actually trains on.
# NOTE(review): the test split is evaluated after every epoch here and is also the
# split scored with ROUGE further down; consider a separate validation split.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    dataset_text_field="text"
)

# Start fine-tuning with the arguments defined above
trainer.train()

In [None]:
# ROUGE evaluation
# `load_metric` comes from the `datasets` package and is not imported anywhere
# else in this notebook, so bring it into scope here before it is used.
# NOTE(review): needs the `rouge_score` package and a `datasets` release that still
# ships `load_metric` — confirm against the installed versions.
from datasets import load_metric

rouge = load_metric("rouge")

def compute_rouge(preds, refs):
    """Print one line per ROUGE variant with its mid F-measure.

    Args:
        preds: Generated summaries.
        refs: Reference summaries, aligned with ``preds``.

    The scores come from the notebook-level ``rouge`` metric (stemming enabled);
    nothing is returned, the values are only printed with four decimals.
    """
    results = rouge.compute(predictions=preds, references=refs, use_stemmer=True)
    for metric_name in results:
        f_measure = results[metric_name].mid.fmeasure
        print(f"{metric_name}: {f_measure:.4f}")

def generate_predictions(model, dataset, tokenizer, max_samples=50):
    """Generate summaries for at most ``max_samples`` rows of ``dataset``.

    Args:
        model: Causal language model exposing ``generate``, ``eval`` and ``device``.
            It is switched to eval mode so dropout does not perturb generation.
        dataset: Iterable of rows with ``dialogue`` and ``summary`` keys.
        tokenizer: Tokenizer used to encode the prompts and decode the outputs.
        max_samples: Upper bound on the number of rows used; ``None`` uses all rows.

    Returns:
        ``(preds, refs)`` - the generated summaries, with the prompt removed and
        surrounding whitespace stripped, and the matching reference summaries.
    """
    from itertools import islice

    rows = list(islice(dataset, max_samples))
    refs = [row['summary'] for row in rows]
    if not rows:
        return [], refs

    prompts = [
        f"Summarize the following conversation:\n\n{row['dialogue']}\n\nSummary:"
        for row in rows
    ]

    # A decoder-only model continues from the last position of every row, so the
    # batch must be padded on the left; the tokenizer's own setting is restored after.
    original_side = tokenizer.padding_side
    tokenizer.padding_side = "left"
    try:
        inputs = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True).to(model.device)
    finally:
        tokenizer.padding_side = original_side

    model.eval()
    outputs = model.generate(
        input_ids=inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_new_tokens=60,
        do_sample=True,
        top_p=0.9,
        temperature=0.7
    )

    # `generate` returns prompt tokens followed by the continuation. Only the
    # continuation is a prediction; scoring the echoed prompt would distort ROUGE.
    prompt_len = inputs['input_ids'].shape[1]
    decoded = tokenizer.batch_decode(outputs[:, prompt_len:], skip_special_tokens=True)
    preds = [text.strip() for text in decoded]
    return preds, refs

# Score generations for the first 50 test dialogues against their references
sample_data = dataset['test'].select(indices=range(50))
preds, refs = generate_predictions(model=model, dataset=sample_data, tokenizer=tokenizer)
compute_rouge(preds=preds, refs=refs)

In [None]:
# Sample inference on the first test dialogue

sample = dataset['test'][0]
print("Dialogue:\n", sample['dialogue'])

input_prompt = f"Summarize the following conversation:\n\n{sample['dialogue']}\n\nSummary:"
inputs = tokenizer(input_prompt, return_tensors="pt").to(model.device)

# Make sure dropout is disabled before sampling a summary.
model.eval()
output = model.generate(
    input_ids=inputs['input_ids'],
    attention_mask=inputs['attention_mask'],
    max_new_tokens=60,
    do_sample=True,
    top_p=0.9,
    temperature=0.7
)

# `generate` returns the prompt tokens followed by the continuation; decode only the
# continuation so the printed summary is not prefixed with the whole dialogue.
prompt_len = inputs['input_ids'].shape[1]
generated_summary = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()

print("\nGenerated Summary:\n", generated_summary)
print("\nReference Summary:\n", sample['summary'])

In [None]:
# Save the fine-tuned QLoRA model and its tokenizer into the same directory so
# they can be reloaded together.
# NOTE(review): `model` is the object returned by get_peft_model, so presumably only
# the LoRA adapter weights (not the 4-bit base model) are written — confirm.
model.save_pretrained("./mistral-qlora-samsum-adapter")
tokenizer.save_pretrained("./mistral-qlora-samsum-adapter")