In [3]:
# 🛠️ Step 1: Install dependencies
!pip install -q transformers datasets peft accelerate bitsandbytes

# 🧱 Step 2: Imports
import os
import json
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
    BitsAndBytesConfig
)
from peft import (
    LoraConfig,
    get_peft_model,
    prepare_model_for_kbit_training
)

# 📁 Step 3: Sample Q&A dataset (5 examples)
qa_pairs = [
    {
        "instruction": "What are the rights of a woman to seek divorce under Muslim Family Law in Pakistan?",
        "input": "",
        "output": "Under Muslim Family Law in Pakistan, a woman can seek divorce through Khula if she cannot live with her husband within the limits prescribed by Allah. The Family Court may grant Khula after reconciliation efforts fail."
    },
    {
        "instruction": "Who has custody of children after a divorce in Pakistan?",
        "input": "",
        "output": "Generally, the mother has the right to custody of young children (Hizanat), but the father is responsible for their maintenance. As children grow older, custody decisions are made considering the best interests of the child."
    },
    {
        "instruction": "What is the role of the arbitration council in a Muslim divorce?",
        "input": "",
        "output": "The Arbitration Council, under the Muslim Family Laws Ordinance 1961, is responsible for attempting reconciliation after a divorce notice is served and before the divorce becomes effective after 90 days."
    },
    {
        "instruction": "Is registration of marriage (Nikah) mandatory in Pakistan?",
        "input": "",
        "output": "Yes, the registration of marriage is mandatory under the Muslim Family Laws Ordinance 1961. Nikah Nama (marriage contract) must be duly registered with the Union Council."
    },
    {
        "instruction": "Can a Muslim woman in Pakistan include conditions in the Nikah Nama?",
        "input": "",
        "output": "Yes, a woman can include conditions in the Nikah Nama such as the right to divorce (Talaq-e-Tafweez), restrictions on second marriage, and financial matters. These clauses are legally binding if not contrary to Islamic injunctions."
    }
]

# 💾 Step 4: Save to JSONL
with open("muslim_family_law_qa.jsonl", "w") as f:
    for item in qa_pairs:
        f.write(json.dumps(item) + "\n")

# 📦 Step 5: Load and prepare dataset
dataset = load_dataset("json", data_files="muslim_family_law_qa.jsonl")["train"]

def format_example(example):
    return {
        "text": f"### Instruction:\n{example['instruction']}\n\n### Input:\n{example['input']}\n\n### Response:\n{example['output']}"
    }

dataset = dataset.map(format_example)

# 🔡 Step 6: Load TinyLlama model and tokenizer (4-bit quantized)
model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # 🔧 Fix padding issue

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=BitsAndBytesConfig(load_in_4bit=True)
)

# 🧠 Step 7: PEFT + LoRA config
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)

# ✂️ Step 8: Tokenize dataset
def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=512)

tokenized_dataset = dataset.map(tokenize, batched=True)

# ⚙️ Step 9: Training config
training_args = TrainingArguments(
    output_dir="./tinyllama-legal-bot",
    per_device_train_batch_size=2,
    num_train_epochs=3,
    logging_steps=10,
    save_steps=50,
    save_total_limit=2,
    fp16=True,
    report_to="none"
)

data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator
)

# 🚀 Step 10: Train!
trainer.train()


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/5 [00:00<?, ? examples/s]

Map:   0%|          | 0/5 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Step,Training Loss


  return fn(*args, **kwargs)


TrainOutput(global_step=9, training_loss=2.5935607486300998, metrics={'train_runtime': 7.0747, 'train_samples_per_second': 2.12, 'train_steps_per_second': 1.272, 'total_flos': 47722235166720.0, 'train_loss': 2.5935607486300998, 'epoch': 3.0})

In [5]:
model.save_pretrained("tinyllama_lora_muslim_family_law")
tokenizer.save_pretrained("tinyllama_lora_muslim_family_law")


('tinyllama_lora_muslim_family_law/tokenizer_config.json',
 'tinyllama_lora_muslim_family_law/special_tokens_map.json',
 'tinyllama_lora_muslim_family_law/tokenizer.model',
 'tinyllama_lora_muslim_family_law/added_tokens.json',
 'tinyllama_lora_muslim_family_law/tokenizer.json')

In [6]:
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained("tinyllama_lora_muslim_family_law")

model = PeftModel.from_pretrained(base_model, "tinyllama_lora_muslim_family_law")
model.eval()

# Inference
prompt = "Q: What are the rights of a woman after divorce under Muslim Family Law in Pakistan?\nA:"
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


Q: What are the rights of a woman after divorce under Muslim Family Law in Pakistan?
A: The rights of a woman after divorce under Muslim Family Law in Pakistan are as follows:
1. The woman has the right to maintain her property and her children.
2. The woman has the right to maintain her property and her children.
3. The woman has the right to maintain her property and her children.
4. The woman has the right to maintain her property and her children.
5. The woman has the right to maintain her property and her children.
6
