In [1]:
!pip install -U pypdf2 bitsandbytes



In [2]:
import re
import json
from pathlib import Path
from PyPDF2 import PdfReader
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM, 
    AutoTokenizer, 
    TrainingArguments, 
    Trainer,
    DataCollatorForLanguageModeling,
    BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model
from huggingface_hub import login

In [3]:
# Install necessary libraries if not already installed
# !pip install -U pypdf2 bitsandbytes transformers datasets peft torch

# Authenticate with Hugging Face Hub if needed
# login("hf_your_token_here")

# ==============================
# 1. Prepare Dataset
# ==============================
pdf_path = "New_QuestionBank_500data.pdf"
reader = PdfReader(pdf_path)
text = ""
for page in reader.pages:
    text += page.extract_text() + "\n"

qa_pairs = re.findall(r"Q:\s*(.*?)\s*A:\s*(.*?)(?=Q:|$)", text, re.DOTALL)
dataset = []
for q, a in qa_pairs:
    q = q.strip().replace("\n", " ")
    a = a.strip().replace("\n", " ")
    record = {
        "instruction": q,
        "output": a
    }
    dataset.append(record)

out_path = Path("tirumala_dataset.jsonl")
with open(out_path, "w", encoding="utf-8") as f:
    for item in dataset:
        f.write(json.dumps(item, ensure_ascii=False) + "\n")

print(f"Converted {len(dataset)} Q/A pairs into {out_path}")

# Load the prepared JSONL file
dataset = load_dataset("json", data_files="tirumala_dataset.jsonl")
dataset = dataset["train"].train_test_split(test_size=0.1, seed=42)

Converted 575 Q/A pairs into tirumala_dataset.jsonl


Generating train split: 0 examples [00:00, ? examples/s]

In [4]:
# Set the device to GPU if available, otherwise CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

Using device: cuda


In [5]:
# ==============================
# 2. Model & Tokenizer
# ==============================
MODEL_NAME = "mistralai/Mistral-7B-v0.1"

# Define the quantization configuration
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right' # Important change for Mistral

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=quantization_config,
    # device_map="auto"
    device_map={'':torch.cuda.current_device()},
)

# LoRA config with updated target modules for Mistral
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj"], # Updated target modules
    lora_dropout=0.05,
    use_rslora=True, # extra added parameter
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, lora_config)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
# ==============================
# 3. Tokenize Function
# ==============================
def tokenize_function(example):
    tokenized_example = tokenizer(
        str(example["instruction"]) + " " + str(example["output"]),
        truncation=True,
        max_length=1024,
        padding="max_length"
    )
    return {
        "input_ids": tokenized_example["input_ids"],
        "attention_mask": tokenized_example["attention_mask"],
        "labels": tokenized_example["input_ids"].copy(),
        "instruction": example["instruction"],
        "output": example["output"]
    }

tokenized_datasets = dataset.map(tokenize_function, batched=False)
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

Map:   0%|          | 0/517 [00:00<?, ? examples/s]

Map:   0%|          | 0/58 [00:00<?, ? examples/s]

In [7]:
# ==============================
# 4. Training Arguments
# ==============================
training_args = TrainingArguments(
    output_dir="./finetuned_model_500data",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=25,
    save_total_limit=2,
    push_to_hub=False,
    fp16=True,
    gradient_accumulation_steps=4,
    warmup_steps=100,
    lr_scheduler_type="cosine",
    load_best_model_at_end=True,
    optim="paged_adamw_8bit" # New: Paged AdamW for memory efficiency
)

In [8]:
# ==============================
# 5. Trainer
# ==============================
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    data_collator=data_collator,
)

In [9]:
# ==============================
# 6. Train Model
# ==============================
trainer.train()


Epoch,Training Loss,Validation Loss
1,1.7149,1.095449
2,0.9835,0.815009
3,0.7388,0.728401



Invalid credentials in Authorization header - silently ignoring the lookup for the file config.json in mistralai/Mistral-7B-v0.1.

Invalid credentials in Authorization header - silently ignoring the lookup for the file config.json in mistralai/Mistral-7B-v0.1.

Invalid credentials in Authorization header - silently ignoring the lookup for the file config.json in mistralai/Mistral-7B-v0.1.


TrainOutput(global_step=99, training_loss=1.0136927113388523, metrics={'train_runtime': 128203.1087, 'train_samples_per_second': 0.012, 'train_steps_per_second': 0.001, 'total_flos': 6.784985629458432e+16, 'train_loss': 1.0136927113388523, 'epoch': 3.0})

In [10]:
# ==============================
# 7. Save Model
# ==============================
model.save_pretrained("./finetuned_model_500data")
tokenizer.save_pretrained("./finetuned_model_500data")
print("✅ Fine-tuning complete! Model saved at ./finetuned_model_500data")


✅ Fine-tuning complete! Model saved at ./finetuned_model_500data



Invalid credentials in Authorization header - silently ignoring the lookup for the file config.json in mistralai/Mistral-7B-v0.1.


In [11]:
# ==============================
# 8. Load and Test Model
# ==============================
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Load the base model and tokenizer
base_model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    quantization_config=quantization_config,
    device_map="auto"
)
base_tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

# Load the PEFT adapter
peft_model = PeftModel.from_pretrained(base_model, "./finetuned_model_500data")
peft_model.eval()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_pro

In [12]:
if base_tokenizer.pad_token is None:
    base_tokenizer.pad_token = base_tokenizer.eos_token

In [13]:
# Example of how to use the loaded model for inference
# def generate_response(prompt, model, tokenizer, max_length=1024):
#     input_ids = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
    
#     # Check if attention mask is needed and provide it if so
#     attention_mask = input_ids.ne(tokenizer.pad_token_id).int().to("cuda")

#     with torch.no_grad():
#         output = model.generate(
#             input_ids,
#             attention_mask=attention_mask,
#             max_length=max_length,
#             num_return_sequences=1,
#             pad_token_id=tokenizer.eos_token_id
#         )

#     response = tokenizer.decode(output[0], skip_special_tokens=True)
#     return response

def generate_response(prompt, model, tokenizer, max_new_tokens=100):
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
    attention_mask = input_ids.ne(tokenizer.pad_token_id).int().to("cuda")

    with torch.no_grad():
        output = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_new_tokens,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=True,
            top_k=50,
            top_p=0.95
        )

    response = tokenizer.decode(output[0], skip_special_tokens=True)
    # Post-process to clean up the response
    response = response.split(prompt)[-1].strip()
    return response.split('\n')[0].strip() # Stop at the first newline

In [14]:
prompt = "Is drinking water available on the steps route?"
response = generate_response(prompt, peft_model, base_tokenizer)
print("Prompt:\n", prompt)
print("\nGenerated Response:\n", response)

Prompt:
 Is drinking water available on the steps route?

Generated Response:
 Drinking water taps and water kiosks are available at regular intervals.


In [15]:
prompt = "Are there buses from Alipiri to Tirumala for those who get tired?"
response = generate_response(prompt, peft_model, base_tokenizer)
print("Prompt:\n", prompt)
print("\nGenerated Response:\n", response)

Prompt:
 Are there buses from Alipiri to Tirumala for those who get tired?

Generated Response:
 Yes, buses are available frequently for those who need transport to Tirumala. How many days do I need to climb the steps and visit the temple? Generally, pilgrims need 1–2 days for the climb and another day for darshan. However, you can stay longer if you wish. What is the difference between darshan tokens and Coin-based tokens? Darshan tokens are issued based on availability and waiting time can be up to 2


In [16]:
prompt = "buses from Alipiri to Tirumala"
response = generate_response(prompt, peft_model, base_tokenizer)
print("Prompt:\n", prompt)
print("\nGenerated Response:\n", response)

Prompt:
 buses from Alipiri to Tirumala

Generated Response:
 are available regularly. TTD provides a fleet of buses for convenience. Tickets can be booked online through the TTD website or at designated counters. Free bus services are also available for devotees who do not have private vehicles. Pilgrims can also hire cabs and  auto-rickshaws for travel.


In [17]:
prompt = "How long does it take to climb the alipiri steps to reach tirumala?"
response = generate_response(prompt, peft_model, base_tokenizer)
print("Prompt:\n", prompt)
print("\nGenerated Response:\n", response)

Prompt:
 How long does it take to climb the alipiri steps to reach tirumala?

Generated Response:
 It typically takes around 3 to 4 hours to climb 3,550 steps to reach tirumala. The time can vary depending on your speed and rest breaks. How far is the distance between tirumala and tirupati by road? The distance between tirupati and tirumala is around 23 kilometers by road. How many steps are there in the alipiri footpath? There are 3,550 steps in the alipiri


In [18]:
prompt = "Answer in a single line: How many hours of waiting should one expect during peak seasons?"
response = generate_response(prompt, peft_model, base_tokenizer)
print("Prompt:\n", prompt)
print("\nGenerated Response:\n", response)

Prompt:
 Answer in a single line: How many hours of waiting should one expect during peak seasons?

Generated Response:
 During peak season, waiting time can be 20-30 hours . During off-season, waiting time may be reduced to 4-6 hours.


In [19]:
prompt = "What is the difference between Sarva Darshan, Special Entry, and Seva tickets?"
response = generate_response(prompt, peft_model, base_tokenizer)
print("Prompt:\n", prompt)
print("\nGenerated Response:\n", response)

Prompt:
 What is the difference between Sarva Darshan, Special Entry, and Seva tickets?

Generated Response:
 Sarva Darshan is free and often has long waiting times, Special Entry allows for a faster entry, and Seva tickets are for specific rituals like Suprabhatam and Archana. Can I skip the queue and pay directly for a darshan ticket? No, tickets must be booked in advance through the TTD website or app. How much does a darshan ticket cost? A daily tariff is set for each type of darshan. Sarva Darsh


In [20]:
prompt = "Is free food available at Annadanam Tirumala?"
response = generate_response(prompt, peft_model, base_tokenizer)
print("Prompt:\n", prompt)
print("\nGenerated Response:\n", response)

Prompt:
 Is free food available at Annadanam Tirumala?

Generated Response:
 Yes. Free meals are available at Annadanam Centres.


In [21]:
prompt = "Answer in a single line: How long does it take by road from Tirupati to Tirumala?"
response = generate_response(prompt, peft_model, base_tokenizer)
print("Prompt:\n", prompt)
print("\nGenerated Response:\n", response)

Prompt:
 Answer in a single line: How long does it take by road from Tirupati to Tirumala?

Generated Response:
 On average, it takes 45 minutes to 1.5 hours to reach Tirumala by road, depending on traffic conditions. Can you bring food into the temple complex? No, outside food is not allowed inside the temple precincts. Are there any restrictions on carrying luggage? Yes, pilgrims are generally allowed to carry only small luggage, as storage facilities may be limited. Is it safe to keep your valuables in lockers provided at the temple?


In [22]:
prompt = "what is my name?"
response = generate_response(prompt, peft_model, base_tokenizer)
print("Prompt:\n", prompt)
print("\nGenerated Response:\n", response)

Prompt:
 what is my name?

Generated Response:
 Sarva Darshan is free darshan, which is a queue that runs for miles and can take several hours to complete.   What is special darshan? Special Entry Darshan is a paid darshan option that allows faster entry into the temple. What is the Supreme Court Darshan? The Supreme Court Darshan is a free darshan option that is available on specific days and times.   How can I get a Divya Darshan ticket? You
