In [None]:
!pip install peft
!pip install transformers
!pip install bitsandBytes
!pip install datasets

In [1]:
from datasets import load_dataset

In [None]:
!pip install datasets

In [None]:
from datasets import load_dataset

In [None]:
import torch
import os

# Select the compute device once: CUDA when torch can see a GPU, CPU otherwise,
# and report which one was chosen.
use_gpu = torch.cuda.is_available()
device = torch.device("cuda" if use_gpu else "cpu")
print("GPU is available" if use_gpu else "GPU is not available, using CPU instead")

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
from transformers import LlamaTokenizer
from huggingface_hub import notebook_login
from datasets import load_dataset
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model

In [None]:
# SECURITY: never store an access token in the notebook itself. The token that
# used to be hard-coded in this cell must be considered leaked and should be
# revoked in the Hugging Face account settings.
#
# Reuse HF_TOKEN when the environment already provides it; otherwise ask for it
# interactively so the secret never lands in the saved file or its outputs.
if not os.environ.get("HF_TOKEN"):
    from getpass import getpass  # local import keeps this cell self-contained

    os.environ["HF_TOKEN"] = getpass("Hugging Face access token: ")

In [None]:
# Hub id of the base checkpoint that gets fine-tuned.
base_model_id = "meta-llama/Llama-2-7b-chat-hf"

# Quantization settings: 4-bit weights in the "nf4" format with double
# quantization enabled and bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Load the base model with the quantization config applied and let the library
# decide device placement ("auto").
model_load_kwargs = {
    "quantization_config": bnb_config,
    "device_map": "auto",
}
model = AutoModelForCausalLM.from_pretrained(base_model_id, **model_load_kwargs)

In [None]:
import os

# Resolve the corpus file name against the current working directory and show
# the resulting absolute path (diagnostic only; nothing is opened here).
file_name = "BhagavadGita.txt"
file_path = os.path.abspath(file_name)

print(f"Full path: {file_path}")


In [None]:
from datasets import load_dataset, Features, Value

# Schema for the corpus: a single string column named "text".
features = Features({"text": Value("string")})

# Both scripture files are combined into one "train" split.
corpus_files = [
    "/content/data/BhagavadGita.txt",
    "/content/data/siva_puranam.txt",
]

train_dataset = load_dataset(
    "text",
    split="train",
    features=features,
    data_files={"train": corpus_files},
)


In [None]:
# Peek at one raw row. Index the row first and then the column: selecting the
# column first (`train_dataset["text"][1000]`) fetches the entire column just to
# display a single value.
train_dataset[1000]["text"]

In [None]:
# Training tokenizer for the base checkpoint. `add_eos_token=True` asks the
# tokenizer to append the end-of-sequence token to every encoded sample.
tokenizer = LlamaTokenizer.from_pretrained(
    base_model_id,
    add_eos_token=True,
    use_fast=False,
    trust_remote_code=True,
)

In [None]:
# Give the tokenizer a padding token when it has none, reusing the EOS token.
# The check must look at `tokenizer.pad_token`: `tokenizer.pad` is the padding
# *method*, which is never None, so the previous condition could never fire.
if tokenizer.pad_token is None:
  tokenizer.add_special_tokens({"pad_token": tokenizer.eos_token})

In [None]:
# Tokenize every row's "text" field; the result is a plain Python list holding
# one tokenizer output per dataset row, in dataset order.
tokenized_train_dataset = [tokenizer(example["text"]) for example in train_dataset]

In [None]:
# Sanity check: display the tokenizer output stored for row 100.
tokenized_train_dataset[100]

In [None]:
# Display the string the tokenizer uses as its end-of-sequence token.
tokenizer.eos_token

In [None]:
# Turn on gradient checkpointing, then run PEFT's preparation step for k-bit
# (quantized) training on the loaded model.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

# Projection layers that receive LoRA adapters.
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# LoRA hyper-parameters: rank 8, alpha 64, 5% dropout, no bias terms trained.
config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    target_modules=lora_target_modules,
)

# Wrap the prepared model so the LoRA adapters are attached.
model = get_peft_model(model, config)

In [None]:
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling

# The collator below pads each batch, so the tokenizer must expose a pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # or use tokenizer.add_special_tokens({'pad_token': '[PAD]'})
# NOTE(review): with pad == EOS the language-modeling collator presumably masks
# EOS positions out of the loss as well; confirm whether a distinct pad token is
# wanted so the model still learns where to stop.

# Optional: Resize model embeddings if you've added a new pad token
# model.resize_token_embeddings(len(tokenizer))  # Only if you added new token

# Prevent the use_cache warning during training.
model.config.use_cache = False

# Training configuration.
# * `max_steps` is the only length control: when it is positive it overrides
#   `num_train_epochs`, so the former `num_train_epochs=5` had no effect and
#   was removed.
# * `save_strategy="epoch"` ignores `save_steps`, so the former `save_steps=50`
#   was removed too. The later inference cell loads
#   "finetunedModel/checkpoint-100", i.e. the checkpoint written when the run
#   stops at `max_steps`.
training_args = TrainingArguments(
    output_dir="./finetunedModel",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,
    learning_rate=1e-4,
    max_steps=100,
    fp16=False,
    optim="paged_adamw_8bit",  # Use standard "adamw_torch" if "paged_adamw_8bit" is unsupported
    logging_dir="./log",
    save_strategy="epoch",
    logging_steps=20,
)

# Causal-LM collation (mlm=False): no masked-language-modeling objective.
trainer = Trainer(
    model=model,
    train_dataset=tokenized_train_dataset,
    args=training_args,
    data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False),
)

# Train the model
trainer.train()


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import LlamaTokenizer, BitsAndBytesConfig
from peft import PeftModel

# Reload the quantized base model for inference (same checkpoint and same
# 4-bit settings as the training run).
base_model_id = "meta-llama/Llama-2-7b-chat-hf"

nf4Config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = LlamaTokenizer.from_pretrained(base_model_id, use_fast=False, trust_remote_code=True, add_eos_token=True)

# The deprecated `use_auth_token=True` argument was dropped: credentials are
# picked up from the HF_TOKEN environment variable / cached login by default,
# which is also how the first load of this checkpoint in the notebook works.
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=nf4Config,
    device_map="auto",
    trust_remote_code=True,
)

In [None]:
# Inference tokenizer. Unlike the training tokenizer it must NOT append the
# end-of-sequence token: with `add_eos_token=True` every prompt would end in
# EOS and generation would start after an end-of-sequence marker.
tokenizer = LlamaTokenizer.from_pretrained(base_model_id, use_fast=False, trust_remote_code=True)

# Attach the LoRA adapter saved by the training run to the reloaded base model.
modelFinetuned = PeftModel.from_pretrained(model, "finetunedModel/checkpoint-100")

In [None]:
# Ask the fine-tuned model a question about the Shiva Purana.
user_question = "what is mentioned in shiva puran about lord shiva?"
eval_prompt = f"Question: {user_question} just answer this question accurately and concisely. \n"

# Switch to evaluation mode and move the encoded prompt to the GPU.
modelFinetuned.eval()
promptTokenized = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

# Generate without tracking gradients, decode the first returned sequence and
# print it, then release cached GPU memory.
with torch.no_grad():
    generated_ids = modelFinetuned.generate(**promptTokenized, max_new_tokens=1024)
    answer_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    print(answer_text)
    torch.cuda.empty_cache()

In [None]:
# Ask the fine-tuned model a question about the Bhagavad Gita.
user_question = "what is the role of lord krishna in bhagavad gita?"
eval_prompt = f"Question: {user_question} just answer this question accurately and concisely. \n"

# Switch to evaluation mode and move the encoded prompt to the GPU.
modelFinetuned.eval()
promptTokenized = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

# Generate without tracking gradients, decode the first returned sequence and
# print it, then release cached GPU memory.
with torch.no_grad():
    generated_ids = modelFinetuned.generate(**promptTokenized, max_new_tokens=1024)
    answer_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    print(answer_text)
    torch.cuda.empty_cache()