In [14]:
import re  

def load_text(file_path):  
    with open(file_path, 'r', encoding='utf-8') as file:  
        text = file.read()  
    return text  

def preprocess_text(text):  
    # Remove non-English characters and symbols  
    text = re.sub(r'[^a-zA-Z\s]', '', text)  # Keep only English letters and whitespace  
    text = ' '.join(text.split())  # Remove extra spaces  
    text = text.lower()  # Convert to lowercase  
    return text  

def split_into_chunks(text, chunk_size=400):  # Reduced chunk size  
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]  
    return chunks  

# Load and preprocess the text  
file_path = "/kaggle/input/bhagwat-gita/The Bhagavad Gita.txt"  # Replace with your file path  
text = load_text(file_path)  
cleaned_text = preprocess_text(text)  
chunks = split_into_chunks(cleaned_text)  

# Save preprocessed chunks (optional)  
with open('preprocessed_chunks.txt', 'w', encoding='utf-8') as file:  
    for chunk in chunks:  
        file.write(chunk + '\n')  

In [15]:
!pip install -U langchain-community



In [16]:
# Step 2: Generate embeddings and create a FAISS vector database  
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from typing import List, Optional

def create_vector_database(
    chunks: List[str], 
    model_name: str = 'sentence-transformers/all-MiniLM-L6-v2',
    save_path: Optional[str] = None
) -> FAISS:
    """
    Creates a FAISS vector database from text chunks using HuggingFace embeddings.
    
    Args:
        chunks (List[str]): List of text chunks to be embedded
        model_name (str): Name of the sentence transformer model to use
        save_path (Optional[str]): Path to save the vector database. If None, database won't be saved
    
    Returns:
        FAISS: A FAISS vector database containing the embeddings
        
    Raises:
        ValueError: If chunks list is empty
    """
    if not chunks:
        raise ValueError("Chunks list cannot be empty")
    
    # Initialize the embedding model
    embedding_function = HuggingFaceEmbeddings(
        model_name=model_name,
        model_kwargs={'device': 'cpu'}
    )
    
    # Create FAISS vector database
    vector_db = FAISS.from_texts(
        texts=chunks,
        embedding=embedding_function
    )
    
    # Save the database if path is provided
    if save_path:
        vector_db.save_local(save_path)
    
    return vector_db

In [18]:
!pip install faiss-cpu
!pip install huggingface_hub[hf_xet]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m48.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting hf-xet>=0.1.4 (from huggingface_hub[hf_xet])
  Downloading hf_xet-1.0.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (494 bytes)
Downloading hf_xet-1.0.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (53.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.8/53.8 MB[0m [31m28.1 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: hf-xet
Successfully installed hf-xet-1.0.3


In [19]:
from sentence_transformers import SentenceTransformer  
from langchain.vectorstores import FAISS  

# Generate embeddings with truncation  
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')  
embeddings = model.encode(chunks, truncate=True)  # Truncate excess tokens  

# Create and save the vector database  
vector_db = FAISS.from_embeddings(  
    text_embeddings=list(zip(chunks, embeddings)),  
    embedding=embeddings[0]  
)  
vector_db.save_local("faiss_vector_db")  

Batches:   0%|          | 0/53 [00:00<?, ?it/s]

In [20]:
def retrieve_relevant_chunks(query, vector_db, model, top_k=3):  
    query_embedding = model.encode([query])  
    relevant_chunks = vector_db.similarity_search_by_vector(query_embedding[0], k=top_k)  
    return relevant_chunks  

# Example retrieval  
query = "What is the concept of Karma in the Bhagavad Gita?"  
relevant_chunks = retrieve_relevant_chunks(query, vector_db, model)  
for i, chunk in enumerate(relevant_chunks):  
    print(f"Chunk {i+1}:\n{chunk.page_content}\n")  


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Chunk 1:
 categorically states that the best path for one to achieve divine spiritual knowledge is for one to focus on performing ones assigned duties as an offering to the lord without expectations on the outcome nishkama the bhagavad gita karma such a realized soul would be rid of the consequences of good and bad deeds and would be eligible for liberation among all activities those that lead to attainmen

Chunk 2:
e spiritual knowledge taught in the gita is a summary of the scriptures such as pancha ratra that was taught by lord vishnu to deities such as brahma vayu rudra indra surya yama the ashwini twins etc in verse the example of king janaka was quoted to demonstrate the practice of nishkama karma among householders who have family and societal responsibilities the three verses to give examples of other

Chunk 3:
learning while doing or like a good lesson that we usually learn at the height of misery and peril no wonder that the bhagavad gita is a prescribed text in india and els

In [22]:
!pip install google-generativeai langchain-google-genai sentence-transformers faiss-cpu  

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting langchain-google-genai
  Downloading langchain_google_genai-2.1.3-py3-none-any.whl.metadata (4.7 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain-google-genai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
INFO: pip is looking at multiple versions of langchain-google-genai to determine which version is compatible with other requirements. This could take a while.
Collecting langchain-google-genai
  Downloading langchain_google_genai-2.1.2-py3-none-any.whl.metadata (4.7 kB)
  Downloading langchain_google_genai-2.1.1-py3-none-any.whl.metadata (4.7 kB)
  Downloading langchain_google_genai-2.1.0-py3-none-any.whl.metadata (3.6 kB)
  Downloading langchain_google_genai-2.0.11-py3-none-any.whl.metadata (3.6 kB)
  Downloading langchain_google_genai-2.0.10-py3-none-any.whl.metadata (3.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6

In [25]:
!pip install google-generativeai sentence-transformers scikit-learn numpy  

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [27]:
import google.generativeai as genai  
from sentence_transformers import SentenceTransformer  
import numpy as np  
import os  
from sklearn.metrics.pairwise import cosine_similarity  

# 1. Configure Google Gemini  
os.environ['GOOGLE_API_KEY'] = 'AIzaSyDx7yJCneYXXTXP1I4ed03QN0saGwYRjHI'  # Replace with your actual API key  
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])  

# 2. Initialize embedding model (no LangChain)  
embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')  

# 3. Load your documents (replace with your actual document loading logic)  
documents = [  
    "The Bhagavad Gita teaches...",  # Your document 1  
    "Karma yoga emphasizes...",      # Your document 2  
    # Add all your documents here  
]  

# 4. Pre-compute embeddings  
document_embeddings = embedding_model.encode(documents)  

def retrieve_relevant_documents(query, k=3):  
    """Semantic search without FAISS"""  
    query_embedding = embedding_model.encode(query)  
    similarities = cosine_similarity([query_embedding], document_embeddings)[0]  
    most_relevant = np.argsort(similarities)[-k:][::-1]  
    return [documents[i] for i in most_relevant]  

def get_answer(question):  
    try:  
        # Retrieve context  
        context = "\n".join(retrieve_relevant_documents(question))  
        
        # Generate answer using pure Gemini API  
        model = genai.GenerativeModel('gemini-1.5-pro-latest')  
        response = model.generate_content(  
            f"""Answer this question about the Bhagavad Gita:  
            
            Question: {question}  
            
            Context: {context}  
            
            Provide a detailed answer with relevant verses:"""  
        )  
        return response.text  
    except Exception as e:  
        return f"Error: {str(e)}"  

# Example usage  
if __name__ == "__main__":  
    question = "Explain the concept of Dharma in the Bhagavad Gita"  
    answer = get_answer(question)  
    print("Question:", question)  
    print("Answer:", answer)  

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Explain the concept of Dharma in the Bhagavad Gita
Answer: The concept of Dharma in the Bhagavad Gita is complex and multifaceted, going beyond simple "duty" or "righteousness."  It's a nuanced concept encompassing one's individual nature (svadharma), cosmic order, action performed with selfless intent, and the path towards liberation.

Here's a breakdown:

* **Svadharma (One's Own Dharma):** The Gita emphasizes that everyone has a unique dharma based on their individual nature (guna) and social position (varna).  Fulfilling this svadharma, even imperfectly, is better than trying to perfectly follow someone else's dharma.  Krishna encourages Arjuna to fight because it's his duty as a warrior, essential to upholding the social order and his own inherent nature.  This is not a rigid caste system argument, but rather about recognizing and fulfilling one's inherent potential and responsibilities within the larger cosmic order.

> **Bhagavad Gita 3.35:**  "It is far better to perf

In [29]:
    question = "What is concept of Bhaktiyog and how it is different from gyanyog"  
    answer = get_answer(question)  
    print("Question:", question)  
    print("Answer:", answer) 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: What is concept of Bhaktiyog and how it is different from gyanyog
Answer: The Bhagavad Gita presents multiple paths (yogas) to achieve liberation (moksha) from the cycle of birth and death. Bhakti Yoga and Jnana Yoga are two prominent paths among them, though intertwined and ultimately leading to the same goal.  They differ significantly in their approach and focus:

**Bhakti Yoga: The Path of Devotion**

Bhakti Yoga is the path of loving devotion to a personal God (often Krishna in the Gita's context). It emphasizes cultivating a deep, personal relationship with the divine through practices like prayer, chanting, worship, and selfless service. The essence of Bhakti Yoga lies in surrendering one's ego and will to the divine, offering all actions and their fruits as a loving offering.  It focuses on the heart, emphasizing love and faith as the primary means of attaining liberation.

* **Key characteristics of Bhakti Yoga:**
    * **Love and Surrender:**  The central element is

In [30]:
    question = "Explain the concept of krishna as Superior of all and give examples to support it"  
    answer = get_answer(question)  
    print("Question:", question)  
    print("Answer:", answer) 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Explain the concept of krishna as Superior of all and give examples to support it
Answer: The Bhagavad Gita presents Krishna not merely as a charioteer or even a revered teacher, but as the Supreme Being, the ultimate reality, and the source of all existence.  This concept of Krishna's supremacy is woven throughout the text and supported by several key arguments and demonstrations:

**1.  Cosmic Form (Vishvarupa):**  Perhaps the most dramatic demonstration of Krishna's supremacy is his revelation of the Vishvarupa – the Universal Form – to Arjuna in Chapter 11. This awe-inspiring vision showcases the entire universe residing within Krishna, including all beings, past, present, and future. Arjuna witnesses the creation and destruction of worlds within Krishna, confirming his absolute power and encompassing nature.

* **Verse 11.15-16:** "I see You without beginning, middle, or end, infinite in power with limitless arms. The sun and moon are Your eyes, the burning fire Your mou