In [10]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import Ollama
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
import os

In [12]:
# Model and output handling: a local Ollama LLM produces the completion and
# the parser hands it back as a plain string.
llm = Ollama(model="llama3.2")
parser = StrOutputParser()

# Instruction text for the question-answering prompt. The model is told to
# answer only from the supplied context and to use a fixed fallback phrase
# otherwise. `{context}` and `{question}` are filled in at invoke time.
QA_TEMPLATE = """
Answer the question based on the context below.
If you can't answer the question based on the context, respond with "mujhe nahi pata".

Context:
{context}

Question:
{question}

Answer:
"""
prompt = PromptTemplate.from_template(QA_TEMPLATE)

# Pipeline: fill the prompt -> run the LLM -> parse the output.
chain = prompt | llm | parser

In [14]:
# --- IMPORTANT ---
# Path of the PDF to index. Either set the PDF_QA_PATH environment variable
# or edit the fallback path below to the location of your PDF file.
# For example: "C:/Users/YourUser/Documents/report.pdf" or "./data/my_paper.pdf"
pdf_path = os.environ.get(
    "PDF_QA_PATH", "/Users/pulkitchauhan/coding/ollam/1723042610822.pdf"
)

# isfile (not exists) so that a directory path is rejected here with a clear
# message instead of failing later inside the PDF loader.
if not os.path.isfile(pdf_path):
    print(f"Error: The file '{pdf_path}' was not found.")
    print("Please update the 'pdf_path' variable with the correct path to your PDF.")
else:
    # 1. Load and split the PDF
    # load() is used instead of load_and_split(): the latter already applies
    # a default text splitter, so running our own splitter on its result
    # would chunk the text twice. Here the document is chunked exactly once,
    # with the explicit size/overlap below.
    print("Loading and splitting the PDF...")
    loader = PyPDFLoader(pdf_path)
    pages = loader.load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    split_docs = splitter.split_documents(pages)
    print(f"PDF split into {len(split_docs)} chunks.")

    # 2. Create embeddings and store in a Chroma vector store
    print("Creating embeddings and storing them in ChromaDB...")
    embeddings = OllamaEmbeddings(model="nomic-embed-text")
    # Deterministic ids (path + chunk index) make this cell safe to re-run:
    # the same chunk is written under the same id instead of being appended
    # to the collection a second time under a fresh random id.
    chunk_ids = [f"{pdf_path}::chunk-{i}" for i in range(len(split_docs))]
    vectorstore = Chroma.from_documents(
        documents=split_docs,
        embedding=embeddings,
        ids=chunk_ids,
        collection_name="pdf_qa_collection"  # A unique name for the collection
    )

    # 3. Create the retriever
    retriever = vectorstore.as_retriever()
    print("✅ PDF processed and retriever is ready!")

Loading and splitting the PDF...
PDF split into 8 chunks.
Creating embeddings and storing them in ChromaDB...


  embeddings = OllamaEmbeddings(model="nomic-embed-text")
2025-08-05 23:51:14.311 INFO    chromadb.telemetry.product.posthog: Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


✅ PDF processed and retriever is ready!


In [16]:
# --- Ask your question here ---
question = "uski kitni girlfriend hai?"
# -----------------------------

# `retriever` is only defined when the PDF processing cell completed
# successfully, so check for it before trying to answer.
if 'retriever' in globals():
    # 1. Retrieve relevant documents based on the question.
    #    invoke() is the current retriever entry point; it replaces the
    #    deprecated get_relevant_documents().
    print(f"Retrieving relevant documents for the question: '{question}'")
    docs = retriever.invoke(question)

    # 2. Combine the content of the retrieved documents into a single context string
    context = "\n\n".join(doc.page_content for doc in docs)

    # 3. Invoke the chain with the context and question
    print("Invoking the LLM to generate an answer...")
    response = chain.invoke({"context": context, "question": question})

    # 4. Print the final answer
    print("\n" + "="*50)
    print("🤖 Answer:")
    print("="*50)
    print(response)
else:
    # Refer to the cell by purpose, not by execution count: the count changes
    # on every run, so a hardcoded "In[N]" goes stale.
    print("\nRetriever not initialized. Please run the PDF processing cell successfully first.")

Retrieving relevant documents for the question: 'uski kitni girlfriend hai?'
Invoking the LLM to generate an answer...

🤖 Answer:
Mujhe nahi pata. (I don't know) Shivam Shrivastava ki profile mein koi information nahin hai jo uske relationship status ko show karti ho.
