In [1]:
import sys
from pathlib import Path

# The project root is taken to be the parent of the current working directory,
# i.e. the notebook is expected to run from a sub-folder such as notebooks/.
# Use Path.cwd() instead if the notebook is started from the repository root.
PROJECT_ROOT = Path.cwd().parent

# Put the project root first on the import path so the `src.*` imports resolve.
sys.path.insert(0, str(PROJECT_ROOT))


### Set up the 2 key LangChain objects: retriever and llm

In [2]:
from src.retriever import get_retriever
# Path handed to get_retriever as `db_path`: the "vectors" folder under the project root.
db_path = PROJECT_ROOT / "vectors"
retriever = get_retriever(db_path=db_path)

# Quick sanity check: the result of this call is displayed as the cell output.
retriever.invoke("who is Mamoon Rashid ?")

[Document(id='90e7fa82-6e26-4b53-adc8-8fcf2e9937ff', metadata={'source': 'd:\\LLM\\Projects\\LLM-RAG-private-knowldge-worker/data/processed/pdf_markdown\\research_papers\\An_Efficient_Deep_Learning_based_Approach_for_the_Detection_of_Brain_Tumors.md', 'type': 'research_papers'}, page_content='202000611@vupune.ac.in 202001143@vupune.ac.in 202001369@vupune.ac.in\nJanvi Anand Pagariya Mamoon Rashid\nResearch Center of Excellence for Research Center of Excellence for\nHealth Informatics Health Informatics\nVishwakarma University Vishwakarma University\nPune, India. Pune, India.\n202001239@vupune.ac.in mamoon.rashid@vupune.ac.in\nAbstract‚Äî Deep learning has stretched out its roots even neurologists to treat is appallingly small, which is our\nmore in our daily lives. As a society, we are witnessing small devastating reality [4].\nchanges in lifestyle such as self-driving cars, Google Assistant,\nThe typical technique used by neurologists to identify'),
 Document(id='daaeb115-466a-462d-96e

### Advanced RAG: rerank the chunks


In [3]:
from pydantic import BaseModel,Field

class RankOrder(BaseModel):
    # Schema holding a relevance ordering of chunks as a list of chunk id numbers.
    # NOTE(review): presumably used as a structured-output schema for reranking;
    # it is not referenced anywhere else in the visible notebook cells.
    # Documented with comments instead of a docstring on purpose: pydantic copies a
    # model docstring into the generated JSON schema, which would change the schema.
    order: list[int] = Field(description="The order of relevance of chunks, from most relevant to least relevant, by chunk id number")

In [4]:
from openai import OpenAI
# Connection settings for the local Ollama server.
ollama_host = "http://localhost:11434"  # Adjust if your Ollama server is running on a different URL or port
ollama_base_url = ollama_host + "/v1"
ollama_model = "llama3.2"

# The OpenAI client is pointed at the Ollama server's /v1 endpoint.
# NOTE(review): "ollama" looks like a placeholder API key - confirm the server ignores it.
ollama_client = OpenAI(base_url=ollama_base_url, api_key="ollama")

In [5]:
from src.rag_system import rewrite_query,fetch_unranked_chunks,merge_chunks,rerank

  llm = Ollama(model="llama3.2", base_url=ollama_host, temperature=0)


In [6]:
# Fetch the chunks for a sample question before any reranking is applied.
question = "who is Mamoon Rashid ?"
unranked_chunks = fetch_unranked_chunks(question, retriever)

# Bare name on the last line: the notebook displays the fetched chunks.
unranked_chunks

[Document(id='90e7fa82-6e26-4b53-adc8-8fcf2e9937ff', metadata={'source': 'd:\\LLM\\Projects\\LLM-RAG-private-knowldge-worker/data/processed/pdf_markdown\\research_papers\\An_Efficient_Deep_Learning_based_Approach_for_the_Detection_of_Brain_Tumors.md', 'type': 'research_papers'}, page_content='202000611@vupune.ac.in 202001143@vupune.ac.in 202001369@vupune.ac.in\nJanvi Anand Pagariya Mamoon Rashid\nResearch Center of Excellence for Research Center of Excellence for\nHealth Informatics Health Informatics\nVishwakarma University Vishwakarma University\nPune, India. Pune, India.\n202001239@vupune.ac.in mamoon.rashid@vupune.ac.in\nAbstract‚Äî Deep learning has stretched out its roots even neurologists to treat is appallingly small, which is our\nmore in our daily lives. As a society, we are witnessing small devastating reality [4].\nchanges in lifestyle such as self-driving cars, Google Assistant,\nThe typical technique used by neurologists to identify'),
 Document(id='daaeb115-466a-462d-96e

In [7]:
# Second sample question. `question` is rebound here, and both `question` and
# `chunks` are read by the rerank cell that follows.
question = "Who is the director of UMA?"
chunks = fetch_unranked_chunks(question, retriever)

In [8]:
# Rerank the chunks fetched for `question`.
# NOTE(review): the "input chunks:" line in the output presumably comes from a
# print inside rerank - confirm in src.rag_system.
reranked = rerank(question, chunks)
# Bare name on the last line: the notebook displays the reranked list.
reranked

input chunks:, [Document(id='8fcf903b-5b57-4e26-b4a6-69c3f0abaa4e', metadata={'source': 'd:\\LLM\\Projects\\LLM-RAG-private-knowldge-worker/data/processed/repo_summaries\\UMA-V-2.md', 'type': 'repo_summaries'}, page_content='<!-- Generated: 2026-02-15T03:03:41.012793Z | Model: gpt-4.1-nano -->\n\n# UMA-V-2 Repository Documentation\n\n## Overview\nThe **UMA-V-2** repository appears to be a comprehensive collection of web-based educational resources focused on anatomy, biology, and chemistry practicals. It includes static web pages, 3D models, multimedia content, and backend PHP scripts for user management and data handling. The repository is intended for students and educators to access interactive lab simulations, practical instructions, and assessment tools within a virtual laboratory environment.\n\n---'), Document(id='a7e4a20e-dd1e-4333-8167-440ad5b8faed', metadata={'source': 'd:\\LLM\\Projects\\LLM-RAG-private-knowldge-worker/data/processed/pdf_markdown\\internships\\uma-internship

[Document(id='a7e4a20e-dd1e-4333-8167-440ad5b8faed', metadata={'source': 'd:\\LLM\\Projects\\LLM-RAG-private-knowldge-worker/data/processed/pdf_markdown\\internships\\uma-internship-completion.md', 'type': 'internships'}, page_content='# UMA ‚Äì Internship / Project Completion Certificate (2024)\n\n## Certificate Type\n**Certificate of Completion**\n\n## Issued By\n**Universidad Mar√≠a Auxiliadora (UMA)**\n\n## Recipient\n**Prathamesh Uravane**\n\n## Project Title\n**Virtual Tutor for Comprehensive Student Monitoring**\n\n## Duration\n**8 March 2024 ‚Äì 7 July 2024**\n\n## Description\nThis certificate confirms successful completion of the project focused on building a **virtual tutor** to support comprehensive student monitoring. The project mentions evaluating student **attendance, performance, and attentiveness**, including use of **facial recognition** and automation of report generation for academic staff.'),
 Document(id='8fcf903b-5b57-4e26-b4a6-69c3f0abaa4e', metadata={'source':

### Build the RAG messages

In [9]:
# System prompt for the RAG chat. The `{context}` placeholder is filled in with
# str.format by make_rag_messages, so any literal braces added to this template
# must be doubled ("{{" / "}}").
SYSTEM_PROMPT_TEMPLATE = """
You are a helpful, knowledgeable assistant with access to a user's personal knowledge base.
Your role is to answer questions about the user's background, experience, achievements, and projects based on provided context.

while answering questions:
- Don't refer to any document.
- understand the nuance of the question and depend on that provide short or long detailed answer.
- Maintain a polite and friendly tone
- If information is not available in the provided context, clearly state that you don't have that information
- don't mention name of any document use it for your context only.
- While answering strictly do not mention any reference also, like "as per document 1, document 2, according to knowledge base" etc.

Context:
{context}
"""



In [None]:
def _history_to_messages(history):
    msgs = []
    for pair in history or []:
        # pair can be tuple/list like (user, assistant) or [user, assistant]
        if not pair or len(pair) != 2:
            continue
        user_msg, assistant_msg = pair

        if user_msg:
            msgs.append({"role": "user", "content": str(user_msg)})
        if assistant_msg:
            msgs.append({"role": "assistant", "content": str(assistant_msg)})

    return msgs

def make_rag_messages(question, history, chunks):
    """Assemble the chat message list for one RAG request.

    Each chunk is rendered as an "Extract from <source>" section (falling back
    to 'unknown' when the chunk metadata has no 'source' key); the sections are
    joined with blank lines and substituted into SYSTEM_PROMPT_TEMPLATE.

    Returns:
        A list made of the system message, the messages produced by
        _history_to_messages(history), and finally the user question.
    """
    extracts = []
    for doc in chunks:
        origin = doc.metadata.get('source', 'unknown')
        extracts.append(f"Extract from {origin}:\n{doc.page_content}")

    system_message = {
        "role": "system",
        "content": SYSTEM_PROMPT_TEMPLATE.format(context="\n\n".join(extracts)),
    }
    user_message = {"role": "user", "content": question}

    return [system_message, *_history_to_messages(history), user_message]


In [11]:
# Try the query rewriter on a sample question, passing an empty list as its second argument.
que = "where did Prathamesh completed SSC?"
query = rewrite_query(que, [])

# Bare name on the last line: the notebook displays the rewritten query.
query

'"Prathamesh education SSC location"'

In [12]:
def fetch_context(original_question, retriever, top_k=8):
    """Collect context chunks for a question.

    Chunks are fetched twice - once for the question as asked and once for its
    rewritten form - then merged, reranked against the original question, and
    cut down to the first ``top_k`` results.

    Args:
        original_question: The question as asked.
        retriever: Retriever passed through to fetch_unranked_chunks.
        top_k: Maximum number of reranked chunks to return.

    Returns:
        The first ``top_k`` items of the reranked result.
    """
    rewritten_question = rewrite_query(original_question)
    candidates = merge_chunks(
        fetch_unranked_chunks(original_question, retriever),
        fetch_unranked_chunks(rewritten_question, retriever),
    )
    ranked = rerank(original_question, candidates)
    return ranked[:top_k]


In [13]:
from tenacity import retry, wait_exponential
# wait = wait_exponential(multiplier=1, min=10, max=240)
# @retry(wait=wait)
def answer_question(question: str, history: list | None = None) -> str:
    """
    Answer a question using RAG and return the answer text.

    Only the answer string is returned (not the retrieved chunks), which is
    what lets this function be passed directly to gr.ChatInterface.

    Args:
        question: The user's question.
        history: Earlier conversation turns, forwarded to make_rag_messages.
            Defaults to an empty history.

    Returns:
        The message content of the first completion choice.
    """
    # Use None rather than a mutable [] default, which would be one shared list
    # reused across every call.
    if history is None:
        history = []

    # NOTE(review): a fresh retriever is built on every call instead of reusing
    # the notebook-level `retriever` - confirm whether that is intentional.
    retriever = get_retriever(db_path=db_path)
    chunks = fetch_context(question, retriever)
    messages = make_rag_messages(question, history, chunks)
    print("Messages sent to LLM:", messages)
    response = ollama_client.chat.completions.create(model=ollama_model, messages=messages)
    return response.choices[0].message.content

In [14]:
# question="where did Prathamesh completed Btech, tell me about his btech journey?"
# answer,chunks =answer_question(question, history=[])
# print(answer)

In [15]:
import gradio as gr 
# Serve answer_question through a Gradio chat UI; inbrowser=True asks launch()
# to open the app in a browser tab.
gr.ChatInterface(answer_question).launch(inbrowser=True)

  self.chatbot = Chatbot(


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




input chunks:, [Document(id='4669674a-d3c6-4118-824b-1903e8a136b2', metadata={'type': 'resume', 'source': 'd:\\LLM\\Projects\\LLM-RAG-private-knowldge-worker/data/processed/pdf_markdown\\Resume\\AI_ML Resume 5 Prathamesh Uravane.md'}, page_content='Prathamesh Uravane\nWashington DC, Baltimore Area | +1 (732) 318-9234 | upratham2002@gmail.com\nlinkedin.com/in/upratham/ | https://github.com/upratham|Google Scholar Profile\nEDUCATION\nUniversity of Maryland, College Park Expected: May 2027\nMaster of Science in Applied Machine Learning ; GPA: 4.0/ 4.0 College Park, Maryland\nVishwakarma University May 2024\nB.Tech in Artificial Intelligence and Data Science ; GPA: 3.75 / 4 Pune, India\n‚óè Featured in Times of India for Developing real time fall detection system\n‚óè AI/ML Core Team Member, Google Developer Student Club\nTECHNICAL SKILLS\nMachine Learning: Sci-kit Learn, Keras, Tensorflow, PyTorch.HuggingFace,LLMs'), Document(id='1d2f649d-951e-4fd6-a703-1f73971d9afd', metadata={'source': 

Traceback (most recent call last):
  File "d:\LLM\Projects\LLM-RAG-private-knowldge-worker\.venv\Lib\site-packages\gradio\queueing.py", line 759, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\LLM\Projects\LLM-RAG-private-knowldge-worker\.venv\Lib\site-packages\gradio\route_utils.py", line 354, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\LLM\Projects\LLM-RAG-private-knowldge-worker\.venv\Lib\site-packages\gradio\blocks.py", line 2191, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\LLM\Projects\LLM-RAG-private-knowldge-worker\.venv\Lib\site-packages\gradio\blocks.py", line 1696, in call_function
    prediction = await fn(*processed_input)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\LLM\Projects\LLM-RAG-private-knowldge-worker\.venv\Lib\site-packages\grad