In [1]:
from PyPDF2 import PdfReader

**The Document Location**

In [2]:
pdfPath = "pdfContent/document.pdf"

**Function for loading the file**

In [None]:
def loadFile(path):
    """Read a PDF and return its text page by page.

    Args:
        path: Location of the PDF file; handed directly to ``PdfReader``.

    Returns:
        A list with one dict per page, in document order, shaped like
        ``{"page": <1-based page number>, "text": <extracted text>}``.
    """
    document = PdfReader(path)

    # start=1 gives the human-friendly 1-based page number.
    return [
        {"page": number, "text": sheet.extract_text()}
        for number, sheet in enumerate(document.pages, start=1)
    ]

**Splitting data into chunks**

In [None]:
def splitIntoChunks(pages, chunkSize=1000, overlap=200):
    """Split page texts into overlapping, fixed-size character windows.

    Chunks never cross a page boundary. The numeric part of each chunk id is a
    counter that keeps increasing across pages, so ids are unique per call.

    Args:
        pages: Iterable of dicts with a "page" key (page number) and a "text"
            key (page text), e.g. the output of ``loadFile``.
        chunkSize: Maximum number of characters per chunk. Must be positive.
        overlap: Number of characters shared by two consecutive chunks of the
            same page. Must be smaller than ``chunkSize``.

    Returns:
        A ``(chunks, metadatas)`` tuple of equally long lists. ``chunks[i]`` is
        the chunk text and ``metadatas[i]`` is a dict with the keys "page",
        "start_char", "end_char" (exclusive) and "chunk_id".

    Raises:
        ValueError: If ``chunkSize`` is not positive or ``overlap`` is not
            smaller than ``chunkSize`` (the window could never advance).
    """
    if chunkSize <= 0:
        raise ValueError("chunkSize must be a positive integer")
    if overlap >= chunkSize:
        raise ValueError("overlap must be smaller than chunkSize")

    # Distance the window moves forward on every iteration.
    step = chunkSize - overlap

    chunks = []
    metadatas = []
    global_chunk_index = 0

    for p in pages:
        # Treat a missing text (None) like an empty page instead of crashing.
        text = p['text'] or ""
        page_no = p['page']
        text_length = len(text)
        start = 0

        while start < text_length:
            end = min(start + chunkSize, text_length)

            chunks.append(text[start:end])
            metadatas.append({
                "page": page_no,
                "start_char": start,
                "end_char": end,
                "chunk_id": f"p{page_no}-c{global_chunk_index}"
            })
            global_chunk_index += 1

            # The window already reached the end of the page: stepping again
            # would only emit a tail that the current chunk fully contains.
            if end >= text_length:
                break
            start += step

    return chunks, metadatas

In [41]:
# Read the PDF once and report how many pages it produced.
pages = loadFile(pdfPath)
print(f"total pages :  {len(pages)}")

# Chunk every page and report the size of the resulting corpus.
chunks, metadatas = splitIntoChunks(pages)
print(f"total chunks created :  {len(chunks)}")

total pages :  6
total chunks created :  15


**Loading Embedding Model**

In [6]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model, loaded by name. Later cells call model.encode(...)
# on both the document chunks and the user query.
model = SentenceTransformer("all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm





**ChromaDB setup**

In [7]:
import chromadb

# Chroma client whose collection stores every chunk together with its
# embedding and metadata.
chroma_client = chromadb.Client()

# get_or_create_collection keeps this cell re-runnable: create_collection
# raises when a collection with this name already exists.
collection = chroma_client.get_or_create_collection(name="pdf_reader")

**Inserting data into ChromaDB**

In [42]:
# Embed all chunks in one batched call instead of one model call per chunk.
# The guard skips the write entirely when the PDF yielded no text.
if chunks:
    embeddings = model.encode(chunks).tolist()

    # upsert (rather than add) so that re-running this cell overwrites the
    # entries with the same chunk ids instead of colliding with them.
    collection.upsert(
        documents=chunks,
        embeddings=embeddings,
        ids=[meta["chunk_id"] for meta in metadatas],
        metadatas=metadatas,
    )

print("vector db is ready")

vector db is ready


### Integrating LLM through Ollama

In [9]:
import ollama

def runLLM(prompt, model_name="mistral"):
    """Send a prompt to a local Ollama model and return the generated text.

    Args:
        prompt: Full prompt text forwarded to ``ollama.generate``.
        model_name: Name of the Ollama model to use. The model has to be
            pulled beforehand on the local Ollama install, e.g.
            ``ollama pull mistral`` or ``ollama pull tinyllama``.

    Returns:
        The value stored under the "response" key of the Ollama reply.
    """
    response = ollama.generate(model=model_name, prompt=prompt)
    return response["response"]

**RAG Function**

*Re-Ranking*

In [10]:
from sentence_transformers import CrossEncoder
# Cross-encoder loaded by name; rag() calls reranker.predict(...) on
# (query, chunk) pairs and uses the scores to re-order the retrieved chunks.
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")


In [None]:
def rag(query, n_results=5, top_k=2):
    """Answer a question from the indexed PDF: retrieve, re-rank, then ask the LLM.

    Args:
        query: The user's question.
        n_results: How many chunks to fetch from the vector store before
            re-ranking.
        top_k: How many of the best re-ranked chunks are placed in the prompt
            and reported as sources.

    Returns:
        An ``(answer, sources)`` tuple. ``answer`` is the text returned by
        ``runLLM``. ``sources`` is a list (best first) of dicts with the keys
        "chunk_id", "page", "start_char", "end_char" and "score" (the
        re-ranker score as a float).
    """
    print("The Question is:", query)

    # STEP 1 — Expand & embed query
    # NOTE(review): the domain hint is hard-coded to the sample document's
    # topic; it needs to change if a different PDF is indexed.
    expanded = f"Explain in detail: {query} (in context of armwrestling)"
    query_emb = model.encode([expanded])[0].tolist()

    # STEP 2 — Retrieve top K documents (with metadata)
    results = collection.query(
        query_embeddings=[query_emb],
        n_results=n_results,
        include=["documents", "metadatas"]
    )

    # One query embedding was sent, so only the first result list is relevant.
    retrieved_chunks = results["documents"][0]
    retrieved_metas  = results["metadatas"][0]

    # STEP 3 — Re-ranking with CrossEncoder
    # The re-ranker scores the ORIGINAL question, not the expanded one.
    pairs = [(query, chunk) for chunk in retrieved_chunks]
    # Skip the re-ranker when nothing was retrieved (e.g. empty collection).
    scores = reranker.predict(pairs) if pairs else []

    # zip scores + chunks + metadata together so they stay aligned; sorting on
    # the score alone avoids ever comparing chunks or metadata dicts.
    ranked = sorted(
        zip(scores, retrieved_chunks, retrieved_metas),
        reverse=True,
        key=lambda x: x[0]
    )

    # STEP 4 — Keep only the best top_k
    final = ranked[:top_k]
    final_chunks = [x[1] for x in final]
    final_metas  = [x[2] for x in final]
    final_scores = [float(x[0]) for x in final]

    # STEP 5 — Build context
    context = "\n\n".join(final_chunks)

    # STEP 6 — Strict No-Hallucination Prompt
    prompt = f"""
You are a STRICT RAG assistant. Follow the rules exactly.

CONTEXT:
{context}

QUESTION:
{query}

IMPORTANT ANSWERING RULES:
1. Use ONLY the text in the provided context.
2. Do NOT guess, assume, or infer anything outside the context.
3. If the exact term is NOT mentioned in the context, say:
   "The term '{query}' is not explicitly mentioned in the document."
4. If related concepts ARE described, summarize ONLY those parts.
5. Keep the answer factual and concise.
6. Never use phrases like “it is implied”.
"""

    # STEP 7 — Run LLM
    answer = runLLM(prompt)

    # STEP 8 — Return sources for frontend
    sources = [
        {
            "chunk_id": meta.get("chunk_id"),
            "page": meta.get("page"),
            "start_char": meta.get("start_char"),
            "end_char": meta.get("end_char"),
            "score": score
        }
        for meta, score in zip(final_metas, final_scores)
    ]

    return answer, sources


**Asking the user for a question and passing it to the LLM**

In [None]:
# userQuery = input("What's your question ?")
# rag(userQuery)

The Question is :  what kind of common injuries happen in armwrestling ? 
Answer:

 Common Injuries: Distal humerus (upper arm) fractures, elbow/biceps tendon strains, wrist ligament and carpal injuries, shoulder strains.


**FastAPI Deployment**

In [13]:
from fastapi import FastAPI, Form
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
import threading

In [14]:
# FastAPI application instance; the CORS middleware and the routes in the
# following cells are registered on it, and uvicorn serves it.
app = FastAPI()

In [15]:
# Allow a browser frontend served from another origin to call this backend (CORS).
# Credentials stay disabled: the routes here use no cookies or auth headers, and
# a wildcard origin must not be combined with credentialed requests.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_headers=["*"],
    allow_methods=["*"],
)

In [43]:
@app.post("/ask")
async def askQuestion(question: str = Form(...)):
    """Answer a question sent as the form field ``question``.

    Returns:
        A dict with "answer" (the LLM text) and "sources" (the list of source
        descriptors produced by ``rag``).
    """
    # Imported here so this cell does not depend on an extra import cell.
    from fastapi.concurrency import run_in_threadpool

    # rag() is blocking (embedding, re-ranking and the LLM call), so it runs in
    # the worker threadpool; calling it directly would stall the event loop and
    # every other request until the answer is generated.
    answer, sources = await run_in_threadpool(rag, question)
    return {
        "answer": answer,
        "sources": sources
    }


@app.get("/")
async def home():
    """Root endpoint: returns a fixed status message showing the backend is up."""
    status = {"message": "RAG Backend is running"}
    return status

**Running the FastAPI server from notebook**

In [None]:
def runServer(host="127.0.0.1", port=5173):
    """Serve the FastAPI app with uvicorn.

    Args:
        host: Interface to bind to.
        port: TCP port to listen on.

    The call blocks the calling thread for as long as the server runs, which
    is why it is started on a background thread below.
    """
    uvicorn.run(app, host=host, port=port)


# Start the server only once: re-running this cell while the previous server
# thread is still alive would try to bind the same port a second time.
_running = globals().get("server_thread")
if _running is None or not _running.is_alive():
    # daemon=True so the thread never keeps the kernel process alive on exit.
    server_thread = threading.Thread(target=runServer, daemon=True)
    server_thread.start()

INFO:     Started server process [11008]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:5173 (Press CTRL+C to quit)


INFO:     127.0.0.1:63385 - "GET / HTTP/1.1" 200 OK
INFO:     127.0.0.1:63385 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:51295 - "GET /ask HTTP/1.1" 405 Method Not Allowed
INFO:     127.0.0.1:51295 - "GET / HTTP/1.1" 200 OK
The Question is :  what kind of common injuries happen in armwrestling ? 
Answer:

 Common injuries in armwrestling include distal humerus (upper arm) fractures, elbow/biceps tendon strains, wrist ligament and carpal injuries, and shoulder strains.
INFO:     127.0.0.1:55006 - "POST /ask HTTP/1.1" 200 OK
The Question is :  what kind of common injuries happen in armwrestling ? 
Answer:

 Common Injuries in Armwrestling include: Distal humerus (upper arm) fractures, elbow/biceps tendon strains, wrist ligament and carpal injuries, and shoulder strains.
INFO:     127.0.0.1:64005 - "POST /ask HTTP/1.1" 200 OK
The Question is :  what kind of common injuries happen in armwrestling ? 
INFO:     127.0.0.1:61767 - "POST /ask HTTP/1.1" 200 OK
The Question i