In [None]:

import os
import sys

import qdrant_client
from dotenv import load_dotenv
from qdrant_client.http import models as rest
from rich.console import Console
from sentence_transformers import CrossEncoder

# Llama-Index core pieces
from llama_index.core import (
    ServiceContext,
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
)
from llama_index.core.node_parser import HierarchicalNodeParser, SentenceSplitter
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.core.prompts import PromptTemplate
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import RouterRetriever, VectorIndexRetriever

# Llama-Index integrations
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.vector_stores.qdrant import QdrantVectorStore

# optional BM25 (comment out if the plug-in isn’t available)
from llama_index.retrievers.bm25 import BM25Retriever


  from .autonotebook import tqdm as notebook_tqdm


In [None]:


# Shared rich console used for every status message in this notebook.
console = Console()

# Load variables from .env into the process environment.
# QDRANT_URL is required; QDRANT_API is optional and defaults to "".
load_dotenv()

QDRANT_API = os.environ.get("QDRANT_API", "")
QDRANT_URL = os.environ.get("QDRANT_URL")

# Fail fast: nothing below can work without an endpoint.
if QDRANT_URL is None or QDRANT_URL == "":
    console.print("QDRANT_URL not set in .env")
    raise SystemExit(1)

console.print(f"Env loaded – Qdrant endpoint: {QDRANT_URL}")

In [None]:
# Language model: Ollama server running the `llama3.1:8b` model.
# OLLAMA_URL is optional: when it is unset or empty, fall back to Ollama's
# default local endpoint instead of handing `None` to the client as base URL.
llm = Ollama(
    base_url=os.getenv("OLLAMA_URL") or "http://localhost:11434",
    model="llama3.1:8b",
    request_timeout=60.0,
)

# Embedding model: BGE-large (≈1 GB VRAM).  Fallback to base if OOM.
# NOTE(review): the two models presumably produce vectors of different sizes,
# so a collection created with one may not accept the other — confirm.
try:
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en")
except RuntimeError:                         # e.g. GPU out-of-memory
    console.print("Falling back to bge-base-en-v1.5")
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")

# Register both models globally so later Llama-Index calls pick them up.
Settings.llm = llm
Settings.embed_model = embed_model
console.print("LLM and embedding model ready")

In [None]:

COLLECTION = "data"                                         # one-PDF project

# ── 1) Build the client ──────
client = qdrant_client.QdrantClient(
    url=QDRANT_URL,                        # endpoint read from .env
    # QDRANT_API is "" when the variable is unset; pass None in that case so
    # the client is built without an API key rather than with an empty one.
    api_key=QDRANT_API or None,
    https=True,
    port=443,
    prefer_grpc=False,                     # stick to REST
    timeout=60.0,
)

# quick ping (raises if the server cannot be reached); the listing it returns
# is reused for the existence check below instead of being fetched twice
collections_response = client.get_collections()
console.print("Connected to Qdrant")

# ── 2) Ensure the collection exists (creates it if absent) ────
existing = {c.name for c in collections_response.collections}

if COLLECTION not in existing:
    # determine vector size once from the embed model
    vec_dim = len(embed_model.get_query_embedding("dim-check"))
    client.create_collection(
        collection_name=COLLECTION,
        vectors_config=rest.VectorParams(
            size=vec_dim,
            distance=rest.Distance.COSINE,
        ),
        optimizers_config=rest.OptimizersConfigDiff(memmap_threshold=20000),
    )
    console.print(
        f"Created new collection '{COLLECTION}' (dim = {vec_dim})")
else:
    console.print(f"  Collection '{COLLECTION}' already exists")

# ── 3) Wrap it for Llama-Index ────────────────────────────────────
vector_store = QdrantVectorStore(client=client, collection_name=COLLECTION)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

console.print(f"Vector store ready → collection: {COLLECTION}")

In [5]:
# Tunable retrieval settings, named so they can be changed in one place.
CHUNK_SIZE = 150          # splitter chunk size
CHUNK_OVERLAP = 40        # overlap between neighbouring chunks
SIMILARITY_TOP_K = 30     # candidates fetched from the vector store
RERANK_TOP_N = 12         # candidates kept after cross-encoder reranking

# 1)  Load & split PDF(s)

docs_dir = "docs"
documents = SimpleDirectoryReader(docs_dir, required_exts=[".pdf"]).load_data()

splitter = SentenceSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
nodes = splitter.get_nodes_from_documents(documents)
console.print(
    f"  Parsed {len(nodes)} chunks from {len(documents)} PDF(s)")

# 2)  Embed + store in Qdrant
#
# The collection persists between runs, so building the index from `nodes`
# on every run would embed and insert the same chunks again. Only ingest when
# the collection is empty; otherwise attach to the vectors already stored.
# To re-ingest after changing the PDFs or chunk settings, drop the collection.

stored_points = client.count(collection_name=COLLECTION, exact=True).count
if stored_points == 0:
    index = VectorStoreIndex(nodes, storage_context=storage_context)
    console.print("  Chunks embedded and stored in Qdrant")
else:
    index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
    console.print(
        f"  Reusing {stored_points} vectors already stored in '{COLLECTION}'")

# 3)  Build a vector retriever  (top-k = SIMILARITY_TOP_K)

vector_retriever = VectorIndexRetriever(
    index=index, similarity_top_k=SIMILARITY_TOP_K)

# 4)  Cross-encoder reranker
#
# Keyword arguments keep the call independent of parameter order and give a
# single, consistent top_n.

MODEL_NAME = "cross-encoder/ms-marco-MiniLM-L-12-v2"
reranker = SentenceTransformerRerank(top_n=RERANK_TOP_N, model=MODEL_NAME)


# 5)  Assemble the query engine  – proper PromptTemplate

QA_PROMPT = PromptTemplate(
    template=(
        "You are an academic advisor for the IIT-Madras BS programme. "
        "Answer fully and quote numbers exactly as they appear in the context.\n\n"
        "If the context does not explicitly give a number, answer ‘Not specified in the document’.\n\n"
        "<context>\n{context_str}\n</context>\n\n"
        "Q: {query_str}\nA:"
    )
)

# Retrieval → rerank → answer synthesis with the custom QA prompt.
query_engine = RetrieverQueryEngine.from_args(
    retriever=vector_retriever,
    llm=llm,
    node_postprocessors=[reranker],
    response_mode="compact",
    text_qa_template=QA_PROMPT,
)

# 6)  Helper function


def answer(query: str, engine=None) -> str:
    """Ask a question and return the model's answer as plain text.

    Args:
        query: The question to send to the query engine.
        engine: Optional object exposing ``query(str)`` whose result has a
            ``response`` attribute. Defaults to the notebook-level
            ``query_engine``.

    Returns:
        The answer text. An empty string is returned when the query fails
        (the error is printed to the console) or when the engine produced no
        response text, so the declared ``str`` return type always holds.
    """
    try:
        # Resolve the default inside the try block so a missing global engine
        # is reported the same way as any other query failure.
        active_engine = query_engine if engine is None else engine
        text = active_engine.query(query).response
    except Exception as exc:
        console.print(f"  Query failed: {exc}")
        return ""
    return "" if text is None else text


# Status line printed as the last step of this cell.
console.print("  Query engine started ")

In [6]:
# Example question: CGPA needed for the MS-by-research pathway.
console.print(answer(" what CGPA does it require after completing BS-level credits to be eligible for the MS by research pathway?"))

In [7]:
# Example question: overall time limit for completing the programme.
console.print(answer("what is the overall time limit to finish the entire programme?"))

In [8]:
# Example question: cap on final exams a student can register for per term.
console.print(answer("what is the maximum number of final exams (full-course + make-up) that a student can register for in a single term?"))