# 📘 RAG Evaluation — Precision@K & Recall@K

This notebook evaluates the RAG subsystem using FAISS + Azure embeddings.
We compute **Precision@K** and **Recall@K** manually.

In [None]:
from backend.rag.retriever import retrieve_top_k
import json
import pandas as pd

# Evaluation set: each record pairs a natural-language query with the
# keywords a relevant retrieved chunk is expected to mention.
queries = [
    {"query": question, "expected_keywords": keywords}
    for question, keywords in (
        (
            "What are SEBI rules for liquid funds?",
            ["91 days", "liquid", "money-market"],
        ),
        (
            "Explain equity mutual funds",
            ["equity", "stocks"],
        ),
    )
]

def precision_recall_at_k(retrieved_texts, expected_keywords):
    """Score one retrieval result with keyword-based precision and recall.

    Matching is a case-insensitive substring test of each keyword against
    each retrieved chunk.

    Args:
        retrieved_texts: Iterable of retrieved chunk strings (the top-K).
        expected_keywords: Iterable of keyword strings that relevant
            chunks are expected to contain.

    Returns:
        A ``(precision, recall)`` tuple of floats in ``[0, 1]``:

        * precision -- fraction of retrieved chunks that contain at least
          one expected keyword.
        * recall -- fraction of expected keywords that appear in at least
          one retrieved chunk.

        Either value is ``0.0`` when its denominator would be zero
        (no retrieved chunks / no expected keywords).
    """
    # Normalise case once up front instead of on every comparison.
    chunks = [chunk.lower() for chunk in retrieved_texts]
    keywords = [keyword.lower() for keyword in expected_keywords]

    relevant_chunks = sum(
        any(keyword in chunk for keyword in keywords) for chunk in chunks
    )
    # Recall is measured over keywords, not chunks: dividing the chunk-hit
    # count by the keyword count could yield values above 1.0.
    found_keywords = sum(
        any(keyword in chunk for chunk in chunks) for keyword in keywords
    )

    precision = relevant_chunks / len(chunks) if chunks else 0.0
    recall = found_keywords / len(keywords) if keywords else 0.0
    return precision, recall

# Run every evaluation query through the retriever and score its top-5 hits.
results = []
for entry in queries:
    hits = retrieve_top_k(entry['query'], top_k=5)
    # Each hit is indexed by "text" to obtain the chunk string that is scored.
    chunk_texts = [hit["text"] for hit in hits]
    precision, recall = precision_recall_at_k(
        chunk_texts, entry["expected_keywords"]
    )
    row = {
        "query": entry["query"],
        "precision@5": precision,
        "recall@5": recall,
    }
    results.append(row)

# One row per query; the trailing bare expression makes the notebook
# render the table as the cell output.
df = pd.DataFrame(results)
df