# RAG Sandbox

In [17]:
import os

import nest_asyncio
from langfuse.llama_index import LlamaIndexCallbackHandler
from llama_index.core import Settings, StorageContext, load_index_from_storage
from llama_index.core.callbacks import CallbackManager
from llama_index.core.evaluation import (FaithfulnessEvaluator,
                                         RetrieverEvaluator)
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama
from rich import print

# Patch asyncio so an event loop can be re-entered while another one is
# already running (the notebook kernel runs its own loop).
# NOTE(review): presumably needed by async code inside the LlamaIndex
# query/evaluation calls further down — confirm before removing.
nest_asyncio.apply()

In [2]:
# Langfuse tracing setup: build the Langfuse callback handler and register it
# on the global LlamaIndex `Settings` callback manager.
#
# Connection settings come from the environment so the notebook can be pointed
# at a different Langfuse project or host without editing code. The fallbacks
# are the values this notebook was originally hard-wired to, so an unchanged
# environment behaves exactly as before.
langfuse_callback_handler = LlamaIndexCallbackHandler(
    public_key=os.environ.get(
        "LANGFUSE_PUBLIC_KEY", "pk-lf-363e8a50-c2ef-4622-b47b-0fd9db3b90eb"
    ),
    # Deliberately no fallback: a missing secret must fail loudly (KeyError)
    # rather than silently running with a placeholder.
    secret_key=os.environ["LANGFUSE_SECRET_KEY"],
    host=os.environ.get("LANGFUSE_HOST", "http://localhost:3000"),
)
Settings.callback_manager = CallbackManager([langfuse_callback_handler])

In [4]:
# --- Model configuration ----------------------------------------------------
# Ollama tag of the completion model. Exactly one assignment should be active;
# the commented alternatives are kept for quick switching.
MODEL = "llama3.2:1b"
# MODEL = "deepseek-r1:1.5b"
# MODEL = "gemma2:2b"
# MODEL = "llama3.2:3b"
# MODEL = "mistral:7b"
# MODEL = "deepseek-r1:7b"
# MODEL = "llama3.1:8b"
# MODEL = "gemma2:9b"
# MODEL = "phi4:14b"
# MODEL = "deepseek-r1:14b"
# MODEL = "llama3.3:70b"

# Ollama tag of the embedding model. The trailing number on each option is
# presumably the embedding vector size.
# NOTE(review): the embedder likely has to match the one used when the
# persisted index was built — verify before switching.
EMBEDDER = "all-minilm"  # 384
# EMBEDDER = "nomic-embed-text" # 768
# EMBEDDER = "bge-m3" # 1024
# EMBEDDER = MODEL

# LLM client with a 60-second request timeout. It is intentionally NOT
# installed as the global default (assignment left disabled below).
llm = Ollama(request_timeout=60.0, model=MODEL)
# Settings.llm = llm

# Only the embedding model is registered globally.
embed_model = OllamaEmbedding(model_name=EMBEDDER)
Settings.embed_model = embed_model

In [None]:
# Quick check: one blocking completion sent directly to the LLM client,
# without involving the index or the query engine.
prompt = "What is the capital of France?"
response = llm.complete(prompt)
print(response)

In [7]:
# Disabled example: streaming variant of the completion call, printing each
# token delta as it arrives. Uncomment the three lines below to try it.
# response = llm.stream_complete("Tell me a short story about a robot.")
# for token in response:
#     print(token.delta, end="", flush=True)

In [10]:
# Load the previously persisted index from disk.
storage_context = StorageContext.from_defaults(persist_dir="../out/simple_default/")
index = load_index_from_storage(storage_context)

# Pass the Ollama model explicitly: `Settings.llm` is never assigned in this
# notebook (the assignment is commented out), so without `llm=` the query
# engine would fall back to LlamaIndex's global default LLM instead of the
# model selected in the configuration cell.
query_engine = index.as_query_engine(llm=llm)

# The faithfulness evaluator uses the same LLM as the query engine.
evaluator = FaithfulnessEvaluator(llm=llm)

In [None]:
# Run one question through the query engine, show the answer text and the
# response metadata, then pass the response to the evaluator.
# Other questions that were tried:
#   "Who is Tyler?"
#   "How many documents are there?"
question = "What are the documents about?"
response = query_engine.query(question)
# print(response)
print(response.response)
# print(response.source_nodes)
print(response.metadata)

# Evaluate the response object produced above and print the full result.
eval_result = evaluator.evaluate_response(response=response)
print(eval_result)
# print(str(eval_result.passing))

In [None]:
# Evaluate the query engine's retriever on a single query with a hand-picked
# list of expected node ids.
metric_names = ["mrr", "hit_rate", "precision", "recall", "ap"]
expected_ids = [
    # "node_id1",
    # "node_id2",
    "13e70559-884b-4ea5-b318-36d53ff1caaa",
]

retriever_evaluator = RetrieverEvaluator.from_metric_names(
    metric_names,
    retriever=query_engine.retriever,
)
eval_result = retriever_evaluator.evaluate(
    expected_ids=expected_ids,
    query="Who is Tyler?",
)
print(eval_result)