In [1]:
from pathlib import Path
import chromadb
from chromadb.utils import embedding_functions

# Path
Relative_Database_path = "./chroma_Data_v5"
Absolute_Database_path = Path(Relative_Database_path).resolve()
collection_name = "anlp_rag_collection"

# Initialize Chroma
client = chromadb.PersistentClient(path=str(Absolute_Database_path))
print(f"[INFO] ChromaDB client initialized at: {Absolute_Database_path}")

# Correct embedding function: use model_name (primitive), not a model instance
embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2"
)

# Load existing collection
collection = client.get_collection(
    name=collection_name,
    embedding_function=embedding_function
)

print(f"[SUCCESS] Loaded collection '{collection_name}'")
print(f"[INFO] Count: {collection.count()}")


[INFO] ChromaDB client initialized at: C:\Users\Gaming window\Desktop\ANLP_Assignment_2\RAG-A2\VectorDB\chroma_Data_v5


  from .autonotebook import tqdm as notebook_tqdm


[SUCCESS] Loaded collection 'anlp_rag_collection'
[INFO] Count: 126


In [4]:
# === Groq + RAG + RAGAS Evaluation ===
# Prereqs:
# pip install ragas datasets groq tqdm sentence-transformers numpy

import os
import json
import numpy as np
import time
import asyncio
from datetime import datetime
from tqdm import tqdm
from datasets import Dataset
from groq import Groq

from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy
from ragas.embeddings.base import HuggingfaceEmbeddings
from sentence_transformers import SentenceTransformer
from langchain_core.prompt_values import PromptValue
from langchain_core.outputs import Generation, LLMResult
import os
from groq import Groq

# Set the API key for Groq
# os.environ["GROQ_API_KEY"] = "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" abhinav key 1
os.environ["GROQ_API_KEY"] = "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"

# Initialize Groq client
groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])

# ==== CONFIG ====
# Use the API key already set in previous cell
groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])

testbed_path = "../RAG Results/test_bed.json"
output_metrics_path = "../RAG Results/multiquery_rag_metrics.txt"
cached_answers_path = "../RAG Results/cached_rag_answers.json"  # NEW: Cache file
TOP_K = 3

GROQ_RAG_MODEL = "llama-3.3-70b-versatile"
GROQ_RAGAS_MODEL = "llama-3.3-70b-versatile"
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

# Rate limiting config for llama-3.3-70b-versatile:
# RPM: 30 (requests per minute)
# RPD: 1,000 (requests per day)
# TPM: 12,000 (tokens per minute)
# TPD: 100,000 (tokens per day)
REQUEST_DELAY = 7.5  # seconds between requests (allows ~24 RPM, safe margin below 30 RPM)
BATCH_SIZE = 5  # Process in small batches to avoid hitting token limits
MAX_RETRIES = 5  # Retry failed requests

print("Exists:", os.path.exists(testbed_path))
print("Size:", os.path.getsize(testbed_path), "bytes")

with open(testbed_path, "r", encoding="utf-8") as f:
    first_200 = f.read(200)
print("First few characters:\n", first_200)


# ==== 1️⃣ Load test data ====
with open(testbed_path, "r", encoding="utf-8") as f:
    test_data = json.load(f)

print(f"[INFO] Loaded {len(test_data)} QA pairs from testbed.")


# ==== 2️⃣ Groq generation with retry logic ====
def generate_with_groq(prompt, model_name=GROQ_RAG_MODEL, retries=MAX_RETRIES):
    for attempt in range(retries):
        try:
            chat_completion = groq_client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": prompt,
                    }
                ],
                model=model_name,
                temperature=0.7,
            )
            time.sleep(REQUEST_DELAY)
            return chat_completion.choices[0].message.content.strip()
        except Exception as e:
            if "rate_limit" in str(e).lower():
                wait_time = REQUEST_DELAY * (attempt + 2)  # Exponential backoff
                print(f"[WARN] Rate limit hit. Waiting {wait_time}s before retry {attempt + 1}/{retries}")
                time.sleep(wait_time)
            else:
                print(f"[ERROR] Groq API call failed (attempt {attempt + 1}): {e}")
                if attempt == retries - 1:
                    time.sleep(REQUEST_DELAY)
                    return None
                time.sleep(REQUEST_DELAY)
    return None


# ==== 3️⃣ Groq wrapper for RAGAS following BaseRagasLLM interface ====
from ragas.llms.base import BaseRagasLLM as RagasBaseLLM
from ragas.run_config import RunConfig
from collections import deque
import time

MAX_RPM = 30       # requests per minute
MAX_TPM = 12000    # tokens per minute
WINDOW = 60        # 60 seconds

class GroqRagasLLM(RagasBaseLLM):
    """Groq LLM wrapper implementing RAGAS BaseRagasLLM interface."""
    
    def __init__(self, model_name):
        super().__init__(run_config=RunConfig())
        self.model_name = model_name
        self.client = Groq(api_key=os.environ["GROQ_API_KEY"])
        
        # sliding windows
        self.request_times = deque()
        self.token_counts = deque()

    def _rate_limit_check(self, estimated_tokens=500):
        """Block until we are below RPM and TPM."""
        now = time.time()
        
        # drop events older than 60 seconds
        while self.request_times and now - self.request_times[0] > WINDOW:
            self.request_times.popleft()

        while self.token_counts and now - self.token_counts[0][0] > WINDOW:
            self.token_counts.popleft()

        # compute current usage in window
        current_rpm = len(self.request_times)
        current_tpm = sum(tokens for _, tokens in self.token_counts)

        # compute wait time if needed
        while current_rpm >= MAX_RPM or current_tpm + estimated_tokens >= MAX_TPM:
            # sleep until oldest entry expires
            oldest_req = self.request_times[0] if self.request_times else now
            oldest_tok = self.token_counts[0][0] if self.token_counts else now
            wait_until = min(oldest_req, oldest_tok) + WINDOW
            sleep_time = max(wait_until - time.time(), 0.1)
            print(f"[RATE LIMIT] Waiting {sleep_time:.1f}s (RPM={current_rpm}, TPM={current_tpm})")
            time.sleep(sleep_time)
            
            # refresh window
            now = time.time()
            while self.request_times and now - self.request_times[0] > WINDOW:
                self.request_times.popleft()
            while self.token_counts and now - self.token_counts[0][0] > WINDOW:
                self.token_counts.popleft()

            current_rpm = len(self.request_times)
            current_tpm = sum(tokens for _, tokens in self.token_counts)

        # Register this request
        self.request_times.append(time.time())
        self.token_counts.append((time.time(), estimated_tokens))

    def _extract_text_from_prompt(self, prompt: PromptValue) -> str:
        """Extract text from PromptValue object."""
        # PromptValue has .to_string() method
        if hasattr(prompt, "to_string"):
            return prompt.to_string()
        # Fallback to string conversion
        return str(prompt)

    def generate_text(
        self,
        prompt: PromptValue,
        n: int = 1,
        temperature: float = 0.01,
        stop=None,
        callbacks=None,
    ) -> LLMResult:
        """Synchronous generation - required by BaseRagasLLM."""
        prompt_text = self._extract_text_from_prompt(prompt)
        generations = []
        
        for i in range(n):
            for attempt in range(MAX_RETRIES):
                try:
                    # Rate limit check
                    estimated_tokens = len(prompt_text.split()) * 1.3
                    self._rate_limit_check(int(estimated_tokens))
                    
                    chat_completion = self.client.chat.completions.create(
                        messages=[{"role": "user", "content": prompt_text}],
                        model=self.model_name,
                        temperature=temperature,
                    )
                    
                    text = chat_completion.choices[0].message.content.strip()
                    generations.append([Generation(text=text)])
                    break  # Success
                    
                except Exception as e:
                    if "rate_limit" in str(e).lower() and attempt < MAX_RETRIES - 1:
                        wait_time = REQUEST_DELAY * (attempt + 2)
                        print(f"[WARN] Rate limit hit. Waiting {wait_time}s (attempt {attempt + 1})")
                        time.sleep(wait_time)
                    else:
                        print(f"[ERROR] Failed (attempt {attempt + 1}): {e}")
                        if attempt == MAX_RETRIES - 1:
                            generations.append([Generation(text=f"[Error: {e}]")])
                        else:
                            time.sleep(REQUEST_DELAY)
        
        return LLMResult(generations=generations)

    async def agenerate_text(
        self,
        prompt: PromptValue,
        n: int = 1,
        temperature: float = 0.01,
        stop=None,
        callbacks=None,
    ) -> LLMResult:
        """Asynchronous generation - required by BaseRagasLLM."""
        prompt_text = self._extract_text_from_prompt(prompt)
        generations = []
        
        for i in range(n):
            for attempt in range(MAX_RETRIES):
                try:
                    # Rate limit check
                    await asyncio.sleep(REQUEST_DELAY)
                    
                    # Run blocking SDK call in thread
                    chat_completion = await asyncio.to_thread(
                        self.client.chat.completions.create,
                        messages=[{"role": "user", "content": prompt_text}],
                        model=self.model_name,
                        temperature=temperature,
                    )
                    
                    text = chat_completion.choices[0].message.content.strip()
                    generations.append([Generation(text=text)])
                    break  # Success
                    
                except Exception as e:
                    if "rate_limit" in str(e).lower() and attempt < MAX_RETRIES - 1:
                        wait_time = REQUEST_DELAY * (attempt + 2)
                        print(f"[WARN] Rate limit hit. Waiting {wait_time}s (attempt {attempt + 1})")
                        await asyncio.sleep(wait_time)
                    else:
                        print(f"[ERROR] Failed (attempt {attempt + 1}): {e}")
                        if attempt == MAX_RETRIES - 1:
                            generations.append([Generation(text=f"[Error: {e}]")])
                        else:
                            await asyncio.sleep(REQUEST_DELAY)
        
        return LLMResult(generations=generations)

    def is_finished(self, response: LLMResult) -> bool:
        """Check if response is complete - required by BaseRagasLLM."""
        return True


# ==== 4️⃣ Check collection availability ====
try:
    collection.query(query_texts=["test"], n_results=1)
except NameError:
    print("\n[CRITICAL WARNING] The 'collection' object (ChromaDB) is NOT defined.")
    print("Please initialize your ChromaDB client/collection before running this cell.")
    raise SystemExit


# ==== 5️⃣ Generate records with caching and rate limiting ====
records = []

# Check if cached answers exist
if os.path.exists(cached_answers_path):
    print(f"[INFO] Found cached answers at '{cached_answers_path}'")
    try:
        with open(cached_answers_path, "r", encoding="utf-8") as f:
            cached_data = json.load(f)
        
        # Validate cache matches current test data
        if len(cached_data) == len(test_data):
            questions_match = all(
                cached_data[i]["question"] == test_data[i]["question"] 
                for i in range(len(test_data))
            )
            
            if questions_match:
                print(f"[INFO] Loading {len(cached_data)} cached answers (skipping generation)")
                records = cached_data
            else:
                print("[WARN] Cached questions don't match test data. Regenerating...")
        else:
            print(f"[WARN] Cache size mismatch ({len(cached_data)} vs {len(test_data)}). Regenerating...")
    except Exception as e:
        print(f"[ERROR] Failed to load cache: {e}. Regenerating...")

# Generate new answers if cache not usable
if not records:
    print(f"[INFO] Generating RAG answers with rate limiting (max 30 RPM)...")
    print(f"[INFO] Request delay: {REQUEST_DELAY}s | Batch size: {BATCH_SIZE}")
    
    for item in tqdm(test_data, desc="Generating Groq RAG answers"):
        question = item["question"]
        ideal_answer = item["ideal_answer"]

        retrieved = collection.query(query_texts=[question], n_results=TOP_K)
        retrieved_docs = retrieved["documents"][0]
        retrieved_context = "\n".join(retrieved_docs)

        prompt = (
            f"Context:\n{retrieved_context}\n\n"
            f"Question:\n{question}\n\nAnswer:"
        )

        generated_answer = generate_with_groq(prompt)
        if not generated_answer:
            generated_answer = f"[Fallback mock answer] Context excerpt: {retrieved_docs[0][:150]}..."

        records.append({
            "question": question,
            "contexts": retrieved_docs,
            "answer": generated_answer,
            "ground_truth": ideal_answer,
        })
        
        # Progress update every 5 questions
        if len(records) % 5 == 0:
            print(f"[INFO] Processed {len(records)}/{len(test_data)} questions")
    
    # Save generated answers to cache
    try:
        os.makedirs(os.path.dirname(cached_answers_path), exist_ok=True)
        with open(cached_answers_path, "w", encoding="utf-8") as f:
            json.dump(records, f, indent=2, ensure_ascii=False)
        print(f"[SUCCESS] Cached {len(records)} answers to '{cached_answers_path}'")
    except Exception as e:
        print(f"[WARN] Failed to save cache: {e}")


# ==== 6️⃣ Convert to HF Dataset ====
dataset = Dataset.from_list(records)
print(f"[INFO] Created dataset with {len(dataset)} samples")


# ==== 7️⃣ Custom HuggingFace Embedding Wrapper ====
class CustomHuggingfaceEmbeddings(HuggingfaceEmbeddings):
    """Implements both sync + async embedding methods for latest RAGAS."""
    def __init__(self, model_name: str):
        # ✅ Do not call super()
        self.model_name = model_name
        self.model = SentenceTransformer(model_name)

    # --- Sync methods ---
    def embed_documents(self, texts):
        return self.model.encode(texts, show_progress_bar=False).tolist()

    def embed_query(self, text):
        return self.model.encode([text], show_progress_bar=False).tolist()[0]

    # --- Async methods ---
    async def aembed_documents(self, texts):
        return self.embed_documents(texts)

    async def aembed_query(self, text):
        return self.embed_query(text)


# ==== 8️⃣ Evaluate with RAGAS ====
llm = GroqRagasLLM(GROQ_RAGAS_MODEL)
embeddings = CustomHuggingfaceEmbeddings(model_name=EMBED_MODEL)

print(f"\n[INFO] Starting RAGAS evaluation with {GROQ_RAGAS_MODEL}...")
print(f"[INFO] Rate limits: 30 RPM | 12K TPM | Using {REQUEST_DELAY}s delays")
print(f"[INFO] Estimated time: ~{len(dataset) * REQUEST_DELAY / 60:.1f} minutes")

start_time = time.time()

results = evaluate(
    dataset=dataset,
    metrics=[faithfulness, answer_relevancy],
    llm=llm,
    embeddings=embeddings
)

end_time = time.time()
elapsed_time = end_time - start_time

print(f"\n[SUCCESS] Evaluation completed in {elapsed_time / 60:.2f} minutes")


# ==== 9️⃣ Save Results ====
faithfulness_scores = results["faithfulness"]
answer_relevancy_scores = results["answer_relevancy"]

# ✅ Compute mean values
faithfulness_mean = float(np.mean(faithfulness_scores))
answer_relevancy_mean = float(np.mean(answer_relevancy_scores))

os.makedirs(os.path.dirname(output_metrics_path), exist_ok=True)

with open(output_metrics_path, "w", encoding="utf-8") as f:
    f.write("=== RAG Evaluation Metrics (Groq + RAGAS) ===\n")
    f.write(f"Timestamp: {datetime.now()}\n")
    f.write(f"Evaluation Duration: {elapsed_time / 60:.2f} minutes\n\n")
    f.write(f"RAG Generation Model: {GROQ_RAG_MODEL}\n")
    f.write(f"RAGAS Evaluation Model: {GROQ_RAGAS_MODEL}\n")
    f.write(f"Rate Limiting: {REQUEST_DELAY}s delay between requests\n")
    f.write(f"Cached Answers: {os.path.basename(cached_answers_path)}\n\n")
    f.write(f"Faithfulness (avg): {faithfulness_mean:.4f}\n")
    f.write(f"Answer Relevancy (avg): {answer_relevancy_mean:.4f}\n\n")
    f.write("Full Results:\n")
    f.write(str(results))

print(f"\n✅ Evaluation complete! Metrics saved to '{output_metrics_path}'")
print(f"Faithfulness (avg): {faithfulness_mean:.4f} | Answer Relevancy (avg): {answer_relevancy_mean:.4f}")
print(f"\n[TIP] To regenerate answers, delete: {cached_answers_path}")

Exists: True
Size: 3632 bytes
First few characters:
 [
    {
        "question": "How does Caesar first enter the play?",
        "ideal_answer": "In a triumphal procession; he has defeated the sons of his deceased rival, Pompey"
    },
{
"question": "W
[INFO] Loaded 25 QA pairs from testbed.
[INFO] Found cached answers at '../RAG Results/cached_rag_answers.json'
[INFO] Loading 25 cached answers (skipping generation)
[INFO] Created dataset with 25 samples

[INFO] Starting RAGAS evaluation with llama-3.3-70b-versatile...
[INFO] Rate limits: 30 RPM | 12K TPM | Using 7.5s delays
[INFO] Estimated time: ~3.1 minutes


Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hi

Exception raised in Job[4]: TimeoutError()
Exception raised in Job[14]: TimeoutError()
Exception raised in Job[2]: TimeoutError()
Exception raised in Job[6]: TimeoutError()
Exception raised in Job[0]: TimeoutError()
Exception raised in Job[10]: TimeoutError()
Exception raised in Job[12]: TimeoutError()
Exception raised in Job[8]: TimeoutError()
Exception raised in Job[15]: TimeoutError()
Exception raised in Job[3]: TimeoutError()
Evaluating:   2%|▏         | 1/50 [03:00<2:27:01, 180.04s/it]Exception raised in Job[5]: TimeoutError()
Exception raised in Job[9]: TimeoutError()
Exception raised in Job[1]: TimeoutError()
Exception raised in Job[7]: TimeoutError()
Exception raised in Job[13]: TimeoutError()
Exception raised in Job[11]: TimeoutError()


[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hi

Exception raised in Job[18]: TimeoutError()
Exception raised in Job[22]: TimeoutError()
Exception raised in Job[16]: TimeoutError()
Exception raised in Job[20]: TimeoutError()
Evaluating:  34%|███▍      | 17/50 [06:00<10:04, 18.32s/it]  Exception raised in Job[17]: TimeoutError()
Exception raised in Job[21]: TimeoutError()
Exception raised in Job[19]: TimeoutError()
Exception raised in Job[23]: TimeoutError()
Exception raised in Job[24]: TimeoutError()
Exception raised in Job[28]: TimeoutError()
Exception raised in Job[26]: TimeoutError()
Exception raised in Job[30]: TimeoutError()
Exception raised in Job[27]: TimeoutError()
Exception raised in Job[25]: TimeoutError()
Exception raised in Job[31]: TimeoutError()
Exception raised in Job[29]: TimeoutError()


[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hit. Waiting 15.0s (attempt 1)
[WARN] Rate limit hi

Exception raised in Job[34]: TimeoutError()
Exception raised in Job[32]: TimeoutError()
Exception raised in Job[33]: TimeoutError()
Evaluating:  66%|██████▌   | 33/50 [09:00<04:02, 14.24s/it]Exception raised in Job[35]: TimeoutError()
Exception raised in Job[38]: TimeoutError()
Exception raised in Job[36]: TimeoutError()
Exception raised in Job[39]: TimeoutError()
Exception raised in Job[37]: TimeoutError()
Exception raised in Job[42]: TimeoutError()
Exception raised in Job[40]: TimeoutError()
Exception raised in Job[44]: TimeoutError()
Exception raised in Job[46]: TimeoutError()
Exception raised in Job[41]: TimeoutError()
Exception raised in Job[43]: TimeoutError()
Exception raised in Job[47]: TimeoutError()
Exception raised in Job[45]: TimeoutError()


[WARN] Rate limit hit. Waiting 15.0s (attempt 1)


Evaluating:  98%|█████████▊| 49/50 [10:05<00:09,  9.64s/it]

[WARN] Rate limit hit. Waiting 22.5s (attempt 2)
[WARN] Rate limit hit. Waiting 30.0s (attempt 3)
[WARN] Rate limit hit. Waiting 37.5s (attempt 4)


Exception raised in Job[48]: TimeoutError()
Evaluating: 100%|██████████| 50/50 [12:00<00:00, 14.40s/it]



[SUCCESS] Evaluation completed in 12.06 minutes

✅ Evaluation complete! Metrics saved to '../RAG Results/multiquery_rag_metrics.txt'
Faithfulness (avg): nan | Answer Relevancy (avg): nan

[TIP] To regenerate answers, delete: ../RAG Results/cached_rag_answers.json
