In [2]:
from pathlib import Path
import chromadb
from chromadb.utils import embedding_functions

# Path
Relative_Database_path = "./chroma_Data_v4"
Absolute_Database_path = Path(Relative_Database_path).resolve()
collection_name = "anlp_rag_collection"

# Initialize Chroma
client = chromadb.PersistentClient(path=str(Absolute_Database_path))
print(f"[INFO] ChromaDB client initialized at: {Absolute_Database_path}")

# Correct embedding function: use model_name (primitive), not a model instance
embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2"
)

# Load existing collection
collection = client.get_collection(
    name=collection_name,
    embedding_function=embedding_function
)

print(f"[SUCCESS] Loaded collection '{collection_name}'")
print(f"[INFO] Count: {collection.count()}")


[INFO] ChromaDB client initialized at: C:\Users\micro\Desktop\Abhinav college\Resources\Sem 7\Advanced NLP\Assignment 2\RAG-A2\VectorDB\chroma_Data_v4
[SUCCESS] Loaded collection 'anlp_rag_collection'
[INFO] Count: 126
[SUCCESS] Loaded collection 'anlp_rag_collection'
[INFO] Count: 126


In [3]:
# === Groq + RAG + RAGAS Evaluation ===
# Prereqs:
# pip install ragas datasets groq tqdm sentence-transformers numpy

import os
import json
import numpy as np
import time
import asyncio
from datetime import datetime
from tqdm import tqdm
from datasets import Dataset
from groq import Groq

from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy
from ragas.embeddings.base import HuggingfaceEmbeddings
from sentence_transformers import SentenceTransformer
from langchain_core.prompt_values import PromptValue
from langchain_core.outputs import Generation, LLMResult
import os
from groq import Groq

# Set the API key for Groq
os.environ["GROQ_API_KEY"] = "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"

# Initialize Groq client
groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])

# ==== CONFIG ====
# Use the API key already set in previous cell
groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])

testbed_path = "../RAG Results/test_bed.json"
output_metrics_path = "../RAG Results/multiquery_rag_metrics.txt"
cached_answers_path = "../RAG Results/cached_rag_answers.json"  # NEW: Cache file
TOP_K = 3

GROQ_RAG_MODEL = "llama-3.3-70b-versatile"
GROQ_RAGAS_MODEL = "llama-3.3-70b-versatile"
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

# Rate limiting config for llama-3.3-70b-versatile:
# RPM: 30 (requests per minute)
# RPD: 1,000 (requests per day)
# TPM: 12,000 (tokens per minute)
# TPD: 100,000 (tokens per day)
REQUEST_DELAY = 2.5  # seconds between requests (allows ~24 RPM, safe margin below 30 RPM)
BATCH_SIZE = 5  # Process in small batches to avoid hitting token limits
MAX_RETRIES = 3  # Retry failed requests

print("Exists:", os.path.exists(testbed_path))
print("Size:", os.path.getsize(testbed_path), "bytes")

with open(testbed_path, "r", encoding="utf-8") as f:
    first_200 = f.read(200)
print("First few characters:\n", first_200)


# ==== 1️⃣ Load test data ====
with open(testbed_path, "r", encoding="utf-8") as f:
    test_data = json.load(f)

print(f"[INFO] Loaded {len(test_data)} QA pairs from testbed.")


# ==== 2️⃣ Groq generation with retry logic ====
def generate_with_groq(prompt, model_name=GROQ_RAG_MODEL, retries=MAX_RETRIES):
    for attempt in range(retries):
        try:
            chat_completion = groq_client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": prompt,
                    }
                ],
                model=model_name,
                temperature=0.7,
            )
            time.sleep(REQUEST_DELAY)
            return chat_completion.choices[0].message.content.strip()
        except Exception as e:
            if "rate_limit" in str(e).lower():
                wait_time = REQUEST_DELAY * (attempt + 2)  # Exponential backoff
                print(f"[WARN] Rate limit hit. Waiting {wait_time}s before retry {attempt + 1}/{retries}")
                time.sleep(wait_time)
            else:
                print(f"[ERROR] Groq API call failed (attempt {attempt + 1}): {e}")
                if attempt == retries - 1:
                    time.sleep(REQUEST_DELAY)
                    return None
                time.sleep(REQUEST_DELAY)
    return None


# ==== 3️⃣ Groq wrapper for RAGAS following BaseRagasLLM interface ====
from ragas.llms.base import BaseRagasLLM as RagasBaseLLM
from ragas.run_config import RunConfig

class GroqRagasLLM(RagasBaseLLM):
    """Groq LLM wrapper implementing RAGAS BaseRagasLLM interface."""
    
    def __init__(self, model_name):
        super().__init__(run_config=RunConfig())
        self.model_name = model_name
        self.client = Groq(api_key=os.environ["GROQ_API_KEY"])
        self.request_count = 0
        self.last_request_time = time.time()

    def _rate_limit_check(self):
        """Ensure we don't exceed rate limits"""
        current_time = time.time()
        time_since_last = current_time - self.last_request_time
        
        # Ensure minimum delay between requests
        if time_since_last < REQUEST_DELAY:
            sleep_time = REQUEST_DELAY - time_since_last
            time.sleep(sleep_time)
        
        self.last_request_time = time.time()
        self.request_count += 1

    def _extract_text_from_prompt(self, prompt: PromptValue) -> str:
        """Extract text from PromptValue object."""
        # PromptValue has .to_string() method
        if hasattr(prompt, "to_string"):
            return prompt.to_string()
        # Fallback to string conversion
        return str(prompt)

    def generate_text(
        self,
        prompt: PromptValue,
        n: int = 1,
        temperature: float = 0.01,
        stop=None,
        callbacks=None,
    ) -> LLMResult:
        """Synchronous generation - required by BaseRagasLLM."""
        prompt_text = self._extract_text_from_prompt(prompt)
        generations = []
        
        for i in range(n):
            for attempt in range(MAX_RETRIES):
                try:
                    # Rate limit check
                    self._rate_limit_check()
                    
                    chat_completion = self.client.chat.completions.create(
                        messages=[{"role": "user", "content": prompt_text}],
                        model=self.model_name,
                        temperature=temperature,
                    )
                    
                    text = chat_completion.choices[0].message.content.strip()
                    generations.append([Generation(text=text)])
                    break  # Success
                    
                except Exception as e:
                    if "rate_limit" in str(e).lower() and attempt < MAX_RETRIES - 1:
                        wait_time = REQUEST_DELAY * (attempt + 2)
                        print(f"[WARN] Rate limit hit. Waiting {wait_time}s (attempt {attempt + 1})")
                        time.sleep(wait_time)
                    else:
                        print(f"[ERROR] Failed (attempt {attempt + 1}): {e}")
                        if attempt == MAX_RETRIES - 1:
                            generations.append([Generation(text=f"[Error: {e}]")])
                        else:
                            time.sleep(REQUEST_DELAY)
        
        return LLMResult(generations=generations)

    async def agenerate_text(
        self,
        prompt: PromptValue,
        n: int = 1,
        temperature: float = 0.01,
        stop=None,
        callbacks=None,
    ) -> LLMResult:
        """Asynchronous generation - required by BaseRagasLLM."""
        prompt_text = self._extract_text_from_prompt(prompt)
        generations = []
        
        for i in range(n):
            for attempt in range(MAX_RETRIES):
                try:
                    # Rate limit check
                    await asyncio.sleep(REQUEST_DELAY)
                    
                    # Run blocking SDK call in thread
                    chat_completion = await asyncio.to_thread(
                        self.client.chat.completions.create,
                        messages=[{"role": "user", "content": prompt_text}],
                        model=self.model_name,
                        temperature=temperature,
                    )
                    
                    text = chat_completion.choices[0].message.content.strip()
                    generations.append([Generation(text=text)])
                    break  # Success
                    
                except Exception as e:
                    if "rate_limit" in str(e).lower() and attempt < MAX_RETRIES - 1:
                        wait_time = REQUEST_DELAY * (attempt + 2)
                        print(f"[WARN] Rate limit hit. Waiting {wait_time}s (attempt {attempt + 1})")
                        await asyncio.sleep(wait_time)
                    else:
                        print(f"[ERROR] Failed (attempt {attempt + 1}): {e}")
                        if attempt == MAX_RETRIES - 1:
                            generations.append([Generation(text=f"[Error: {e}]")])
                        else:
                            await asyncio.sleep(REQUEST_DELAY)
        
        return LLMResult(generations=generations)

    def is_finished(self, response: LLMResult) -> bool:
        """Check if response is complete - required by BaseRagasLLM."""
        return True


# ==== 4️⃣ Check collection availability ====
try:
    collection.query(query_texts=["test"], n_results=1)
except NameError:
    print("\n[CRITICAL WARNING] The 'collection' object (ChromaDB) is NOT defined.")
    print("Please initialize your ChromaDB client/collection before running this cell.")
    raise SystemExit


# ==== 5️⃣ Generate records with caching and rate limiting ====
records = []

# Check if cached answers exist
if os.path.exists(cached_answers_path):
    print(f"[INFO] Found cached answers at '{cached_answers_path}'")
    try:
        with open(cached_answers_path, "r", encoding="utf-8") as f:
            cached_data = json.load(f)
        
        # Validate cache matches current test data
        if len(cached_data) == len(test_data):
            questions_match = all(
                cached_data[i]["question"] == test_data[i]["question"] 
                for i in range(len(test_data))
            )
            
            if questions_match:
                print(f"[INFO] Loading {len(cached_data)} cached answers (skipping generation)")
                records = cached_data
            else:
                print("[WARN] Cached questions don't match test data. Regenerating...")
        else:
            print(f"[WARN] Cache size mismatch ({len(cached_data)} vs {len(test_data)}). Regenerating...")
    except Exception as e:
        print(f"[ERROR] Failed to load cache: {e}. Regenerating...")

# Generate new answers if cache not usable
if not records:
    print(f"[INFO] Generating RAG answers with rate limiting (max 30 RPM)...")
    print(f"[INFO] Request delay: {REQUEST_DELAY}s | Batch size: {BATCH_SIZE}")
    
    for item in tqdm(test_data, desc="Generating Groq RAG answers"):
        question = item["question"]
        ideal_answer = item["ideal_answer"]

        retrieved = collection.query(query_texts=[question], n_results=TOP_K)
        retrieved_docs = retrieved["documents"][0]
        retrieved_context = "\n".join(retrieved_docs)

        prompt = (
            f"Context:\n{retrieved_context}\n\n"
            f"Question:\n{question}\n\nAnswer:"
        )

        generated_answer = generate_with_groq(prompt)
        if not generated_answer:
            generated_answer = f"[Fallback mock answer] Context excerpt: {retrieved_docs[0][:150]}..."

        records.append({
            "question": question,
            "contexts": retrieved_docs,
            "answer": generated_answer,
            "ground_truth": ideal_answer,
        })
        
        # Progress update every 5 questions
        if len(records) % 5 == 0:
            print(f"[INFO] Processed {len(records)}/{len(test_data)} questions")
    
    # Save generated answers to cache
    try:
        os.makedirs(os.path.dirname(cached_answers_path), exist_ok=True)
        with open(cached_answers_path, "w", encoding="utf-8") as f:
            json.dump(records, f, indent=2, ensure_ascii=False)
        print(f"[SUCCESS] Cached {len(records)} answers to '{cached_answers_path}'")
    except Exception as e:
        print(f"[WARN] Failed to save cache: {e}")


# ==== 6️⃣ Convert to HF Dataset ====
dataset = Dataset.from_list(records)
print(f"[INFO] Created dataset with {len(dataset)} samples")


# ==== 7️⃣ Custom HuggingFace Embedding Wrapper ====
class CustomHuggingfaceEmbeddings(HuggingfaceEmbeddings):
    """Implements both sync + async embedding methods for latest RAGAS."""
    def __init__(self, model_name: str):
        # ✅ Do not call super()
        self.model_name = model_name
        self.model = SentenceTransformer(model_name)

    # --- Sync methods ---
    def embed_documents(self, texts):
        return self.model.encode(texts, show_progress_bar=False).tolist()

    def embed_query(self, text):
        return self.model.encode([text], show_progress_bar=False).tolist()[0]

    # --- Async methods ---
    async def aembed_documents(self, texts):
        return self.embed_documents(texts)

    async def aembed_query(self, text):
        return self.embed_query(text)


# ==== 8️⃣ Evaluate with RAGAS ====
llm = GroqRagasLLM(GROQ_RAGAS_MODEL)
embeddings = CustomHuggingfaceEmbeddings(model_name=EMBED_MODEL)

print(f"\n[INFO] Starting RAGAS evaluation with {GROQ_RAGAS_MODEL}...")
print(f"[INFO] Rate limits: 30 RPM | 12K TPM | Using {REQUEST_DELAY}s delays")
print(f"[INFO] Estimated time: ~{len(dataset) * REQUEST_DELAY / 60:.1f} minutes")

start_time = time.time()

results = evaluate(
    dataset=dataset,
    metrics=[faithfulness, answer_relevancy],
    llm=llm,
    embeddings=embeddings
)

end_time = time.time()
elapsed_time = end_time - start_time

print(f"\n[SUCCESS] Evaluation completed in {elapsed_time / 60:.2f} minutes")


# ==== 9️⃣ Save Results ====
faithfulness_scores = results["faithfulness"]
answer_relevancy_scores = results["answer_relevancy"]

# ✅ Compute mean values
faithfulness_mean = float(np.mean(faithfulness_scores))
answer_relevancy_mean = float(np.mean(answer_relevancy_scores))

os.makedirs(os.path.dirname(output_metrics_path), exist_ok=True)

with open(output_metrics_path, "w", encoding="utf-8") as f:
    f.write("=== RAG Evaluation Metrics (Groq + RAGAS) ===\n")
    f.write(f"Timestamp: {datetime.now()}\n")
    f.write(f"Evaluation Duration: {elapsed_time / 60:.2f} minutes\n\n")
    f.write(f"RAG Generation Model: {GROQ_RAG_MODEL}\n")
    f.write(f"RAGAS Evaluation Model: {GROQ_RAGAS_MODEL}\n")
    f.write(f"Rate Limiting: {REQUEST_DELAY}s delay between requests\n")
    f.write(f"Cached Answers: {os.path.basename(cached_answers_path)}\n\n")
    f.write(f"Faithfulness (avg): {faithfulness_mean:.4f}\n")
    f.write(f"Answer Relevancy (avg): {answer_relevancy_mean:.4f}\n\n")
    f.write("Full Results:\n")
    f.write(str(results))

print(f"\n✅ Evaluation complete! Metrics saved to '{output_metrics_path}'")
print(f"Faithfulness (avg): {faithfulness_mean:.4f} | Answer Relevancy (avg): {answer_relevancy_mean:.4f}")
print(f"\n[TIP] To regenerate answers, delete: {cached_answers_path}")

Exists: True
Size: 3530 bytes
First few characters:
 [
    {
        "question": "How does Caesar first enter the play?",
        "ideal_answer": "In a triumphal procession; he has defeated the sons of his deceased rival, Pompey"
    },
{
"question": "W
[INFO] Loaded 25 QA pairs from testbed.
[INFO] Found cached answers at '../RAG Results/cached_rag_answers.json'
[INFO] Loading 25 cached answers (skipping generation)
[INFO] Created dataset with 25 samples
[INFO] Found cached answers at '../RAG Results/cached_rag_answers.json'
[INFO] Loading 25 cached answers (skipping generation)
[INFO] Created dataset with 25 samples

[INFO] Starting RAGAS evaluation with llama-3.3-70b-versatile...
[INFO] Rate limits: 30 RPM | 12K TPM | Using 2.5s delays
[INFO] Estimated time: ~1.0 minutes

[INFO] Starting RAGAS evaluation with llama-3.3-70b-versatile...
[INFO] Rate limits: 30 RPM | 12K TPM | Using 2.5s delays
[INFO] Estimated time: ~1.0 minutes


Evaluating:  10%|█         | 5/50 [00:07<00:52,  1.17s/it]

[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (att

Evaluating:  12%|█▏        | 6/50 [00:18<02:34,  3.51s/it]

[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 5.0s (att

Evaluating:  14%|█▍        | 7/50 [00:42<06:11,  8.64s/it]

[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11787, Requested 667. Please try again in 2.269999999s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11753, Requested 626. Please try again in 1.894999999s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit.

Evaluating:  16%|█▌        | 8/50 [00:48<05:25,  7.76s/it]

[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)


Evaluating:  18%|█▊        | 9/50 [00:55<05:14,  7.68s/it]

[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11751, Requested 1547. Please try again in 6.489999999s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` s

Exception raised in Job[16]: OutputParserException(Invalid json output: The output string did not satisfy the constraints given in the prompt. The task requires analyzing the complexity of each sentence in the answer and breaking it down into fully understandable statements without using pronouns. However, the provided output string contains an error message related to a rate limit being reached for a specific model, which does not comply with the required JSON schema. To fix this, the output should be reformatted according to the specified schema, focusing on generating statements that are free of pronouns and adhere to the given JSON structure. For instance, given the input question and answer, the output should be a JSON object containing an array of statements that are direct, understandable, and pronoun-free, such as: {"statements": ["The subject of the question was a historical figure.", "This figure was known for specific achievements."]}. Ensuring the output complies with the {

[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11984, Requested 1492. Please try again in 7.38s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11984, Requested 1492. Please try again in 7.38s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limi

Evaluating:  22%|██▏       | 11/50 [01:09<04:41,  7.21s/it]

[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11412, Requested 1733. Please try again in 5.725s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11412, Requested 1733. Please try again in 5.725s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit 

Exception raised in Job[15]: OutputParserException(Invalid json output: The output string did not satisfy the constraints given in the prompt. The input contains an error message indicating a rate limit has been reached for the model. To generate a question for the given answer and identify if the answer is noncommittal, the actual response is needed, not an error message. However, based on the provided error message, it is not possible to generate a question or determine noncommittal status as the error does not contain a response that can be analyzed. The error message is: {"error": {"message": "Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11986, Requested 455. Please try again in 2.205s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing", "type": "tokens", "code": "rate_limit_exceeded"}}. Given this context, the ques

[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11610, Requested 939. Please try again in 2.745s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11746, Requested 1836. Please try again in 7.91s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in org

Evaluating:  26%|██▌       | 13/50 [01:26<04:48,  7.80s/it]

[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)


Exception raised in Job[11]: OutputParserException(Invalid json output: The output string did not satisfy the constraints given in the prompt. Fix the output string and return it.\nPlease return the output in a JSON format that complies with the following schema as specified in JSON Schema: \{\"properties\": {\"text\": {\"title\": \"Text\", \"type\": \"string\"}}, \"required\": [\"text\"], \"title\": \"StringIO\", \"type\": \"object\"}\}
For troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE )
Evaluating:  28%|██▊       | 14/50 [01:37<05:14,  8.75s/it]

[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)


Evaluating:  30%|███       | 15/50 [01:45<04:57,  8.49s/it]

[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11099, Requested 1329. Please try again in 2.14s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/b

Evaluating:  32%|███▏      | 16/50 [01:55<05:04,  8.95s/it]

[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)


Evaluating:  34%|███▍      | 17/50 [02:03<04:46,  8.68s/it]

[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11104, Requested 1305. Please try again in 2.045s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11104, Requested 1305. Please tr

Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt statement_generator_prompt failed to parse output: The output parser failed to parse the output including retries.
Exception raised in Job[6]: RagasOutputParserException(The output parser failed to parse the output including retries.)
Evaluating:  36%|███▌      | 18/50 [02:12<04:41,  8.81s/it]Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt statement_generator_prompt failed to parse output: The output parser failed to parse the output including retries.
Exception ra

[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11910, Requested 1438. Please try again in 6.739999999s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11816, Requested 1960. Please try again in 8.879999999s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-ver

Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt statement_generator_prompt failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt statement_generator_prompt failed to parse output: The output parser failed to parse the output including retries.
Exception raised in Job[4]: RagasOutputParserException(The output parser failed to parse the output including retries.)
Evaluating:  38%|███▊      | 19/50 [02:16<03:53,  7.54s/it]Exception ra

[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11802, Requested 1992. Please try again in 8.969999999s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11765, Requested 2010. Please try again in 8.875s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Wai

Evaluating:  40%|████      | 20/50 [02:21<03:20,  6.68s/it]

[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11549, Requested 1818. Please try again in 6.835s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)


Evaluating:  42%|████▏     | 21/50 [02:33<04:02,  8.36s/it]

[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11176, Requested 1926. Please try again in 5.51s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11176, Requested 1926. Please try again in 5.51s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limi

Evaluating:  44%|████▍     | 22/50 [02:38<03:20,  7.15s/it]

[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11430, Requested 2186. Please try again in 8.08s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11789, Requested 2225. Please try again in 10.07s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit r

Evaluating:  46%|████▌     | 23/50 [02:52<04:12,  9.37s/it]

[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 10843, Requested 1784. Please try again in 3.135s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 10843, Requested 1784. Please try again in 3.135s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_li

Exception raised in Job[2]: TimeoutError()
Exception raised in Job[0]: TimeoutError()
Exception raised in Job[0]: TimeoutError()
Evaluating:  48%|████▊     | 24/50 [03:00<03:47,  8.77s/it]Exception raised in Job[8]: TimeoutError()
Exception raised in Job[14]: TimeoutError()
Exception raised in Job[10]: TimeoutError()
Evaluating:  48%|████▊     | 24/50 [03:00<03:47,  8.77s/it]Exception raised in Job[8]: TimeoutError()
Exception raised in Job[14]: TimeoutError()
Exception raised in Job[10]: TimeoutError()
Exception raised in Job[12]: TimeoutError()
Exception raised in Job[13]: TimeoutError()
Evaluating:  60%|██████    | 30/50 [03:00<00:49,  2.47s/it]Exception raised in Job[12]: TimeoutError()
Exception raised in Job[13]: TimeoutError()
Evaluating:  60%|██████    | 30/50 [03:00<00:49,  2.47s/it]

[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11378, Requested 2311. Please try again in 8.444999999s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting

Exception raised in Job[18]: TimeoutError()
Evaluating:  64%|██████▍   | 32/50 [03:06<00:48,  2.69s/it]Exception raised in Job[20]: TimeoutError()
Exception raised in Job[20]: TimeoutError()


[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)


Evaluating:  68%|██████▊   | 34/50 [03:10<00:39,  2.44s/it]

[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11803, Requested 1472. Please try again in 6.375s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organ

Evaluating:  70%|███████   | 35/50 [03:33<01:26,  5.75s/it]

[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11882, Requested 472. Please try again in 1.77s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11802, Requested 471. Please try again in 1.365s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in orga

Evaluating:  72%|███████▏  | 36/50 [03:39<01:22,  5.86s/it]

[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)


Evaluating:  74%|███████▍  | 37/50 [03:40<01:02,  4.84s/it]

[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11594, Requested 1714. Please try again in 6.54s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


Exception raised in Job[22]: TimeoutError()
Evaluating:  76%|███████▌  | 38/50 [03:42<00:50,  4.19s/it]

[WARN] Rate limit hit. Waiting 5.0s (attempt 1)


Exception raised in Job[42]: OutputParserException(Invalid json output: The output string did not satisfy the constraints given in the prompt. The correct output should be in a JSON format that complies with the specified schema. The input string should be analyzed and broken down into fully understandable statements without pronouns. The output should be formatted according to the provided JSON schema.
For troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE )
Evaluating:  78%|███████▊  | 39/50 [03:44<00:40,  3.66s/it]

[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)


Exception raised in Job[39]: OutputParserException(Invalid json output: The output string did not satisfy the constraints given in the prompt. The task requires generating a question for the given answer and identifying if the answer is noncommittal. The provided answer is: "The provided text does not explicitly mention the news Brutus and Cassius receive from Rome. However, based on the context of Act 4, Scene 3, it appears that they receive some unfavorable news or reports, as Cassius mentions that Brutus has wronged him by condemning Lucius Pella for taking bribes, despite Cassius' letters in his favor. This suggests that Brutus and Cassius may have received news of discontent or disapproval from Rome, but the exact details are not specified in the given text." The question for this answer could be: "What news do Brutus and Cassius receive from Rome in Act 4, Scene 3?" The answer is noncommittal because it does not provide a clear or direct response to the question, instead offering

[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11833, Requested 1776. Please try again in 8.045s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)


Exception raised in Job[24]: TimeoutError()
Evaluating:  82%|████████▏ | 41/50 [03:55<00:40,  4.53s/it]

[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)


Exception raised in Job[45]: OutputParserException(Invalid json output: The output string did not satisfy the constraints given in the prompt. The correct output should be in the following format: {"question": "What is Cassius's response to Brutus's actions?", "noncommittal": 0}. The given answer is committal as it provides a clear explanation of Cassius's response, therefore noncommittal is 0.
For troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE )
Evaluating:  84%|████████▍ | 42/50 [03:59<00:33,  4.24s/it]

[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)
[WARN] Rate limit hit. Waiting 5.0s (attempt 1)


Evaluating:  86%|████████▌ | 43/50 [04:06<00:36,  5.19s/it]

[WARN] Rate limit hit. Waiting 7.5s (attempt 2)


Exception raised in Job[26]: TimeoutError()
Evaluating:  88%|████████▊ | 44/50 [04:09<00:26,  4.48s/it]

[WARN] Rate limit hit. Waiting 5.0s (attempt 1)


Exception raised in Job[44]: OutputParserException(Invalid json output: The output string did not satisfy the constraints given in the prompt. The correct output should be in a JSON format that complies with the specified schema. The input string contains an error message indicating a rate limit exceeded for the model "llama-3.3-70b-versatile" in organization "org_01k9ys8bswepfshkmm14afrk4c" service tier "on_demand" on tokens per minute (TPM). The error message suggests trying again in 1.965 seconds or upgrading to the Dev Tier for more tokens.
For troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE )
Evaluating:  90%|█████████ | 45/50 [04:15<00:24,  4.93s/it]

[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11333, Requested 1515. Please try again in 4.24s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


Evaluating:  92%|█████████▏| 46/50 [04:24<00:24,  6.05s/it]

[ERROR] Failed (attempt 3): Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01k9ys8bswepfshkmm14afrk4c` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Used 11485, Requested 1914. Please try again in 6.995s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)
[WARN] Rate limit hit. Waiting 7.5s (attempt 2)


Evaluating:  94%|█████████▍| 47/50 [04:34<00:21,  7.27s/it]

[WARN] Rate limit hit. Waiting 7.5s (attempt 2)


Exception raised in Job[30]: TimeoutError()
Evaluating:  98%|█████████▊| 49/50 [04:47<00:06,  6.55s/it]

[WARN] Rate limit hit. Waiting 5.0s (attempt 1)


Exception raised in Job[32]: TimeoutError()
Evaluating: 100%|██████████| 50/50 [05:03<00:00,  6.06s/it]




[SUCCESS] Evaluation completed in 5.17 minutes

✅ Evaluation complete! Metrics saved to '../RAG Results/multiquery_rag_metrics.txt'
Faithfulness (avg): nan | Answer Relevancy (avg): nan

[TIP] To regenerate answers, delete: ../RAG Results/cached_rag_answers.json
