From d74bf20659d740c325033b4de8913c9a75aa24fa Mon Sep 17 00:00:00 2001
From: Ofer Mendelevitch
Date: Mon, 29 Sep 2025 14:46:05 -0700
Subject: [PATCH 1/2] initial notebooks for VHC/HHEM with langchain and
 llamaindex

---
 .../using-vhc.ipynb                           |   0
 .../vhc-langchain-integration.ipynb           | 511 +++++++++++++++++
 .../vhc-llamaindex-integration.ipynb          | 519 ++++++++++++++++++
 3 files changed, 1030 insertions(+)
 rename notebooks/{ => hallucination_mitigation}/using-vhc.ipynb (100%)
 create mode 100644 notebooks/hallucination_mitigation/vhc-langchain-integration.ipynb
 create mode 100644 notebooks/hallucination_mitigation/vhc-llamaindex-integration.ipynb

diff --git a/notebooks/using-vhc.ipynb b/notebooks/hallucination_mitigation/using-vhc.ipynb
similarity index 100%
rename from notebooks/using-vhc.ipynb
rename to notebooks/hallucination_mitigation/using-vhc.ipynb
diff --git a/notebooks/hallucination_mitigation/vhc-langchain-integration.ipynb b/notebooks/hallucination_mitigation/vhc-langchain-integration.ipynb
new file mode 100644
index 0000000..9511f32
--- /dev/null
+++ b/notebooks/hallucination_mitigation/vhc-langchain-integration.ipynb
@@ -0,0 +1,511 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\"Open"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# VHC (Vectara Hallucination Corrector) with LangChain Integration\n",
+    "\n",
+    "This notebook demonstrates how to integrate Vectara's HHEM (Hughes Hallucination Evaluation Model) and VHC (Vectara Hallucination Corrector) with a standard LangChain RAG workflow."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Installation and Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install --quiet langchain langchain_openai langchain_community langchain_chroma langgraph requests python-dotenv chromadb termcolor"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Environment Setup\n",
+    "\n",
+    "Set up your environment variables. 
You'll need:\n", + "- `VECTARA_API_KEY`: Your Vectara API key (for HHEM and VHC)\n", + "- `OPENAI_API_KEY`: Your OpenAI API key (for LangChain RAG)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Environment variables configured successfully\n" + ] + } + ], + "source": [ + "import os\n", + "import json\n", + "import requests\n", + "from typing import List, Dict, Any, Optional\n", + "from dotenv import load_dotenv\n", + "from termcolor import colored\n", + "\n", + "# Load environment variables\n", + "load_dotenv()\n", + "\n", + "# Set your API keys here or in your environment\n", + "os.environ[\"VECTARA_API_KEY\"] = os.getenv(\"VECTARA_API_KEY\", \"\")\n", + "os.environ[\"OPENAI_API_KEY\"] = os.getenv(\"OPENAI_API_KEY\", \"\")\n", + "\n", + "# Verify API keys are set\n", + "if not os.getenv('VECTARA_API_KEY') or os.getenv('VECTARA_API_KEY') == '':\n", + " raise EnvironmentError(\"VECTARA_API_KEY environment variable is not set.\")\n", + "\n", + "if not os.getenv('OPENAI_API_KEY') or os.getenv('OPENAI_API_KEY') == '':\n", + " raise EnvironmentError(\"OPENAI_API_KEY environment variable is not set.\")\n", + "\n", + "print(\"Environment variables configured successfully\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Vectara HHEM and VHC Client\n", + "\n", + "Create clients for interacting with Vectara's HHEM and VHC endpoints:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "class VectaraClient:\n", + " \"\"\"Client for interacting with Vectara HHEM and VHC endpoints\"\"\"\n", + " \n", + " def __init__(self, api_key: str):\n", + " self.api_key = api_key\n", + " self.session = requests.Session()\n", + " self.base_url = \"https://api.vectara.io\"\n", + " \n", + " def evaluate_factual_consistency(self, query: str, response: str, documents: List[str]) -> Dict[str, Any]:\n", + " \"\"\"Evaluate factual consistency using HHEM\"\"\"\n", + " \n", + " payload = {\n", + " \"generated_text\": response,\n", + " \"source_texts\": documents\n", + " }\n", + " \n", + " headers = {\n", + " \"Content-Type\": \"application/json\",\n", + " \"Accept\": \"application/json\",\n", + " \"x-api-key\": self.api_key\n", + " }\n", + " \n", + " try:\n", + " response = self.session.post(\n", + " f\"{self.base_url}/v2/evaluate_factual_consistency\",\n", + " json=payload,\n", + " headers=headers,\n", + " timeout=30\n", + " )\n", + " response.raise_for_status()\n", + " return response.json()\n", + " \n", + " except requests.exceptions.RequestException as e:\n", + " raise RuntimeError(f\"HHEM API request failed: {e}\")\n", + " except json.JSONDecodeError as e:\n", + " raise RuntimeError(f\"Failed to parse HHEM response: {e}\")\n", + " \n", + " def correct_hallucinations(\n", + " self, \n", + " query: str, \n", + " generated_text: str, \n", + " documents: List[str],\n", + " model_name: str = \"vhc-large-1.0\"\n", + " ) -> Dict[str, Any]:\n", + " \"\"\"Correct hallucinations using VHC\"\"\"\n", + " \n", + " payload = {\n", + " \"generated_text\": generated_text,\n", + " \"query\": query,\n", + " \"documents\": [{\"text\": doc} for doc in documents],\n", + " \"model_name\": model_name\n", + " }\n", + " \n", + " headers = {\n", + " \"Content-Type\": \"application/json\",\n", + " \"Accept\": \"application/json\",\n", + " \"x-api-key\": self.api_key\n", + " }\n", + " \n", + " try:\n", + " response = self.session.post(\n", + " 
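+    "                # Vectara's v2 REST endpoint for hallucination correction (VHC)\n",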
f\"{self.base_url}/v2/hallucination_correctors/correct_hallucinations\",\n", + " json=payload,\n", + " headers=headers,\n", + " timeout=60\n", + " )\n", + " response.raise_for_status()\n", + " \n", + " data = response.json()\n", + " corrected_text = data.get(\"corrected_text\", \"\")\n", + " corrections = data.get(\"corrections\", [])\n", + " \n", + " return {\n", + " \"corrected_text\": corrected_text,\n", + " \"corrections\": corrections,\n", + " \"original_text\": generated_text\n", + " }\n", + " \n", + " except requests.exceptions.RequestException as e:\n", + " raise RuntimeError(f\"VHC API request failed: {e}\")\n", + " except json.JSONDecodeError as e:\n", + " raise RuntimeError(f\"Failed to parse VHC response: {e}\")\n", + "\n", + "# Initialize Vectara client\n", + "vectara_client = VectaraClient(os.getenv(\"VECTARA_API_KEY\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup: LangChain RAG Chains\n", + "\n", + "Let's create two different LangChain RAG chains.\n", + "The first one will respond with \"I don't know\" if it cannot respond based on the source text\n", + "The second one is instructed to use its internal knowledge, which can result in a hallucination relative to the RAG information." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n", + "from langchain_community.vectorstores import Chroma\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "from langchain_core.runnables import RunnablePassthrough\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "from langchain_core.documents import Document\n", + "\n", + "# Initialize LLM and embeddings\n", + "llm = ChatOpenAI(model=\"gpt-4o\", temperature=0.0)\n", + "embeddings = OpenAIEmbeddings()\n", + "\n", + "# Create knowledge base with more detailed but still limited information\n", + "sample_docs = [\n", + " \"The Eiffel Tower is a wrought-iron lattice tower located in Paris, France. It was designed by Alexandre Gustave Eiffel.\",\n", + " \"Leonardo da Vinci was an Italian Renaissance polymath who painted the Mona Lisa. He lived from 1452 to 1519.\",\n", + " \"William Shakespeare was an English playwright and poet who wrote Romeo and Juliet. He is considered the greatest writer in the English language.\",\n", + " \"The Great Wall of China is an ancient series of walls and fortifications built to protect Chinese states from invasions.\",\n", + " \"Albert Einstein was a German-born theoretical physicist who developed the theory of relativity. 
He won the Nobel Prize in Physics in 1921.\",\n", + " \"The Statue of Liberty is a neoclassical sculpture located on Liberty Island in New York Harbor.\",\n", + "]\n", + "\n", + "# Create documents and vector store (in-memory only)\n", + "documents = [Document(page_content=text) for text in sample_docs]\n", + "vectorstore = Chroma.from_documents(\n", + " documents=documents,\n", + " embedding=embeddings\n", + ")\n", + "\n", + "retriever = vectorstore.as_retriever(search_kwargs={\"k\": 3})\n", + "\n", + "# Create two different RAG prompts for different behaviors\n", + "\n", + "# RAG Chain 1: Conservative - only answers based on context, doesn't hallucinate\n", + "rag_prompt_conservative = ChatPromptTemplate.from_template(\"\"\"\n", + "You are a precise assistant that only provides information based on the given context.\n", + "If the context doesn't contain enough information to answer the question completely, respond with \"I don't know\" or \"The provided context doesn't contain enough information to answer this question.\"\n", + "\n", + "Do NOT use your external knowledge. Only use the information provided in the context.\n", + "\n", + "Context: {context}\n", + "\n", + "Question: {question}\n", + "\n", + "Answer based ONLY on the context provided:\n", + "\"\"\")\n", + "\n", + "# RAG Chain 2: Expansive - fills in details from external knowledge, prone to hallucinations\n", + "rag_prompt_expansive = ChatPromptTemplate.from_template(\"\"\"\n", + "You are a knowledgeable assistant. Based on the provided context, answer the question with specific details and facts.\n", + "If the context doesn't have complete information, use your extensive knowledge to provide a comprehensive answer with specific details like dates, measurements, costs, visitor numbers, and other precise facts.\n", + "\n", + "Context: {context}\n", + "\n", + "Question: {question}\n", + "\n", + "Provide a detailed answer with specific facts and figures:\n", + "\"\"\")\n", + "\n", + "def format_docs(docs):\n", + " return \"\\n\\n\".join(doc.page_content for doc in docs)\n", + "\n", + "# Create both RAG chains\n", + "rag_chain_conservative = (\n", + " {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n", + " | rag_prompt_conservative\n", + " | llm\n", + " | StrOutputParser()\n", + ")\n", + "\n", + "rag_chain_expansive = (\n", + " {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n", + " | rag_prompt_expansive\n", + " | llm\n", + " | StrOutputParser()\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Enhanced HHEM + VHC Pipeline Function" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def rag_with_hhem_vhc_pipeline(query: str, use_conservative: bool = True) -> Dict[str, Any]:\n", + " \"\"\"Complete pipeline: RAG -> HHEM evaluation -> VHC correction -> Post-correction HHEM\"\"\"\n", + "\n", + " # Step 1: Choose RAG chain and get response\n", + " rag_chain = rag_chain_conservative if use_conservative else rag_chain_expansive\n", + " chain_type = \"Conservative\" if use_conservative else \"Expansive\"\n", + " rag_response = rag_chain.invoke(query)\n", + "\n", + " # Step 2: Get source documents for evaluation\n", + " source_docs = retriever.invoke(query)\n", + " context_texts = [doc.page_content for doc in source_docs]\n", + "\n", + " # Step 3: Check if we should skip HHEM for \"I don't know\" responses\n", + " skip_hhem = (use_conservative and\n", + " (\"don't know\" in 
rag_response.lower() or\n",
+    "                  \"doesn't contain enough information\" in rag_response.lower() or\n",
+    "                  \"not enough information\" in rag_response.lower()))\n",
+    "\n",
+    "    if skip_hhem:\n",
+    "        fcs_score = None\n",
+    "        hhem_result = {}\n",
+    "        corrected_text = rag_response\n",
+    "        corrections = []\n",
+    "        vhc_result = {\"corrected_text\": rag_response, \"corrections\": []}\n",
+    "        post_correction_fcs_score = None\n",
+    "        post_correction_hhem_result = {}\n",
+    "    else:\n",
+    "        # Pre-correction HHEM evaluation\n",
+    "        try:\n",
+    "            hhem_result = vectara_client.evaluate_factual_consistency(\n",
+    "                query=query,\n",
+    "                response=rag_response,\n",
+    "                documents=context_texts\n",
+    "            )\n",
+    "            fcs_score = hhem_result.get(\"score\", 0.0)\n",
+    "        except Exception as e:\n",
+    "            fcs_score = None\n",
+    "            hhem_result = {}\n",
+    "\n",
+    "        # VHC correction\n",
+    "        try:\n",
+    "            vhc_result = vectara_client.correct_hallucinations(\n",
+    "                query=query,\n",
+    "                generated_text=rag_response,\n",
+    "                documents=context_texts\n",
+    "            )\n",
+    "            corrected_text = vhc_result[\"corrected_text\"]\n",
+    "            corrections = vhc_result[\"corrections\"]\n",
+    "        except Exception as e:\n",
+    "            corrected_text = rag_response\n",
+    "            corrections = []\n",
+    "            vhc_result = {\"corrected_text\": rag_response, \"corrections\": []}\n",
+    "\n",
+    "        # Post-correction HHEM evaluation (only if VHC made corrections)\n",
+    "        # Re-scoring the corrected text with HHEM verifies the correction improved grounding\n",
+    "        if corrections and corrected_text != rag_response:\n",
+    "            try:\n",
+    "                post_correction_hhem_result = vectara_client.evaluate_factual_consistency(\n",
+    "                    query=query,\n",
+    "                    response=corrected_text,\n",
+    "                    documents=context_texts\n",
+    "                )\n",
+    "                post_correction_fcs_score = post_correction_hhem_result.get(\"score\", 0.0)\n",
+    "            except Exception as e:\n",
+    "                post_correction_fcs_score = None\n",
+    "        else:\n",
+    "            post_correction_fcs_score = fcs_score  # Same as original if no corrections\n",
+    "\n",
+    "    return {\n",
+    "        \"query\": query,\n",
+    "        \"original_response\": rag_response,\n",
+    "        \"context_documents\": context_texts,\n",
+    "        \"fcs_score\": fcs_score,\n",
+    "        \"hhem_result\": hhem_result,\n",
+    "        \"corrected_response\": corrected_text,\n",
+    "        \"corrections\": corrections,\n",
+    "        \"post_correction_fcs_score\": post_correction_fcs_score,\n",
+    "        \"skipped_evaluation\": skip_hhem\n",
+    "    }"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Example 1: RAG Chain with no hallucination"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[1mQuery:\u001b[0m What are top 3 works by Leonardo Devinci?\n",
+      "\u001b[1mResponse:\u001b[0m The provided context doesn't contain enough information to answer this question.\n",
+      "\u001b[1mHHEM Score:\u001b[0m N/A\n"
+     ]
+    }
+   ],
+   "source": [
+    "query = \"What are top 3 works by Leonardo Devinci?\"\n",
+    "\n",
+    "result1 = rag_with_hhem_vhc_pipeline(query, use_conservative=True)\n",
+    "\n",
+    "print(colored(\"Query:\", attrs=[\"bold\"]), f\"{result1['query']}\")\n",
+    "print(colored(\"Response:\", attrs=[\"bold\"]), f\"{result1['original_response']}\")\n",
+    "print(colored(\"HHEM Score:\", attrs=[\"bold\"]), f\"{result1['fcs_score']:.3f}\" if result1['fcs_score'] is not None else \"N/A\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Example 2: RAG Chain with hallucination"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
"output_type": "stream", + "text": [ + "\u001b[1mQuery:\u001b[0m What are top 3 works by Leonardo Devinci?\n", + "\n", + "\u001b[1mOriginal Response:\u001b[0m Leonardo da Vinci, a quintessential Renaissance polymath, is renowned for his contributions to art, science, and engineering. Among his artistic masterpieces, three works stand out as his most celebrated:\n", + "\n", + "1. **Mona Lisa**: Painted between 1503 and 1506, the Mona Lisa is arguably Leonardo's most famous work and one of the most recognized paintings in the world. It is housed in the Louvre Museum in Paris, France. The painting is renowned for the subject's enigmatic expression, the use of sfumato (a technique of softening transitions between colors), and its detailed background. The Mona Lisa's fame is also partly due to its theft in 1911, which brought it significant international attention.\n", + "\n", + "2. **The Last Supper**: Created between 1495 and 1498, this mural is located in the Convent of Santa Maria delle Grazie in Milan, Italy. The Last Supper depicts the moment Jesus announces that one of his disciples will betray him. The work is celebrated for its composition, perspective, and the emotional expressions of the figures. Unfortunately, due to Leonardo's experimental technique of using tempera and oil on dry plaster, the painting has suffered significant deterioration over the centuries.\n", + "\n", + "3. **Vitruvian Man**: Although not a painting, the Vitruvian Man is one of Leonardo's most famous drawings, created around 1490. It is a study of the proportions of the human body, based on the work of the ancient Roman architect Vitruvius. The drawing is accompanied by notes and is a prime example of Leonardo's blend of art and science. It is housed in the Gallerie dell'Accademia in Venice, Italy, and is rarely displayed due to its fragility.\n", + "\n", + "These works exemplify Leonardo da Vinci's mastery of art and his profound impact on both the Renaissance and the broader history of art.\n", + "\n", + "\u001b[1mPre-correction HHEM Score:\u001b[0m 0.055\n", + "\u001b[1mPost-correction HHEM Score:\u001b[0m 0.996\n", + "\u001b[1mCorrected Response:\u001b[0m Leonardo da Vinci was an Italian Renaissance polymath who painted the Mona Lisa.\n", + "\n", + "\n", + "\u001b[1mCorrections made:\u001b[0m\n", + " 1. Removed: 'Among his artistic masterpieces, three works stand out as his most celebrated:\n", + "\n", + "1. **Mona Lisa**: Painted between 1503 and 1506, the Mona Lisa is arguably Leonardo's most famous work and one of the most recognized paintings in the world. It is housed in the Louvre Museum in Paris, France. The painting is renowned for the subject's enigmatic expression, the use of sfumato (a technique of softening transitions between colors), and its detailed background. The Mona Lisa's fame is also partly due to its theft in 1911, which brought it significant international attention.\n", + "\n", + "2. **The Last Supper**: Created between 1495 and 1498, this mural is located in the Convent of Santa Maria delle Grazie in Milan, Italy. The Last Supper depicts the moment Jesus announces that one of his disciples will betray him. The work is celebrated for its composition, perspective, and the emotional expressions of the figures. Unfortunately, due to Leonardo's experimental technique of using tempera and oil on dry plaster, the painting has suffered significant deterioration over the centuries.\n", + "\n", + "3. 
**Vitruvian Man**: Although not a painting, the Vitruvian Man is one of Leonardo's most famous drawings, created around 1490. It is a study of the proportions of the human body, based on the work of the ancient Roman architect Vitruvius. The drawing is accompanied by notes and is a prime example of Leonardo's blend of art and science. It is housed in the Gallerie dell'Accademia in Venice, Italy, and is rarely displayed due to its fragility....' - The Source only states that Leonardo da Vinci painted the Mona Lisa. It does not mention The Last Supper, Vitruvian Man, or any other works, nor does it provide any details about these works. All details about these works are not supported by the Source.\n"
+     ]
+    }
+   ],
+   "source": [
+    "query = \"What are top 3 works by Leonardo Devinci?\"\n",
+    "\n",
+    "result2 = rag_with_hhem_vhc_pipeline(query, use_conservative=False)\n",
+    "\n",
+    "print(colored(\"Query:\", attrs=[\"bold\"]), f\"{result2['query']}\\n\")\n",
+    "print(colored(\"Original Response:\", attrs=[\"bold\"]), f\"{result2['original_response']}\\n\")\n",
+    "print(colored(\"Pre-correction HHEM Score:\", attrs=[\"bold\"]), f\"{result2['fcs_score']:.3f}\" if result2['fcs_score'] is not None else \"N/A\")\n",
+    "\n",
+    "if result2['corrections']:\n",
+    "    print(colored(\"Post-correction HHEM Score:\", attrs=[\"bold\"]), f\"{result2['post_correction_fcs_score']:.3f}\" if result2['post_correction_fcs_score'] is not None else \"N/A\")\n",
+    "    print(colored(\"Corrected Response:\", attrs=[\"bold\"]), f\"{result2['corrected_response']}\")\n",
+    "\n",
+    "    print(\"\\n\")\n",
+    "    print(colored(\"Corrections made:\", attrs=[\"bold\"]))\n",
+    "    for i, correction in enumerate(result2['corrections'], 1):\n",
+    "        original = correction.get('original_text', '')\n",
+    "        explanation = correction.get('explanation', '')\n",
+    "        print(f\"  {i}. Removed: '{original}...' - {explanation}\")\n",
+    "else:\n",
+    "    print(colored(\"VHC:\", attrs=[\"bold\"]), \"No corrections needed\")"
+   ]
+  },
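+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Optional: a simple HHEM-based guardrail\n",
+    "\n",
+    "A common pattern is to gate on the HHEM score: serve the original response when the score is high, and fall back to the VHC-corrected text otherwise. The cell below is a minimal sketch using the pipeline result from Example 2; the 0.5 threshold is an illustrative assumption, not a Vectara recommendation, and should be tuned per application."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Minimal guardrail sketch: prefer the original response only when HHEM deems it grounded.\n",
+    "# The 0.5 threshold is illustrative; tune it for your own precision/recall trade-off.\n",
+    "HHEM_THRESHOLD = 0.5\n",
+    "\n",
+    "def guarded_answer(result: Dict[str, Any], threshold: float = HHEM_THRESHOLD) -> str:\n",
+    "    score = result[\"fcs_score\"]\n",
+    "    if score is None or score >= threshold:\n",
+    "        # No score available (e.g., an \"I don't know\" response) or the answer is grounded\n",
+    "        return result[\"original_response\"]\n",
+    "    # Low factual-consistency score: fall back to the VHC-corrected text\n",
+    "    return result[\"corrected_response\"]\n",
+    "\n",
+    "print(guarded_answer(result2))\n"
+   ]
+  },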
- {explanation}\")\n", + "else:\n", + " print(colored(\"VHC:\", attrs=[\"bold\"]), \"No corrections needed\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "This notebook demonstrated the integration of Vectara's HHEM and VHC with standard LangChain workflows.\n", + "We've seen that when a LangChain RAG pipeline hallcuinates, HHEM identifies the hallucination and VHC can correct it.\n", + "\n", + "For more information:\n", + "- [Vectara Documentation](https://docs.vectara.com/)\n", + "- [HHEM API Reference](https://docs.vectara.com/docs/rest-api/evaluate-factual-consistency)\n", + "- [VHC API Reference](https://docs.vectara.com/docs/rest-api/correct-hallucinations)\n", + "- [LangChain Documentation](https://python.langchain.com/)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/hallucination_mitigation/vhc-llamaindex-integration.ipynb b/notebooks/hallucination_mitigation/vhc-llamaindex-integration.ipynb new file mode 100644 index 0000000..79ad0df --- /dev/null +++ b/notebooks/hallucination_mitigation/vhc-llamaindex-integration.ipynb @@ -0,0 +1,519 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# VHC (Vectara Hallucination Corrector) with LLamaIndex Integration\n", + "\n", + "This notebook demonstrates how to integrate Vectara's HHEM (Hughes Hallucination Evaluation Model) and VHC (Vectara Hallucination Corrector) with standard LlamaIndex RAG workflow." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installation and Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install --quiet llama-index llama-index-llms-openai llama-index-embeddings-openai llama-index-vector-stores-chroma requests python-dotenv chromadb termcolor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Environment Setup\n", + "\n", + "Set up your environment variables. 
You'll need:\n", + "- `VECTARA_API_KEY`: Your Vectara API key (for HHEM and VHC)\n", + "- `OPENAI_API_KEY`: Your OpenAI API key (for LlamaIndex RAG)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Environment variables configured successfully\n" + ] + } + ], + "source": [ + "import os\n", + "import json\n", + "import requests\n", + "from typing import List, Dict, Any, Optional\n", + "from dotenv import load_dotenv\n", + "from termcolor import colored\n", + "\n", + "# Load environment variables\n", + "load_dotenv()\n", + "\n", + "# Set your API keys here or in your environment\n", + "os.environ[\"VECTARA_API_KEY\"] = os.getenv(\"VECTARA_API_KEY\", \"\")\n", + "os.environ[\"OPENAI_API_KEY\"] = os.getenv(\"OPENAI_API_KEY\", \"\")\n", + "\n", + "# Verify API keys are set\n", + "if not os.getenv('VECTARA_API_KEY') or os.getenv('VECTARA_API_KEY') == '':\n", + " raise EnvironmentError(\"VECTARA_API_KEY environment variable is not set.\")\n", + "\n", + "if not os.getenv('OPENAI_API_KEY') or os.getenv('OPENAI_API_KEY') == '':\n", + " raise EnvironmentError(\"OPENAI_API_KEY environment variable is not set.\")\n", + "\n", + "print(\"Environment variables configured successfully\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Vectara HHEM and VHC Client\n", + "\n", + "Create clients for interacting with Vectara's HHEM and VHC endpoints:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "class VectaraClient:\n", + " \"\"\"Client for interacting with Vectara HHEM and VHC endpoints\"\"\"\n", + " \n", + " def __init__(self, api_key: str):\n", + " self.api_key = api_key\n", + " self.session = requests.Session()\n", + " self.base_url = \"https://api.vectara.io\"\n", + " \n", + " def evaluate_factual_consistency(self, query: str, response: str, documents: List[str]) -> Dict[str, Any]:\n", + " \"\"\"Evaluate factual consistency using HHEM\"\"\"\n", + " \n", + " payload = {\n", + " \"generated_text\": response,\n", + " \"source_texts\": documents\n", + " }\n", + " \n", + " headers = {\n", + " \"Content-Type\": \"application/json\",\n", + " \"Accept\": \"application/json\",\n", + " \"x-api-key\": self.api_key\n", + " }\n", + " \n", + " try:\n", + " response = self.session.post(\n", + " f\"{self.base_url}/v2/evaluate_factual_consistency\",\n", + " json=payload,\n", + " headers=headers,\n", + " timeout=30\n", + " )\n", + " response.raise_for_status()\n", + " return response.json()\n", + " \n", + " except requests.exceptions.RequestException as e:\n", + " raise RuntimeError(f\"HHEM API request failed: {e}\")\n", + " except json.JSONDecodeError as e:\n", + " raise RuntimeError(f\"Failed to parse HHEM response: {e}\")\n", + " \n", + " def correct_hallucinations(\n", + " self, \n", + " query: str, \n", + " generated_text: str, \n", + " documents: List[str],\n", + " model_name: str = \"vhc-large-1.0\"\n", + " ) -> Dict[str, Any]:\n", + " \"\"\"Correct hallucinations using VHC\"\"\"\n", + " \n", + " payload = {\n", + " \"generated_text\": generated_text,\n", + " \"query\": query,\n", + " \"documents\": [{\"text\": doc} for doc in documents],\n", + " \"model_name\": model_name\n", + " }\n", + " \n", + " headers = {\n", + " \"Content-Type\": \"application/json\",\n", + " \"Accept\": \"application/json\",\n", + " \"x-api-key\": self.api_key\n", + " }\n", + " \n", + " try:\n", + " response = self.session.post(\n", + " 
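+    "                # Vectara's v2 REST endpoint for hallucination correction (VHC)\n",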
f\"{self.base_url}/v2/hallucination_correctors/correct_hallucinations\",\n", + " json=payload,\n", + " headers=headers,\n", + " timeout=60\n", + " )\n", + " response.raise_for_status()\n", + " \n", + " data = response.json()\n", + " corrected_text = data.get(\"corrected_text\", \"\")\n", + " corrections = data.get(\"corrections\", [])\n", + " \n", + " return {\n", + " \"corrected_text\": corrected_text,\n", + " \"corrections\": corrections,\n", + " \"original_text\": generated_text\n", + " }\n", + " \n", + " except requests.exceptions.RequestException as e:\n", + " raise RuntimeError(f\"VHC API request failed: {e}\")\n", + " except json.JSONDecodeError as e:\n", + " raise RuntimeError(f\"Failed to parse VHC response: {e}\")\n", + "\n", + "# Initialize Vectara client\n", + "vectara_client = VectaraClient(os.getenv(\"VECTARA_API_KEY\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup: LlamaIndex RAG Engines\n", + "\n", + "Let's create two different LlamaIndex RAG engines.\n", + "The first one will respond with \"I don't know\" if it cannot respond based on the source text\n", + "The second one is instructed to use its internal knowledge, which can result in a hallucination relative to the RAG information." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.core import VectorStoreIndex, Document, Settings\n", + "from llama_index.llms.openai import OpenAI\n", + "from llama_index.embeddings.openai import OpenAIEmbedding\n", + "from llama_index.vector_stores.chroma import ChromaVectorStore\n", + "from llama_index.core.prompts import PromptTemplate\n", + "from llama_index.core.retrievers import VectorIndexRetriever\n", + "import chromadb\n", + "\n", + "# Initialize LLM and embeddings\n", + "Settings.llm = OpenAI(model=\"gpt-4o\", temperature=0.0)\n", + "Settings.embed_model = OpenAIEmbedding()\n", + "\n", + "# Create knowledge base with more detailed but still limited information\n", + "sample_docs = [\n", + " \"The Eiffel Tower is a wrought-iron lattice tower located in Paris, France. It was designed by Alexandre Gustave Eiffel.\",\n", + " \"Leonardo da Vinci was an Italian Renaissance polymath who painted the Mona Lisa. He lived from 1452 to 1519.\",\n", + " \"William Shakespeare was an English playwright and poet who wrote Romeo and Juliet. He is considered the greatest writer in the English language.\",\n", + " \"The Great Wall of China is an ancient series of walls and fortifications built to protect Chinese states from invasions.\",\n", + " \"Albert Einstein was a German-born theoretical physicist who developed the theory of relativity. 
He won the Nobel Prize in Physics in 1921.\",\n",
+    "    \"The Statue of Liberty is a neoclassical sculpture located on Liberty Island in New York Harbor.\",\n",
+    "    \"Mount Everest is the Earth's highest mountain located in the Himalayas on the border between Nepal and Tibet.\",\n",
+    "    \"The Amazon River is the longest river in South America and flows through the Amazon rainforest.\"\n",
+    "]\n",
+    "\n",
+    "# Create documents\n",
+    "documents = [Document(text=text) for text in sample_docs]\n",
+    "\n",
+    "# Create ChromaDB client and collection\n",
+    "chroma_client = chromadb.EphemeralClient()\n",
+    "chroma_collection = chroma_client.create_collection(\"knowledge_base\")\n",
+    "\n",
+    "# Create vector store and index; the Chroma store must be attached via a\n",
+    "# StorageContext so that from_documents actually writes into it\n",
+    "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
+    "storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
+    "index = VectorStoreIndex.from_documents(\n",
+    "    documents,\n",
+    "    storage_context=storage_context\n",
+    ")\n",
+    "\n",
+    "# Create retriever\n",
+    "retriever = VectorIndexRetriever(\n",
+    "    index=index,\n",
+    "    similarity_top_k=3\n",
+    ")\n",
+    "\n",
+    "# Create two different prompt templates for different behaviors\n",
+    "\n",
+    "# Conservative prompt - only answers based on context, doesn't hallucinate\n",
+    "conservative_prompt_template = PromptTemplate(\n",
+    "    \"You are a precise assistant that only provides information based on the given context.\\n\"\n",
+    "    \"If the context doesn't contain enough information to answer the question completely, respond with 'I don't know' or 'The provided context doesn't contain enough information to answer this question.'\\n\\n\"\n",
+    "    \"Do NOT use your external knowledge. Only use the information provided in the context.\\n\\n\"\n",
+    "    \"Context information is below.\\n\"\n",
+    "    \"---------------------\\n\"\n",
+    "    \"{context_str}\\n\"\n",
+    "    \"---------------------\\n\"\n",
+    "    \"Given the context information and not prior knowledge, answer the query.\\n\"\n",
+    "    \"Query: {query_str}\\n\"\n",
+    "    \"Answer based ONLY on the context provided: \"\n",
+    ")\n",
+    "\n",
+    "# Expansive prompt - fills in details from external knowledge, prone to hallucinations\n",
+    "expansive_prompt_template = PromptTemplate(\n",
+    "    \"You are a knowledgeable assistant. 
Based on the provided context, answer the question with specific details and facts.\\n\"\n",
+    "    \"If the context doesn't have complete information, use your extensive knowledge to provide a comprehensive answer with specific details like dates, measurements, costs, visitor numbers, and other precise facts.\\n\\n\"\n",
+    "    \"Context information is below.\\n\"\n",
+    "    \"---------------------\\n\"\n",
+    "    \"{context_str}\\n\"\n",
+    "    \"---------------------\\n\"\n",
+    "    \"Given the context information, answer the query with detailed facts and figures.\\n\"\n",
+    "    \"Query: {query_str}\\n\"\n",
+    "    \"Provide a detailed answer with specific facts and figures: \"\n",
+    ")\n",
+    "\n",
+    "# Create both query engines using the index directly with custom prompts\n",
+    "query_engine_conservative = index.as_query_engine(\n",
+    "    text_qa_template=conservative_prompt_template,\n",
+    "    similarity_top_k=3\n",
+    ")\n",
+    "\n",
+    "query_engine_expansive = index.as_query_engine(\n",
+    "    text_qa_template=expansive_prompt_template,\n",
+    "    similarity_top_k=3\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Enhanced HHEM + VHC Pipeline Function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def rag_with_hhem_vhc_pipeline(query: str, use_conservative: bool = True) -> Dict[str, Any]:\n",
+    "    \"\"\"Complete pipeline: RAG -> HHEM evaluation -> VHC correction -> Post-correction HHEM\"\"\"\n",
+    "\n",
+    "    # Step 1: Choose query engine and get response\n",
+    "    query_engine = query_engine_conservative if use_conservative else query_engine_expansive\n",
+    "    engine_type = \"Conservative\" if use_conservative else \"Expansive\"\n",
+    "    response = query_engine.query(query)\n",
+    "    rag_response = str(response)\n",
+    "\n",
+    "    # Step 2: Get source documents for evaluation\n",
+    "    retrieved_nodes = retriever.retrieve(query)\n",
+    "    context_texts = [node.node.text for node in retrieved_nodes]\n",
+    "\n",
+    "    # Step 3: Check if we should skip HHEM for \"I don't know\" responses\n",
+    "    skip_hhem = (use_conservative and\n",
+    "                 (\"don't know\" in rag_response.lower() or\n",
+    "                  \"doesn't contain enough information\" in rag_response.lower() or\n",
+    "                  \"not enough information\" in rag_response.lower()))\n",
+    "\n",
+    "    if skip_hhem:\n",
+    "        fcs_score = None\n",
+    "        hhem_result = {}\n",
+    "        corrected_text = rag_response\n",
+    "        corrections = []\n",
+    "        vhc_result = {\"corrected_text\": rag_response, \"corrections\": []}\n",
+    "        post_correction_fcs_score = None\n",
+    "        post_correction_hhem_result = {}\n",
+    "    else:\n",
+    "        # Pre-correction HHEM evaluation\n",
+    "        try:\n",
+    "            hhem_result = vectara_client.evaluate_factual_consistency(\n",
+    "                query=query,\n",
+    "                response=rag_response,\n",
+    "                documents=context_texts\n",
+    "            )\n",
+    "            fcs_score = hhem_result.get(\"score\", 0.0)\n",
+    "        except Exception as e:\n",
+    "            fcs_score = None\n",
+    "            hhem_result = {}\n",
+    "\n",
+    "        # VHC correction\n",
+    "        try:\n",
+    "            vhc_result = vectara_client.correct_hallucinations(\n",
+    "                query=query,\n",
+    "                generated_text=rag_response,\n",
+    "                documents=context_texts\n",
+    "            )\n",
+    "            corrected_text = vhc_result[\"corrected_text\"]\n",
+    "            corrections = vhc_result[\"corrections\"]\n",
+    "        except Exception as e:\n",
+    "            corrected_text = rag_response\n",
+    "            corrections = []\n",
+    "            vhc_result = {\"corrected_text\": rag_response, \"corrections\": []}\n",
+    "\n",
+    "        # Post-correction HHEM evaluation (only if VHC made corrections)\n",
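+    "        # Re-scoring the corrected text with HHEM verifies that the correction improved grounding\n",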
+    "        if corrections and corrected_text != rag_response:\n",
+    "            try:\n",
+    "                post_correction_hhem_result = vectara_client.evaluate_factual_consistency(\n",
+    "                    query=query,\n",
+    "                    response=corrected_text,\n",
+    "                    documents=context_texts\n",
+    "                )\n",
+    "                post_correction_fcs_score = post_correction_hhem_result.get(\"score\", 0.0)\n",
+    "            except Exception as e:\n",
+    "                post_correction_fcs_score = None\n",
+    "        else:\n",
+    "            post_correction_fcs_score = fcs_score  # Same as original if no corrections\n",
+    "\n",
+    "    return {\n",
+    "        \"query\": query,\n",
+    "        \"original_response\": rag_response,\n",
+    "        \"context_documents\": context_texts,\n",
+    "        \"fcs_score\": fcs_score,\n",
+    "        \"hhem_result\": hhem_result,\n",
+    "        \"corrected_response\": corrected_text,\n",
+    "        \"corrections\": corrections,\n",
+    "        \"post_correction_fcs_score\": post_correction_fcs_score,\n",
+    "        \"skipped_evaluation\": skip_hhem\n",
+    "    }"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Example 1: Conservative RAG Engine\n",
+    "\n",
+    "Test with conservative engine that admits ignorance when context is insufficient:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[1mQuery:\u001b[0m What are top 3 works by Leonardo Devinci?\n",
+      "\u001b[1mResponse:\u001b[0m The provided context doesn't contain enough information to answer this question.\n",
+      "\u001b[1mHHEM Score:\u001b[0m N/A\n"
+     ]
+    }
+   ],
+   "source": [
+    "query = \"What are top 3 works by Leonardo Devinci?\"\n",
+    "\n",
+    "result1 = rag_with_hhem_vhc_pipeline(query, use_conservative=True)\n",
+    "\n",
+    "print(colored(\"Query:\", attrs=[\"bold\"]), f\"{result1['query']}\")\n",
+    "print(colored(\"Response:\", attrs=[\"bold\"]), f\"{result1['original_response']}\")\n",
+    "print(colored(\"HHEM Score:\", attrs=[\"bold\"]), f\"{result1['fcs_score']:.3f}\" if result1['fcs_score'] is not None else \"N/A\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Example 2: Expansive RAG Engine\n",
+    "\n",
+    "Test with expansive engine that provides detailed responses with potential hallucinations:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[1mQuery:\u001b[0m What are top 3 works by Leonardo Devinci?\n",
+      "\n",
+      "\u001b[1mOriginal Response:\u001b[0m Leonardo da Vinci, a quintessential Renaissance polymath, is renowned for his contributions to art, science, and engineering. Among his artistic masterpieces, three works stand out as particularly significant:\n",
+      "\n",
+      "1. **Mona Lisa**: Painted between 1503 and 1506, the Mona Lisa is arguably Leonardo's most famous work. It is housed in the Louvre Museum in Paris and is renowned for the subject's enigmatic expression, the use of sfumato (a technique of softening transitions between colors), and its detailed background. The painting measures 77 cm by 53 cm (30 in by 21 in) and is protected by bulletproof glass due to its immense value and popularity.\n",
+      "\n",
+      "2. **The Last Supper**: Created between 1495 and 1498, this mural is located in the Convent of Santa Maria delle Grazie in Milan, Italy. It depicts the moment Jesus announces that one of his disciples will betray him. The work is celebrated for its composition, perspective, and emotional depth. 
Unfortunately, due to the experimental techniques Leonardo used, the mural has suffered significant deterioration over the centuries, leading to extensive restoration efforts.\n", + "\n", + "3. **Vitruvian Man**: Although not a painting, the Vitruvian Man is one of Leonardo's most famous drawings, created around 1490. It is a study of the proportions of the human body, illustrating the blend of art and science that Leonardo epitomized. The drawing is based on the work of the ancient Roman architect Vitruvius and is housed in the Gallerie dell'Accademia in Venice, Italy. It measures 34.4 cm by 25.5 cm (13.5 in by 10 in).\n", + "\n", + "These works exemplify Leonardo's mastery of art and his profound impact on both the Renaissance and the broader history of art.\n", + "\n", + "\u001b[1mPre-correction HHEM Score:\u001b[0m 0.055\n", + "\u001b[1mPost-correction HHEM Score:\u001b[0m 0.996\n", + "\u001b[1mCorrected Response:\u001b[0m Leonardo da Vinci was an Italian Renaissance polymath who painted the Mona Lisa.\n", + "\u001b[1mCorrections made:\u001b[0m\n", + " 1. Removed: 'Among his artistic masterpieces, three works stand out as particularly significant:\n", + "\n", + "1. **Mona Lisa**: Painted between 1503 and 1506, the Mona Lisa is arguably Leonardo's most famous work. It is housed in the Louvre Museum in Paris and is renowned for the subject's enigmatic expression, the use of sfumato (a technique of softening transitions between colors), and its detailed background. The painting measures 77 cm by 53 cm (30 in by 21 in) and is protected by bulletproof glass due to its immense value and popularity.\n", + "\n", + "2. **The Last Supper**: Created between 1495 and 1498, this mural is located in the Convent of Santa Maria delle Grazie in Milan, Italy. It depicts the moment Jesus announces that one of his disciples will betray him. The work is celebrated for its composition, perspective, and emotional depth. Unfortunately, due to the experimental techniques Leonardo used, the mural has suffered significant deterioration over the centuries, leading to extensive restoration efforts.\n", + "\n", + "3. **Vitruvian Man**: Although not a painting, the Vitruvian Man is one of Leonardo's most famous drawings, created around 1490. It is a study of the proportions of the human body, illustrating the blend of art and science that Leonardo epitomized. The drawing is based on the work of the ancient Roman architect Vitruvius and is housed in the Gallerie dell'Accademia in Venice, Italy. It measures 34.4 cm by 25.5 cm (13.5 in by 10 in)....' - The Source only states that Leonardo da Vinci painted the Mona Lisa. It does not mention The Last Supper, Vitruvian Man, or any of the additional details (dates, locations, descriptions, measurements, etc.) provided in the Response. 
All of these details are extrinsic to the Source.\n"
+     ]
+    }
+   ],
+   "source": [
+    "query = \"What are top 3 works by Leonardo Devinci?\"\n",
+    "\n",
+    "result2 = rag_with_hhem_vhc_pipeline(query, use_conservative=False)\n",
+    "\n",
+    "print(colored(\"Query:\", attrs=[\"bold\"]), f\"{result2['query']}\\n\")\n",
+    "print(colored(\"Original Response:\", attrs=[\"bold\"]), f\"{result2['original_response']}\\n\")\n",
+    "print(colored(\"Pre-correction HHEM Score:\", attrs=[\"bold\"]), f\"{result2['fcs_score']:.3f}\" if result2['fcs_score'] is not None else \"N/A\")\n",
+    "\n",
+    "if result2['corrections']:\n",
+    "    print(colored(\"Post-correction HHEM Score:\", attrs=[\"bold\"]), f\"{result2['post_correction_fcs_score']:.3f}\" if result2['post_correction_fcs_score'] is not None else \"N/A\")\n",
+    "    print(colored(\"Corrected Response:\", attrs=[\"bold\"]), f\"{result2['corrected_response']}\")\n",
+    "    print(colored(\"Corrections made:\", attrs=[\"bold\"]))\n",
+    "    for i, correction in enumerate(result2['corrections'], 1):\n",
+    "        original = correction.get('original_text', '')\n",
+    "        explanation = correction.get('explanation', '')\n",
+    "        print(f\"  {i}. Removed: '{original}...' - {explanation}\")\n",
+    "else:\n",
+    "    print(colored(\"VHC:\", attrs=[\"bold\"]), \"No corrections needed\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Summary\n",
+    "\n",
+    "This notebook demonstrated the integration of Vectara's HHEM and VHC with standard LlamaIndex workflows.\n",
+    "We've seen that when a LlamaIndex RAG pipeline hallucinates, HHEM identifies the hallucination and VHC can correct it.\n",
+    "\n",
+    "For more information:\n",
+    "- [Vectara Documentation](https://docs.vectara.com/)\n",
+    "- [HHEM API Reference](https://docs.vectara.com/docs/rest-api/evaluate-factual-consistency)\n",
+    "- [VHC API Reference](https://docs.vectara.com/docs/rest-api/correct-hallucinations)\n",
+    "- [LlamaIndex Documentation](https://docs.llamaindex.ai/)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

From a271ba28c7043a73041c2177b484f93ccd2c7f51 Mon Sep 17 00:00:00 2001
From: Ofer Mendelevitch
Date: Thu, 9 Oct 2025 10:08:49 -0700
Subject: [PATCH 2/2] updated typo

---
 .../hallucination_mitigation/vhc-langchain-integration.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/notebooks/hallucination_mitigation/vhc-langchain-integration.ipynb b/notebooks/hallucination_mitigation/vhc-langchain-integration.ipynb
index 9511f32..061a3b2 100644
--- a/notebooks/hallucination_mitigation/vhc-langchain-integration.ipynb
+++ b/notebooks/hallucination_mitigation/vhc-langchain-integration.ipynb
@@ -477,7 +477,7 @@
     "## Summary\n",
     "\n",
     "This notebook demonstrated the integration of Vectara's HHEM and VHC with standard LangChain workflows.\n",
-    "We've seen that when a LangChain RAG pipeline hallcuinates, HHEM identifies the hallucination and VHC can correct it.\n",
+    "We've seen that when a LangChain RAG pipeline hallucinates, HHEM identifies the hallucination and VHC can correct it.\n",
     "\n",
     "For more information:\n",
     "- [Vectara 
Documentation](https://docs.vectara.com/)\n",