# Minimal RAG Application with LlamaIndex (Colab)

This notebook helps you build a minimal Retrieval-Augmented Generation (RAG) app using LlamaIndex and OpenAI to answer questions about your insurance PDF.

**Instructions:**
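- Upload your `OpenAI_API_Key.txt` and your insurance PDF to the Colab session (for example via the Files sidebar) before running the cells below. The code reads both files from the working directory, so either name the PDF `Principal-Sample-Life-Insurance-Policy.pdf` or update `pdf_path` in the setup cell.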

In [1]:
# Install required packages
# NOTE(review): versions are unpinned, so a fresh runtime may resolve different
# releases than the ones this notebook was recorded with — consider pinning.
!pip install llama-index openai pdfplumber --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.5/48.5 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.0/60.0 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m64.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m89.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m303.3/303.3 kB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.8/2.8 MB[0m [31m79.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [1]:
# Read OpenAI API key and PDF filename
import os

# Relative paths: both files are looked up in the current working directory.
api_key_path = 'OpenAI_API_Key.txt'
pdf_path = 'Principal-Sample-Life-Insurance-Policy.pdf'

# Read the key from a file instead of hardcoding it in the notebook, then
# publish it through the OPENAI_API_KEY environment variable.
with open(api_key_path, 'r') as f:
    openai_api_key = f.read().strip()  # strip() removes surrounding whitespace/newline
os.environ['OPENAI_API_KEY'] = openai_api_key

In [3]:
# Load and index the PDF using LlamaIndex
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.llms.openai import OpenAI

# Load document
# `documents` is reused later by the v2 chunking cell.
reader = SimpleDirectoryReader(input_files=[pdf_path])
documents = reader.load_data()

# Set up LlamaIndex with OpenAI
# `llm` is shared by every query engine created further down in the notebook.
llm = OpenAI(model='gpt-3.5-turbo', api_key=openai_api_key)
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine(llm=llm)

In [4]:
# Ask questions about the PDF
# Blocks on interactive input, so this cell cannot run unattended.
question = input('Enter your question about the insurance policy: ')
response = query_engine.query(question)
# `response.response` holds the answer text of the query result.
print('Answer:', response.response)

Enter your question about the insurance policy: What happens at policy maturity?
Answer: At policy maturity, the premium amount to be paid on each due date will be determined based on the total volume of insurance in force divided by 1,000, multiplied by the premium rate then in effect for Member Life Insurance, Member Accidental Death and Dismemberment Insurance, and Dependent Life Insurance. Additionally, if the Policyholder has other group insurance with The Principal and life coverage is added on a date other than the Policy Anniversary more than six months before the next Policy Anniversary, the premium rate may be changed on the next Policy Anniversary with written notice provided at least 31 days before the date of change.


# RAG Application v2: Essential Improvements

This version introduces a modular workflow, advanced chunking, conversational memory, source attribution, and a better user interface for insurance PDF Q&A.

In [5]:
# Advanced chunking and metadata extraction
from llama_index.core.node_parser import SentenceSplitter

# Use sentence splitter for finer chunking
# Chunks of up to 512 units with a 50-unit overlap between neighbouring chunks.
parser = SentenceSplitter(chunk_size=512, chunk_overlap=50)
nodes = parser.get_nodes_from_documents(documents)

# Add metadata (e.g., page number) to each node
# The 'source' key written here is what the chat cells read when citing sources.
# NOTE(review): 'page_label' is presumably filled in by the PDF reader — confirm;
# nodes without it are labelled 'Unknown'.
for node in nodes:
    if hasattr(node, 'metadata') and hasattr(node, 'text'):  # Defensive check
        node.metadata['source'] = node.metadata.get('page_label', 'Unknown')

In [6]:
# Build index from advanced nodes
# Indexes the sentence-split `nodes` (which carry the 'source' metadata) rather
# than the raw documents used for the v1 index.
index_v2 = VectorStoreIndex(nodes)
query_engine_v2 = index_v2.as_query_engine(llm=llm)

In [7]:
# Interactive chat with conversational memory, exit, and clear commands
import ipywidgets as widgets
from IPython.display import display, Markdown, clear_output

# Conversation log: a list of {'role': ..., 'content': ...} dicts filled by on_submit.
chat_history = []

# Single-line text input; the submit handler is attached further down.
question_box = widgets.Text(
    value='',
    placeholder='Type your question about the insurance policy...',
    description='Question:',
    disabled=False
)

# Output widget that collects the rendered questions, answers and sources.
output_box = widgets.Output()

def on_submit(sender):
    """Answer the question typed into ``question_box`` and render it in ``output_box``.

    Two inputs are treated as commands (case-insensitive): ``exit`` disables the
    text box, ``clear`` empties both the chat history and the rendered output.
    Anything else is sent to the v2 query engine; the answer and a 200-character
    preview of every source node are appended to ``output_box``.

    Args:
        sender: The widget that fired the submit event (unused).
    """
    question = question_box.value.strip()
    if not question:
        return

    command = question.lower()
    if command == 'exit':
        question_box.disabled = True
        with output_box:
            display(Markdown("**Chat ended. Refresh the notebook to start again.**"))
        return
    if command == 'clear':
        chat_history.clear()
        output_box.clear_output()
        question_box.value = ''
        return

    # Query the v2 engine: only the nodes behind index_v2 carry the 'source'
    # metadata displayed below (the v1 engine would always report 'Unknown').
    try:
        response = query_engine_v2.query(question)
    except Exception as e:
        # Show the failure and keep the typed question so it can be retried;
        # nothing is recorded in the history for a failed exchange.
        with output_box:
            display(Markdown(f"**Error:** {e}"))
        return

    # Record the exchange only once it succeeded so the history always holds
    # complete user/assistant pairs.
    chat_history.append({'role': 'user', 'content': question})
    chat_history.append({'role': 'assistant', 'content': response.response})

    with output_box:
        display(Markdown(f"**Q:** {question}"))
        display(Markdown(f"**A:** {response.response}"))
        # Show source if available
        if hasattr(response, 'source_nodes') and response.source_nodes:
            for node in response.source_nodes:
                src = node.node.metadata.get('source', 'Unknown')
                display(Markdown(f"_Source: {src}_\n> {node.node.text[:200]}..."))
    question_box.value = ''

# Call on_submit when the user submits the text box, then render both widgets.
# NOTE(review): Text.on_submit is reported as deprecated in ipywidgets 8 — confirm
# against the installed version before relying on it.
question_box.on_submit(on_submit)
display(question_box, output_box)

Text(value='', description='Question:', placeholder='Type your question about the insurance policy...')

Output()

# RAG Application v3: Advanced Retrieval & Enhanced Query Engine

This version introduces:
- **Hybrid Search**: Combines semantic similarity with keyword matching (BM25)
- **Query Routing**: Different strategies for different question types
- **Multi-step Reasoning**: Sub-question generation for complex queries
- **Confidence Scoring**: Answer reliability assessment

In [14]:
# Install additional packages for v3 features
# NOTE(review): the recorded output of this cell shows pip dependency-resolver
# conflicts between llama-index packages; pinning mutually compatible versions
# would avoid that.
!pip install rank-bm25 sentence-transformers llama-index-question-gen-openai --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m61.6 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
llama-index-indices-managed-llama-cloud 0.9.2 requires llama-index-core<0.14,>=0.13.0, but you have llama-index-core 0.12.52.post1 which is incompatible.
llama-index-cli 0.5.0 requires llama-index-core<0.14,>=0.13.0, but you have llama-index-core 0.12.52.post1 which is incompatible.
llama-index-cli 0.5.0 requires llama-index-llms-openai<0.6,>=0.5.0, but you have llama-index-llms-openai 0.4.7 which is incompatible.
llama-index-embeddings-openai 0.5.0 requires llama-index-core<0.14,>=0.13.0, but you have llama-index-core 0.12.52.post1 which is incompatible.
llama-index 0.13.3 requires llama-index-core<0.14,>=0.13.3, but you have llama-index-core 0.12.52.post1 which is incompatible.
llama-in

In [34]:
# Create Hybrid Retriever (Semantic + Keyword)
from llama_index.core.retrievers import VectorIndexRetriever
from rank_bm25 import BM25Okapi
import numpy as np
from llama_index.core.schema import NodeWithScore

# Create semantic retriever
# Returns the 5 most similar nodes from the v2 index.
vector_retriever = VectorIndexRetriever(index=index_v2, similarity_top_k=5)

# Create custom BM25 retriever using rank_bm25
class CustomBM25Retriever:
    """Keyword retriever that ranks a fixed list of nodes with ``BM25Okapi``.

    Args:
        nodes: Objects exposing a ``text`` attribute; they form the BM25 corpus.
        similarity_top_k: Maximum number of nodes returned per query.
    """

    def __init__(self, nodes, similarity_top_k=5):
        self.nodes = nodes
        self.similarity_top_k = similarity_top_k
        # Tokenize documents for BM25. An empty corpus is kept as ``None`` so
        # that construction does not fail and ``retrieve`` can return nothing.
        tokenized_docs = [self._tokenize(node.text) for node in nodes]
        self.bm25 = BM25Okapi(tokenized_docs) if tokenized_docs else None

    @staticmethod
    def _tokenize(text):
        """Lower-case ``text`` and split it into alphanumeric tokens.

        Punctuation is treated as a separator so that e.g. "maturity?" in a
        query matches "maturity" in a document.
        """
        return ''.join(ch if ch.isalnum() else ' ' for ch in text.lower()).split()

    @staticmethod
    def _query_text(query):
        """Return the plain query string for a str or an object with ``query_str``/``text``."""
        if hasattr(query, 'query_str'):
            return query.query_str
        if hasattr(query, 'text'):
            return query.text
        return str(query)

    def retrieve(self, query_str):
        """Return up to ``similarity_top_k`` nodes with a positive BM25 score, best first."""
        if self.bm25 is None:
            return []

        tokenized_query = self._tokenize(self._query_text(query_str))
        if not tokenized_query:
            return []

        # Get BM25 scores (one per corpus node, in corpus order)
        scores = self.bm25.get_scores(tokenized_query)
        # Get top k indices, highest score first
        top_indices = np.argsort(scores)[::-1][:self.similarity_top_k]
        # Nodes with a zero score share no term with the query and are dropped.
        return [
            NodeWithScore(node=self.nodes[i], score=float(scores[i]))
            for i in top_indices
            if scores[i] > 0
        ]

    # Add async version for compatibility
    async def aretrieve(self, query_str):
        """Async wrapper that simply runs the synchronous ``retrieve``."""
        return self.retrieve(query_str)

# Create BM25 retriever
# Built over the same `nodes` list that backs index_v2.
bm25_retriever = CustomBM25Retriever(nodes, similarity_top_k=5)

# Simple hybrid retriever that combines results
class SimpleHybridRetriever:
    """Merge the rankings of a vector retriever and a keyword retriever.

    The two result lists are fused with reciprocal rank fusion (RRF): every
    result contributes ``1 / (RRF_K + rank)`` for each list it appears in, and
    results are ordered by the summed contribution. Simply concatenating the
    lists and truncating would drop every keyword hit whenever the vector
    retriever already returns ``similarity_top_k`` results.

    Args:
        vector_retriever: Object with ``retrieve(str)`` returning scored nodes.
        bm25_retriever: Object with ``retrieve(str)`` returning scored nodes.
        similarity_top_k: Maximum number of fused results returned.
    """

    # Smoothing constant of reciprocal rank fusion (60 is the customary choice).
    RRF_K = 60

    def __init__(self, vector_retriever, bm25_retriever, similarity_top_k=5):
        self.vector_retriever = vector_retriever
        self.bm25_retriever = bm25_retriever
        self.similarity_top_k = similarity_top_k

    @staticmethod
    def _query_text(query):
        """Return the plain query string for a str or an object with ``query_str``/``text``."""
        if hasattr(query, 'query_str'):
            return query.query_str
        if hasattr(query, 'text'):
            return query.text
        return str(query)

    def retrieve(self, query_str):
        """Return up to ``similarity_top_k`` de-duplicated results, best fused rank first.

        Results are de-duplicated by node text. The returned objects are the
        ones produced by the underlying retrievers; their ``score`` values are
        left untouched (vector and BM25 scores are not on a common scale, which
        is why only ranks are used for ordering).
        """
        query_text = self._query_text(query_str)

        # Get results from both retrievers
        vector_results = self.vector_retriever.retrieve(query_text)
        bm25_results = self.bm25_retriever.retrieve(query_text)

        fused_scores = {}   # node text -> summed reciprocal-rank contribution
        first_result = {}   # node text -> first result object seen for that text
        for ranked_results in (vector_results, bm25_results):
            seen_in_list = set()
            for rank, result in enumerate(ranked_results, start=1):
                text = result.node.text
                if text in seen_in_list:
                    continue  # count a text only once per list
                seen_in_list.add(text)
                fused_scores[text] = fused_scores.get(text, 0.0) + 1.0 / (self.RRF_K + rank)
                first_result.setdefault(text, result)

        # sorted() is stable, so ties keep insertion order (vector hits first).
        ordered_texts = sorted(first_result, key=lambda text: -fused_scores[text])

        # Return top k results
        return [first_result[text] for text in ordered_texts[:self.similarity_top_k]]

    # Add async version to handle both sync and async calls
    async def aretrieve(self, query_str):
        """Async wrapper that simply runs the synchronous ``retrieve``."""
        return self.retrieve(query_str)

# Combine both retrievers; the hybrid returns at most 5 nodes per query.
hybrid_retriever = SimpleHybridRetriever(vector_retriever, bm25_retriever, similarity_top_k=5)

print("✅ Hybrid retriever created with async support!")

✅ Hybrid retriever created with async support!


In [35]:
# Query Routing and Classification
import re

# Keyword groups checked in order. The specific intents come first and the
# generic wh-words last, so "What is the difference ..." is a comparison rather
# than a plain factual question. Keywords match whole words only (with an
# optional plural "s"/"es"), so "showering" does not count as "how".
_QUESTION_TYPE_PATTERNS = tuple(
    (label, re.compile(r"\b(?:" + "|".join(keywords) + r")(?:e?s)?\b"))
    for label, keywords in (
        ('comparison', ('compare', 'difference', 'vs', 'versus', 'better')),
        ('procedural', ('how', 'process', 'procedure', 'steps')),
        ('summary', ('summarize', 'summary', 'overview', 'explain')),
        ('factual', ('what', 'who', 'when', 'where', 'which')),
    )
)


def classify_question(question):
    """
    Classify question type to route to appropriate strategy

    Args:
        question: A string, or an object exposing ``query_str`` or ``text``.

    Returns:
        One of 'comparison', 'procedural', 'summary' or 'factual'. Questions
        that match no keyword group default to 'factual'.
    """
    # Handle both string and QueryBundle objects
    if hasattr(question, 'query_str'):
        question_text = question.query_str
    elif hasattr(question, 'text'):
        question_text = question.text
    else:
        question_text = str(question)

    question_lower = question_text.lower()

    for label, pattern in _QUESTION_TYPE_PATTERNS:
        if pattern.search(question_lower):
            return label

    # Default to factual
    return 'factual'

print("Query classification system ready!")

Query classification system ready!


In [36]:
# Enhanced Query Engines with Multi-step Reasoning
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.response_synthesizers import get_response_synthesizer

# Create different query engines for different question types

# 1. Standard hybrid query engine
# The synthesizer gets the notebook's `llm` explicitly so that this engine
# answers with the same model as the sub-question engine below instead of
# falling back to the library default.
hybrid_query_engine = RetrieverQueryEngine(
    retriever=hybrid_retriever,
    response_synthesizer=get_response_synthesizer(llm=llm, response_mode="compact")
)

# 2. Try to create sub-question query engine for complex queries
try:
    # The hybrid engine is exposed as the single tool the sub-question engine
    # may call for each generated sub-question.
    query_engine_tools = [
        QueryEngineTool(
            query_engine=hybrid_query_engine,
            metadata=ToolMetadata(
                name="insurance_policy",
                description="Provides information about insurance policy details, coverage, terms, and conditions"
            )
        )
    ]

    sub_question_engine = SubQuestionQueryEngine.from_defaults(
        query_engine_tools=query_engine_tools,
        llm=llm
    )
    print("Enhanced query engines created successfully!")

except (ImportError, AttributeError) as e:
    print(f"SubQuestionQueryEngine not available: {e}")
    print("Using standard hybrid query engine for all queries.")
    # Fallback: use hybrid query engine for all question types
    sub_question_engine = hybrid_query_engine

Enhanced query engines created successfully!


In [42]:
# Confidence Scoring System
def calculate_confidence_score(response, retrieved_nodes):
    """
    Calculate confidence score based on multiple factors

    The score is a deterministic heuristic: the same answer and sources always
    produce the same score, so results are reproducible across runs.

    Args:
        response: Answer text. ``None`` is treated as an empty answer.
        retrieved_nodes: Sequence of retrieved results exposing ``.node.text``,
            or ``None``/empty when nothing was retrieved.

    Returns:
        Tuple ``(score, factors)`` where ``score`` is an int clamped to 0-100
        and ``factors`` is a list of human-readable strings describing each
        contribution.
    """
    response = "" if response is None else str(response)
    response_text = response.lower()
    response_words = response.split()

    score = 0
    factors = []

    # Factor 1: Number of supporting sources (max 25 points)
    num_sources = len(retrieved_nodes) if retrieved_nodes else 0
    source_score = min(num_sources * 5, 25)  # Up to 5 sources
    score += source_score
    factors.append(f"Sources: {num_sources} (+{source_score}pts)")

    # Factor 2: Response length and completeness (max 20 points)
    response_length = len(response_words)
    if 30 <= response_length <= 150:
        length_score = 20  # Optimal length
    elif 20 <= response_length < 30 or 150 < response_length <= 200:
        length_score = 15  # Good length
    elif 10 <= response_length < 20 or 200 < response_length <= 300:
        length_score = 10  # Acceptable length
    else:
        length_score = 5   # Too short or too long
    score += length_score
    factors.append(f"Length: {response_length} words (+{length_score}pts)")

    # Factor 3: Specific policy references (max 25 points)
    specific_indicators = [
        'section', 'page', 'part', 'according to', 'states that', 'specifically',
        'outlined', 'policy', 'coverage', 'benefit', 'procedure', 'days', 'within'
    ]
    specificity_count = sum(1 for word in specific_indicators if word in response_text)
    specificity_score = min(specificity_count * 3, 25)
    score += specificity_score
    factors.append(f"Policy specificity: {specificity_count} terms (+{specificity_score}pts)")

    # Factor 4: Uncertainty and generic responses (penalty)
    uncertainty_phrases = [
        'not sure', 'unclear', 'might be', 'possibly', 'perhaps', 'generally',
        'typically', 'usually', 'contact the', 'consult with', 'it is advisable'
    ]
    uncertainty_count = sum(1 for phrase in uncertainty_phrases if phrase in response_text)
    uncertainty_penalty = min(uncertainty_count * 8, 20)  # Max 20 point penalty
    score -= uncertainty_penalty
    if uncertainty_penalty > 0:
        factors.append(f"Generic/uncertain language: -{uncertainty_penalty}pts")

    # Factor 5: Numerical precision bonus (max 15 points)
    numbers_found = sum(1 for word in response_words if any(char.isdigit() for char in word))
    precision_score = min(numbers_found * 3, 15)  # Numbers suggest specific data
    score += precision_score
    if precision_score > 0:
        factors.append(f"Numerical precision: {numbers_found} values (+{precision_score}pts)")

    # Factor 6: Source quality assessment (max 15 points)
    if retrieved_nodes:
        # Count sources with substantial content (more than just headers)
        substantial_sources = sum(
            1 for node in retrieved_nodes if len(node.node.text.strip()) > 100
        )
        source_quality = min(substantial_sources * 5, 15)
        score += source_quality
        if source_quality > 0:
            factors.append(f"Source quality: {substantial_sources} substantial (+{source_quality}pts)")

    # Clamp to the 0-100 scale. No random jitter is added: a confidence value
    # must be reproducible for the same answer and sources.
    final_score = max(0, min(100, score))

    return round(final_score), factors

print("Enhanced confidence scoring system ready!")

Enhanced confidence scoring system ready!


In [43]:
# Enhanced Chat Interface with Persistent History
import ipywidgets as widgets
from IPython.display import display, Markdown, clear_output, HTML
import time

# Reset chat history for new session
# List of {'role', 'content'[, 'metadata']} dicts shared by the functions below.
chat_history_v3_enhanced = []

# Create UI components
question_box_enhanced = widgets.Text(
    value='',
    placeholder='Ask about your insurance policy (Enhanced v3 with persistent history)...',
    description='Question:',
    disabled=False,
    layout=widgets.Layout(width='700px')
)

# Create a scrollable output area
# Fixed 400px height with overflow_y='auto' so long conversations scroll.
output_area_enhanced = widgets.Output(
    layout=widgets.Layout(
        height='400px',
        width='100%',
        border='1px solid #ccc',
        overflow_y='auto'
    )
)

def display_chat_history():
    """Redraw the whole conversation inside ``output_area_enhanced``.

    The history is walked two entries at a time (user message followed by the
    assistant message); a trailing entry without a partner is not rendered.
    For each exchange the question, optional analysis metadata, the answer and
    up to three source citations are shown.
    """
    import re

    history = chat_history_v3_enhanced

    with output_area_enhanced:
        clear_output(wait=True)

        if not history:
            display(Markdown("*Start your conversation by asking a question about your insurance policy...*"))
            return

        # Stopping at len - 1 skips an unpaired final entry.
        for pair_start in range(0, len(history) - 1, 2):
            user_msg = history[pair_start]
            assistant_msg = history[pair_start + 1]

            # Exchange header and question
            exchange_num = (pair_start // 2) + 1
            display(Markdown(f"### 💬 Exchange {exchange_num}"))
            display(Markdown(f"**🤔 Q:** {user_msg['content']}"))

            response_content = assistant_msg['content']
            details = assistant_msg.get('metadata')
            has_details = isinstance(details, dict)

            if has_details:
                meta = details
                context_indicator = "🔄" if meta.get('context_used', False) else "🆕"
                display(Markdown(f"**📊 Analysis:** {context_indicator} Type: `{meta.get('question_type', 'unknown')}` | Time: `{meta.get('processing_time', 0):.2f}s` | Confidence: {meta.get('confidence', 0):.0f}/100"))

                # Sub-question summary: either a count parsed from the captured
                # engine output or the captured text itself.
                sub_info = meta.get('sub_questions_info')
                if sub_info:
                    if 'Generated' in sub_info and 'sub questions' in sub_info:
                        match = re.search(r'Generated (\d+) sub questions', sub_info)
                        if match:
                            num_questions = match.group(1)
                            display(Markdown(f"**🔍 Query Processing:** Used multi-step reasoning with {num_questions} sub-questions"))
                    else:
                        display(Markdown(f"**🔍 Query Processing:** {sub_info}"))

            display(Markdown(f"**🤖 A:** {response_content}"))

            # Source citations (top 3), labelled by page when one is known
            if has_details and details.get('source_nodes'):
                display(Markdown("**📚 Sources Referenced:**"))
                for rank, cited in enumerate(details['source_nodes'][:3], 1):
                    source_meta = cited.node.metadata
                    page_info = source_meta.get('page_label', source_meta.get('source', 'Unknown'))
                    text_preview = cited.node.text[:120].replace('\n', ' ').strip()

                    if page_info != 'Unknown':
                        display(Markdown(f"**{rank}.** Page {page_info}: *\"{text_preview}...\"*"))
                    else:
                        display(Markdown(f"**{rank}.** Document Section: *\"{text_preview}...\"*"))

            display(Markdown("---"))

def enhanced_query_processing(question):
    """Enhanced query processing with better context handling

    Classifies the question, prefixes it with recent conversation context when
    earlier exchanges exist, routes it to the sub-question or the hybrid query
    engine, and scores the answer.

    Args:
        question: The user's question (converted with ``str`` and stripped).

    Returns:
        dict with keys ``response`` (engine response object), ``question_type``,
        ``confidence``, ``factors``, ``processing_time`` (seconds),
        ``source_nodes``, ``context_used`` (True only when earlier exchanges
        were added to the prompt) and ``sub_questions_info`` (cleaned stdout
        captured from the engine, or None).
    """
    import contextlib
    import re
    from io import StringIO

    start_time = time.time()

    # Ensure we work with string input
    question_str = str(question).strip()

    # Step 1: Classify question type
    question_type = classify_question(question_str)

    # Step 2: Enhanced context handling using the enhanced history
    prior_history = list(chat_history_v3_enhanced)
    # The submit handler records the current question before calling this
    # function; drop that entry so the question is not used as its own context.
    if (
        prior_history
        and prior_history[-1]['role'] == 'user'
        and prior_history[-1]['content'] == question_str
    ):
        prior_history.pop()

    # Get last 2 exchanges for context
    recent_history = prior_history[-4:]

    if recent_history:
        # Detect follow-up questions. Indicators must match whole words or
        # phrases; plain substring checks would fire on e.g. "with"/"benefit"
        # because they contain "it".
        follow_up_indicators = [
            'elaborate', 'explain more', 'tell me more', 'expand', 'details',
            'that', 'it', 'this', 'further', 'more about', 'specific',
            'can you', 'what about', 'how about'
        ]
        follow_up_pattern = re.compile(
            r'\b(?:' + '|'.join(re.escape(phrase) for phrase in follow_up_indicators) + r')\b'
        )
        is_follow_up = bool(follow_up_pattern.search(question_str.lower()))

        # Most recent question and answer among the previous exchanges
        last_question = next(
            (msg['content'] for msg in reversed(recent_history) if msg['role'] == 'user'), ""
        )
        last_answer = next(
            (msg['content'] for msg in reversed(recent_history) if msg['role'] == 'assistant'), ""
        )

        if is_follow_up and last_question and last_answer:
            # Enhanced follow-up handling
            contextual_question = f"""Previous Question: {last_question}
Previous Answer: {last_answer}

User Follow-up Request: {question_str}

Please provide more detailed information, elaborate further, or answer the follow-up question about the same topic."""
        else:
            # Regular context for independent questions (messages cut to 100 chars)
            context_str = "\n".join([
                f"{msg['role'].title()}: {msg['content'][:100]}..." if len(msg['content']) > 100 else f"{msg['role'].title()}: {msg['content']}"
                for msg in recent_history
            ])
            contextual_question = f"Context:\n{context_str}\n\nNew Question: {question_str}"
    else:
        contextual_question = question_str

    # Step 3: Route to appropriate query engine with output capture
    # Anything the engine prints while answering is captured instead of shown.
    captured_output = StringIO()

    with contextlib.redirect_stdout(captured_output):
        if question_type in ['comparison', 'summary'] or len(question_str.split()) > 15:
            response = sub_question_engine.query(contextual_question)
        else:
            response = hybrid_query_engine.query(contextual_question)

    # Get and clean captured sub-question information
    sub_questions_output = captured_output.getvalue()

    cleaned_sub_info = None
    if sub_questions_output.strip():
        # Remove extra whitespace and format
        lines = [line.strip() for line in sub_questions_output.strip().split('\n') if line.strip()]
        if lines:
            cleaned_sub_info = ' | '.join(lines[:3])  # Take first 3 meaningful lines

    # Step 4: Calculate confidence
    source_nodes = getattr(response, 'source_nodes', []) or []
    # An engine may return no answer text; score that as an empty answer.
    confidence, factors = calculate_confidence_score(response.response or "", source_nodes)

    processing_time = time.time() - start_time

    return {
        'response': response,
        'question_type': question_type,
        'confidence': confidence,
        'factors': factors,
        'processing_time': processing_time,
        'source_nodes': source_nodes,
        'context_used': bool(recent_history),
        'sub_questions_info': cleaned_sub_info
    }

def on_submit_enhanced(sender):
    """Handle a submitted question in the enhanced v3 chat.

    ``exit`` disables the input box; ``clear`` wipes the conversation and
    redraws the interface. Any other text is recorded in
    ``chat_history_v3_enhanced``, answered through ``enhanced_query_processing``
    and the full history is re-rendered.

    Args:
        sender: The widget that fired the submit event (unused).
    """
    question = question_box_enhanced.value.strip()
    if not question:
        return

    if question.lower() == 'exit':
        question_box_enhanced.disabled = True
        with output_area_enhanced:
            clear_output()
            display(Markdown("**🔚 Chat session ended. Run the cell again to restart.**"))
        return

    if question.lower() == 'clear':
        # Clear all conversation histories
        chat_history_v3_enhanced.clear()
        # Also clear the regular v3 history when such a list exists. It is looked
        # up dynamically because it is not created by this cell; referencing it
        # directly raises NameError when it was never defined.
        legacy_history = globals().get('chat_history_v3')
        if legacy_history is not None:
            legacy_history.clear()

        # Clear ALL outputs including sub-question engine outputs
        from IPython.display import clear_output as global_clear_output
        global_clear_output(wait=True)

        # Re-display the interface
        display(Markdown("### 🚀 Enhanced RAG Chat (v3+)\n*Features: Persistent History, Better Follow-ups, Scrollable Output*"))
        display(question_box_enhanced)
        display(output_area_enhanced)

        # Reset the display with cleared message
        display_chat_history()
        question_box_enhanced.value = ''

        # Show confirmation message
        with output_area_enhanced:
            display(Markdown("✅ **Conversation history cleared!** All context has been reset."))
        return

    # Add user question to history
    user_entry = {'role': 'user', 'content': question}
    chat_history_v3_enhanced.append(user_entry)

    # Show processing message
    with output_area_enhanced:
        # Keep existing history and add processing message
        display(Markdown(f"**🤔 Q:** {question}"))
        display(Markdown("*🔄 Processing with enhanced v3 features...*"))

    try:
        # Process the question
        result = enhanced_query_processing(question)

        # Add assistant response with metadata to history
        chat_history_v3_enhanced.append({
            'role': 'assistant',
            'content': result['response'].response,
            'metadata': {
                'question_type': result['question_type'],
                'confidence': result['confidence'],
                'processing_time': result['processing_time'],
                'context_used': result['context_used'],
                'sub_questions_info': result.get('sub_questions_info'),
                'source_nodes': result.get('source_nodes', [])
            }
        })

        # Refresh the display with complete history
        display_chat_history()

    except Exception as e:
        # Drop the unanswered question so the history stays in strict
        # user/assistant pairs; display_chat_history pairs entries by position.
        if chat_history_v3_enhanced and chat_history_v3_enhanced[-1] is user_entry:
            chat_history_v3_enhanced.pop()
        with output_area_enhanced:
            display(Markdown(f"**❌ Error:** {str(e)}"))

    question_box_enhanced.value = ''

# Set up the interface
# NOTE(review): Text.on_submit is reported as deprecated in ipywidgets 8 — confirm
# against the installed version before relying on it.
question_box_enhanced.on_submit(on_submit_enhanced)

# Display the enhanced interface
display(Markdown("### 🚀 Enhanced RAG Chat (v3+)\n*Features: Persistent History, Better Follow-ups, Scrollable Output*"))
display(question_box_enhanced)
display(output_area_enhanced)

# Initialize with welcome message
# With an empty history this renders the "Start your conversation" placeholder.
display_chat_history()

### 🚀 Enhanced RAG Chat (v3+)
*Features: Persistent History, Better Follow-ups, Scrollable Output*

Text(value='clear', description='Question:', layout=Layout(width='700px'), placeholder='Ask about your insuran…

Output(layout=Layout(border='1px solid #ccc', height='400px', overflow_y='auto', width='100%'))

In [50]:
# Create Hybrid Retriever (Semantic + Keyword)
from llama_index.core.retrievers import VectorIndexRetriever
from rank_bm25 import BM25Okapi
import numpy as np
from llama_index.core.schema import NodeWithScore

# Create semantic retriever
# Rebinds `vector_retriever`, replacing the object created by the earlier
# hybrid-retriever cell with an identically configured one.
vector_retriever = VectorIndexRetriever(index=index_v2, similarity_top_k=5)

# Create custom BM25 retriever with content quality boosting
class CustomBM25Retriever:
    """BM25 keyword retriever with content-quality score adjustments.

    Redefines the class of the same name from the earlier cell. On top of plain
    BM25 ranking, scores of structural chunks (table of contents, section
    headers) are reduced and scores of chunks containing claim/payment-related
    phrases are raised for matching queries.

    Args:
        nodes: Objects exposing a ``text`` attribute; they form the BM25 corpus.
        similarity_top_k: Maximum number of nodes returned per query.
    """

    # Chunks containing any of these are almost eliminated (score x 0.01).
    _SEVERE_PENALTY_PHRASES = (
        'table of contents', 'gc 6001 table of contents',
        'this policy has been updated effective january 1, 2014 gc 6001'
    )
    # Short chunks (< 300 chars) containing any of these are reduced (score x 0.3).
    _MODERATE_PENALTY_PHRASES = (
        'section a -', 'section b -', 'section c -', 'section d -',
        'part i -', 'part ii -', 'part iii -', 'part iv -',
        'page 1', 'page 2', 'page 3', 'page 4', 'page 5'
    )
    # Query terms that switch the content boost on.
    _BOOST_QUERY_TERMS = ('exclusion', 'procedure', 'payment', 'claim')
    # Chunks containing any of these are raised (score x 1.5) when the boost is on.
    _CONTENT_BOOST_PHRASES = (
        'coverage exclusion', 'claim procedure', 'premium payment',
        'death benefit', 'proof of loss', 'notice of claim',
        'medical examination', 'autopsy', 'legal action'
    )

    def __init__(self, nodes, similarity_top_k=5):
        self.nodes = nodes
        self.similarity_top_k = similarity_top_k
        # Tokenize documents for BM25. An empty corpus is kept as ``None`` so
        # that construction does not fail and ``retrieve`` can return nothing.
        tokenized_docs = [self._tokenize(node.text) for node in nodes]
        self.bm25 = BM25Okapi(tokenized_docs) if tokenized_docs else None

    @staticmethod
    def _tokenize(text):
        """Lower-case ``text`` and split it into alphanumeric tokens.

        Punctuation is treated as a separator so that e.g. "claim?" in a query
        matches "claim" in a document.
        """
        return ''.join(ch if ch.isalnum() else ' ' for ch in text.lower()).split()

    @staticmethod
    def _query_text(query):
        """Return the plain query string for a str or an object with ``query_str``/``text``."""
        if hasattr(query, 'query_str'):
            return query.query_str
        if hasattr(query, 'text'):
            return query.text
        return str(query)

    def _boost_content_quality(self, scores, query_text):
        """
        Boost scores for content-rich nodes and penalize structural content

        Args:
            scores: Per-node scores in the order of ``self.nodes``; not modified.
            query_text: The query string; decides whether the content boost applies.

        Returns:
            A copy of ``scores`` with the penalties and boosts applied.
        """
        boosted_scores = scores.copy()
        query_lower = query_text.lower()
        # Depends only on the query, so it is evaluated once rather than per node.
        boost_applies = any(term in query_lower for term in self._BOOST_QUERY_TERMS)

        for i, node in enumerate(self.nodes):
            node_text = node.text.lower()

            # At most one penalty per node: the severe one wins over the moderate one.
            if any(phrase in node_text for phrase in self._SEVERE_PENALTY_PHRASES):
                boosted_scores[i] *= 0.01  # Nearly eliminate table of contents
            elif len(node_text) < 300 and any(
                phrase in node_text for phrase in self._MODERATE_PENALTY_PHRASES
            ):
                boosted_scores[i] *= 0.3  # Reduce structural content

            # Boost content-rich sections
            if boost_applies and any(
                phrase in node_text for phrase in self._CONTENT_BOOST_PHRASES
            ):
                boosted_scores[i] *= 1.5  # Boost relevant content

        return boosted_scores

    def retrieve(self, query_str):
        """Return up to ``similarity_top_k`` nodes with a positive adjusted score, best first."""
        if self.bm25 is None:
            return []

        query_text = self._query_text(query_str)
        tokenized_query = self._tokenize(query_text)
        if not tokenized_query:
            return []

        # Get BM25 scores (one per corpus node, in corpus order)
        scores = self.bm25.get_scores(tokenized_query)

        # Apply content quality boosting
        boosted_scores = self._boost_content_quality(scores, query_text)

        # Get top k indices, highest score first
        top_indices = np.argsort(boosted_scores)[::-1][:self.similarity_top_k]
        # Nodes with a zero score share no term with the query and are dropped.
        return [
            NodeWithScore(node=self.nodes[i], score=float(boosted_scores[i]))
            for i in top_indices
            if boosted_scores[i] > 0
        ]

    # Add async version for compatibility
    async def aretrieve(self, query_str):
        """Async wrapper that simply runs the synchronous ``retrieve``."""
        return self.retrieve(query_str)

# Create BM25 retriever
# Rebinds `bm25_retriever` to an instance of the class redefined in this cell.
# NOTE(review): objects built earlier from the previous instance (e.g. the hybrid
# retriever from the earlier cell) keep using the old one unless they are
# re-created as well — confirm the rest of this cell does that.
bm25_retriever = CustomBM25Retriever(nodes, similarity_top_k=5)

# Simple hybrid retriever that combines results with content filtering
class SimpleHybridRetriever:
    """Merge vector and BM25 retrieval results and drop boilerplate chunks.

    Both wrapped retrievers only need a ``retrieve(query_text)`` method that
    returns a ranked list of result objects exposing ``.node.text``.

    The two rankings are combined with reciprocal rank fusion (RRF) before the
    content filter is applied. Simply concatenating "vector results + BM25
    results" and truncating to ``similarity_top_k`` would discard every BM25
    hit whenever the vector retriever already supplies ``similarity_top_k``
    substantial chunks; rank fusion gives both retrievers a fair share and
    promotes chunks that both of them found.
    """

    # Chunks containing any of these phrases are rejected regardless of length.
    _STRICT_FILTER_PHRASES = (
        'table of contents',
        'gc 6001 table of contents',
        'this policy has been updated effective january 1, 2014 gc 6001',
    )

    # Heading markers; only disqualifying for chunks shorter than 200 characters.
    _STRUCTURAL_PHRASES = (
        'section a -', 'section b -', 'section c -', 'section d -',
        'part i -', 'part ii -', 'part iii -', 'part iv -',
    )

    # A chunk must mention at least one of these to count as real policy content.
    _CONTENT_INDICATORS = (
        'coverage', 'benefit', 'exclusion', 'procedure', 'payment',
        'claim', 'premium', 'death', 'accident', 'medical',
        'within', 'days', 'shall', 'must', 'required', 'employee',
        'insurance', 'policy', 'amount', 'termination', 'effective',
    )

    # Looser keyword list used only by the backup pass in retrieve().
    _BACKUP_KEYWORDS = (
        'coverage', 'benefit', 'claim', 'insurance', 'policy', 'employee', 'procedure',
    )

    # Conventional smoothing constant for reciprocal rank fusion.
    _RRF_K = 60

    def __init__(self, vector_retriever, bm25_retriever, similarity_top_k=5):
        """Store the two underlying retrievers and the result cap.

        Args:
            vector_retriever: Retriever queried first; wins ties during fusion.
            bm25_retriever: Keyword retriever fused with the vector results.
            similarity_top_k: Maximum number of results ``retrieve`` returns.
        """
        self.vector_retriever = vector_retriever
        self.bm25_retriever = bm25_retriever
        self.similarity_top_k = similarity_top_k

    def _is_substantial_content(self, node):
        """
        Filter out low-quality content like table of contents, headers, etc.

        Returns True only when the chunk is not table-of-contents boilerplate,
        is at least 100 characters long, is not a short (< 200 characters)
        section/part heading, and mentions at least one content indicator.
        """
        text = node.text.lower().strip()

        # Hard reject table-of-contents / revision boilerplate regardless of length.
        if any(phrase in text for phrase in self._STRICT_FILTER_PHRASES):
            return False

        # Very short chunks are almost always headers or fragments.
        if len(text) < 100:
            return False

        # Medium-length chunks are rejected only when they look like headings.
        if len(text) < 200 and any(phrase in text for phrase in self._STRUCTURAL_PHRASES):
            return False

        # Require at least one policy-related term.
        return any(indicator in text for indicator in self._CONTENT_INDICATORS)

    @staticmethod
    def _extract_query_text(query_str):
        """Return the query as a plain string (accepts str or query-like objects)."""
        if hasattr(query_str, 'query_str'):
            return query_str.query_str
        if hasattr(query_str, 'text'):
            return query_str.text
        return str(query_str)

    def _fuse_rankings(self, *ranked_lists):
        """Order the de-duplicated union of several rankings by reciprocal rank fusion.

        Each result contributes ``1 / (_RRF_K + rank)`` per list it appears in
        (rank starts at 1). Results are keyed by node text; the first object
        seen for a given text is the one returned, with its score untouched.
        Ties keep first-seen order, so earlier lists win ties.
        """
        fused = {}  # node text -> [fused score, first-seen order, result object]
        for results in ranked_lists:
            counted_in_this_list = set()
            for rank, result in enumerate(results, start=1):
                key = result.node.text
                if key in counted_in_this_list:
                    # Count a repeated chunk only once per ranking.
                    continue
                counted_in_this_list.add(key)

                contribution = 1.0 / (self._RRF_K + rank)
                entry = fused.get(key)
                if entry is None:
                    fused[key] = [contribution, len(fused), result]
                else:
                    entry[0] += contribution

        ordered = sorted(fused.values(), key=lambda entry: (-entry[0], entry[1]))
        return [entry[2] for entry in ordered]

    def retrieve(self, query_str):
        """Retrieve up to ``similarity_top_k`` substantial chunks for a query.

        Args:
            query_str: Plain string, or an object exposing the query through a
                ``query_str`` or ``text`` attribute.

        Returns:
            List of the result objects produced by the underlying retrievers,
            fused by rank, filtered for substantial content and truncated to
            ``similarity_top_k``.
        """
        query_text = self._extract_query_text(query_str)

        # Get results from both retrievers and fuse them into one ranking.
        vector_results = self.vector_retriever.retrieve(query_text)
        bm25_results = self.bm25_retriever.retrieve(query_text)
        candidates = self._fuse_rankings(vector_results, bm25_results)

        # Keep only chunks that look like real policy content.
        filtered_results = [
            result for result in candidates
            if self._is_substantial_content(result.node)
        ]

        # If we have too few substantial results, add selective backup using
        # looser criteria so the caller still receives some context.
        if len(filtered_results) < 2:
            accepted_texts = {result.node.text for result in filtered_results}
            for result in candidates:
                if len(filtered_results) >= self.similarity_top_k:
                    break
                if result.node.text in accepted_texts:
                    continue

                text = result.node.text.lower().strip()
                # Table of contents stays excluded even in backup mode.
                if 'table of contents' in text or len(text) < 80:
                    continue

                # Only include if it has policy-related content.
                if any(word in text for word in self._BACKUP_KEYWORDS):
                    filtered_results.append(result)
                    accepted_texts.add(result.node.text)

        # Return top k results
        return filtered_results[:self.similarity_top_k]

    # Add async version to handle both sync and async calls
    async def aretrieve(self, query_str):
        """Async-compatible wrapper; delegates to the synchronous ``retrieve``."""
        return self.retrieve(query_str)

# Wire the vector and BM25 retrievers into one hybrid retriever (max 5 results).
hybrid_retriever = SimpleHybridRetriever(
    vector_retriever=vector_retriever,
    bm25_retriever=bm25_retriever,
    similarity_top_k=5,
)

print("✅ Hybrid retriever created with async support!")

✅ Hybrid retriever created with async support!


In [51]:
# Query Routing and Classification
import re

# Keyword patterns per question type, checked in priority order. Whole-word
# matching avoids false hits such as 'how' inside "show" or 'who' inside "whole".
# Specific intents are tested before the generic factual default so that
# "What is the difference between ..." is routed as a comparison.
_QUESTION_TYPE_PATTERNS = (
    ('comparison', re.compile(r'\b(?:compar\w*|difference\w*|vs|versus|better)\b')),
    ('procedural', re.compile(r'\b(?:how|process\w*|procedure\w*|steps?)\b')),
    ('summary', re.compile(r'\b(?:summar\w*|overview\w*|explain\w*)\b')),
)


def classify_question(question):
    """
    Classify question type to route to appropriate strategy

    Args:
        question: Plain string, or an object exposing the text through a
            ``query_str`` or ``text`` attribute (e.g. a query bundle).

    Returns:
        One of ``'comparison'``, ``'procedural'``, ``'summary'`` or
        ``'factual'``. ``'factual'`` is both the label for plain
        what/who/when/where/which questions and the default when no
        keyword matches.
    """
    # Handle both string and QueryBundle-like objects
    if hasattr(question, 'query_str'):
        question_text = question.query_str
    elif hasattr(question, 'text'):
        question_text = question.text
    else:
        question_text = str(question)

    question_lower = question_text.lower()

    for question_type, pattern in _QUESTION_TYPE_PATTERNS:
        if pattern.search(question_lower):
            return question_type

    # Wh-questions and anything unrecognised are treated as factual.
    return 'factual'

# Status message: confirms this cell ran and classify_question is now defined.
print("Query classification system ready!")

Query classification system ready!


In [52]:
# Enhanced Query Engines with Multi-step Reasoning
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.response_synthesizers import get_response_synthesizer

# Build the query engines the chat interface routes questions to.

# 1. Baseline engine: hybrid retrieval followed by "compact" response synthesis
hybrid_query_engine = RetrieverQueryEngine(
    response_synthesizer=get_response_synthesizer(response_mode="compact"),
    retriever=hybrid_retriever,
)

# 2. Sub-question engine for complex queries, exposed to it as a single tool
#    backed by the hybrid engine. If it cannot be constructed, the hybrid
#    engine is reused so that `sub_question_engine` is always defined.
try:
    query_engine_tools = [
        QueryEngineTool(
            metadata=ToolMetadata(
                description="Provides information about insurance policy details, coverage, terms, and conditions",
                name="insurance_policy",
            ),
            query_engine=hybrid_query_engine,
        )
    ]

    sub_question_engine = SubQuestionQueryEngine.from_defaults(
        llm=llm,
        query_engine_tools=query_engine_tools,
    )
except (ImportError, AttributeError) as exc:
    print(f"SubQuestionQueryEngine not available: {exc}")
    print("Using standard hybrid query engine for all queries.")
    # Fallback: every question type goes through the hybrid engine
    sub_question_engine = hybrid_query_engine
else:
    print("Enhanced query engines created successfully!")

Enhanced query engines created successfully!


In [53]:
# Confidence Scoring System
def calculate_confidence_score(response, retrieved_nodes):
    """
    Calculate confidence score based on multiple factors

    The score is a deterministic heuristic: the same response and sources
    always produce the same value. (A random +/-3 jitter used to be added
    "to avoid identical scores"; it made results irreproducible and has been
    removed.)

    Args:
        response: The generated answer text. ``None`` is treated as an empty
            answer; any other non-string value is converted with ``str()``.
        retrieved_nodes: Sequence of retrieved results, each exposing
            ``.node.text``. May be empty or ``None``.

    Returns:
        Tuple ``(score, factors)`` where ``score`` is an int clamped to
        0-100 and ``factors`` is a list of human-readable strings describing
        each contribution.
    """
    response_text = "" if response is None else str(response)
    response_lower = response_text.lower()
    response_words = response_text.split()

    score = 0.0
    factors = []

    # Factor 1: Number of supporting sources (max 25 points)
    num_sources = len(retrieved_nodes) if retrieved_nodes else 0
    source_score = min(num_sources * 5, 25)  # Up to 5 sources
    score += source_score
    factors.append(f"Sources: {num_sources} (+{source_score}pts)")

    # Factor 2: Response length and completeness (max 20 points)
    response_length = len(response_words)
    if 30 <= response_length <= 150:
        length_score = 20  # Optimal length
    elif 20 <= response_length < 30 or 150 < response_length <= 200:
        length_score = 15  # Good length
    elif 10 <= response_length < 20 or 200 < response_length <= 300:
        length_score = 10  # Acceptable length
    else:
        length_score = 5   # Too short or too long
    score += length_score
    factors.append(f"Length: {response_length} words (+{length_score}pts)")

    # Factor 3: Specific policy references (max 25 points)
    specific_indicators = [
        'section', 'page', 'part', 'according to', 'states that', 'specifically',
        'outlined', 'policy', 'coverage', 'benefit', 'procedure', 'days', 'within'
    ]
    specificity_count = sum(1 for word in specific_indicators if word in response_lower)
    specificity_score = min(specificity_count * 3, 25)
    score += specificity_score
    factors.append(f"Policy specificity: {specificity_count} terms (+{specificity_score}pts)")

    # Factor 4: Uncertainty and generic responses (penalty)
    uncertainty_phrases = [
        'not sure', 'unclear', 'might be', 'possibly', 'perhaps', 'generally',
        'typically', 'usually', 'contact the', 'consult with', 'it is advisable'
    ]
    uncertainty_count = sum(1 for phrase in uncertainty_phrases if phrase in response_lower)
    uncertainty_penalty = min(uncertainty_count * 8, 20)  # Max 20 point penalty
    score -= uncertainty_penalty
    if uncertainty_penalty > 0:
        factors.append(f"Generic/uncertain language: -{uncertainty_penalty}pts")

    # Factor 5: Numerical precision bonus (max 15 points)
    # Any whitespace-separated token containing a digit counts as a value.
    numbers_found = sum(
        1 for word in response_words if any(char.isdigit() for char in word)
    )
    precision_score = min(numbers_found * 3, 15)  # Numbers suggest specific data
    score += precision_score
    if precision_score > 0:
        factors.append(f"Numerical precision: {numbers_found} values (+{precision_score}pts)")

    # Factor 6: Enhanced source quality assessment (max 20 points)
    if retrieved_nodes:
        substantial_sources = 0
        content_quality_bonus = 0

        low_quality_markers = (
            'table of contents', 'this policy has been updated effective',
            'section a -', 'part i -'
        )
        content_rich_markers = (
            'coverage amount', 'exclusion', 'claim procedure', 'premium payment',
            'death benefit', 'medical examination', 'proof of loss'
        )

        for node in retrieved_nodes:
            node_text = node.node.text.lower().strip()

            # Only sources longer than 150 characters count as substantial;
            # the quality bonus/penalty is applied to those sources only.
            if len(node_text) > 150:
                substantial_sources += 1

                if any(phrase in node_text for phrase in low_quality_markers):
                    content_quality_bonus -= 2  # Penalty for low-quality sources
                elif any(phrase in node_text for phrase in content_rich_markers):
                    content_quality_bonus += 3  # Bonus for relevant content

        # Calculate source quality score
        base_quality = min(substantial_sources * 4, 16)  # Base score for substantial sources
        quality_bonus = max(-8, min(8, content_quality_bonus))  # Bonus/penalty for content quality
        source_quality = max(0, base_quality + quality_bonus)

        score += source_quality
        if source_quality > 0:
            factors.append(f"Source quality: {substantial_sources} substantial (+{source_quality}pts)")
        elif substantial_sources == 0:
            factors.append(f"Source quality: Low-quality sources (-5pts)")
            score -= 5  # Penalty for no substantial sources

    # Clamp to the 0-100 scale; no random component, so results are reproducible.
    final_score = max(0, min(100, score))

    return round(final_score), factors

# Status message: confirms this cell ran and calculate_confidence_score is now defined.
print("Enhanced confidence scoring system ready!")

Enhanced confidence scoring system ready!


In [54]:
# Enhanced Chat Interface with Persistent History
import ipywidgets as widgets
from IPython.display import display, Markdown, clear_output, HTML
import time

# Reset chat history for new session
# The list holds message dicts with 'role' and 'content' keys (assistant
# entries also carry 'metadata'); re-running this cell rebinds it to a new
# empty list.
chat_history_v3_enhanced = []

# Create UI components
# Single-line text input for the user's question; its submit handler is
# attached further down in this cell.
question_box_enhanced = widgets.Text(
    value='',
    placeholder='Ask about your insurance policy (Enhanced v3 with persistent history)...',
    description='Question:',
    disabled=False,
    layout=widgets.Layout(width='700px')
)

# Create a scrollable output area
# Fixed height plus overflow_y='auto' keeps the conversation inside a
# bordered box; display_chat_history() renders into this widget.
output_area_enhanced = widgets.Output(
    layout=widgets.Layout(
        height='400px',
        width='100%',
        border='1px solid #ccc',
        overflow_y='auto'
    )
)

def display_chat_history():
    """Display the entire chat history in a formatted way

    Clears ``output_area_enhanced`` and re-renders every completed exchange
    from ``chat_history_v3_enhanced``: the question, an analysis line built
    from the assistant message's metadata, the answer, and up to three source
    citations.

    Exchanges are paired by role (a 'user' message followed by the next
    'assistant' message) rather than by list position, so a question that
    never received an answer (for example after a failed query) does not
    shift every later exchange out of alignment. Unanswered questions are
    skipped.
    """
    import re

    with output_area_enhanced:
        clear_output(wait=True)

        if not chat_history_v3_enhanced:
            display(Markdown("*Start your conversation by asking a question about your insurance policy...*"))
            return

        # Pair each user message with the assistant message that answers it.
        exchanges = []
        pending_user_msg = None
        for message in chat_history_v3_enhanced:
            role = message.get('role')
            if role == 'user':
                pending_user_msg = message
            elif role == 'assistant' and pending_user_msg is not None:
                exchanges.append((pending_user_msg, message))
                pending_user_msg = None

        for exchange_num, (user_msg, assistant_msg) in enumerate(exchanges, 1):
            # Display exchange number
            display(Markdown(f"### 💬 Exchange {exchange_num}"))

            # Display question
            display(Markdown(f"**🤔 Q:** {user_msg['content']}"))

            # Display answer with metadata if available
            response_content = assistant_msg['content']
            meta = assistant_msg.get('metadata')
            if isinstance(meta, dict):
                context_indicator = "🔄" if meta.get('context_used', False) else "🆕"
                display(Markdown(f"**📊 Analysis:** {context_indicator} Type: `{meta.get('question_type', 'unknown')}` | Time: `{meta.get('processing_time', 0):.2f}s` | Confidence: {meta.get('confidence', 0):.0f}/100"))

                # Show sub-question information if available (formatted)
                sub_info = meta.get('sub_questions_info')
                if sub_info:
                    match = re.search(r'Generated (\d+) sub questions', sub_info)
                    if match:
                        num_questions = match.group(1)
                        display(Markdown(f"**🔍 Query Processing:** Used multi-step reasoning with {num_questions} sub-questions"))
                    else:
                        # No recognisable count: show the captured text as-is
                        # rather than dropping it.
                        display(Markdown(f"**🔍 Query Processing:** {sub_info}"))

            display(Markdown(f"**🤖 A:** {response_content}"))

            # Enhanced source citation with page numbers and sections
            source_nodes = meta.get('source_nodes') if isinstance(meta, dict) else None
            if source_nodes:
                display(Markdown("**📚 Sources Referenced:**"))
                for source_num, node in enumerate(source_nodes[:3], 1):  # Show top 3 sources
                    # Extract source information
                    source_meta = node.node.metadata
                    page_info = source_meta.get('page_label', source_meta.get('source', 'Unknown'))

                    # Get text preview
                    text_preview = node.node.text[:120].replace('\n', ' ').strip()

                    # Format source citation
                    if page_info != 'Unknown':
                        display(Markdown(f"**{source_num}.** Page {page_info}: *\"{text_preview}...\"*"))
                    else:
                        display(Markdown(f"**{source_num}.** Document Section: *\"{text_preview}...\"*"))

            display(Markdown("---"))

def enhanced_query_processing(question):
    """Enhanced query processing with better context handling

    Classifies the question, builds a context-aware prompt from the previous
    exchanges, runs it through the appropriate query engine and scores the
    answer.

    Args:
        question: The user's question (converted with ``str()`` and stripped).

    Returns:
        Dict with keys ``response`` (the engine's response object),
        ``question_type``, ``confidence``, ``factors``, ``processing_time``
        (seconds), ``source_nodes``, ``context_used`` (True when earlier
        history was included in the prompt) and ``sub_questions_info``
        (first three lines the engine printed to stdout, or None).
    """
    import contextlib
    import re
    from io import StringIO

    start_time = time.time()

    # Ensure we work with string input
    question_str = str(question).strip()
    word_count = len(question_str.split())

    # Step 1: Classify question type
    question_type = classify_question(question_str)

    # Step 2: Enhanced context handling using the enhanced history.
    # The submit handler appends the current question to the history *before*
    # calling this function. Drop that trailing entry so that "history" means
    # earlier exchanges only; otherwise the first question would be treated as
    # its own context and the follow-up branch would read the wrong slots.
    prior_history = list(chat_history_v3_enhanced)
    if (prior_history
            and prior_history[-1].get('role') == 'user'
            and str(prior_history[-1].get('content', '')).strip() == question_str):
        prior_history.pop()

    if prior_history:
        # Get last 2 exchanges for context
        recent_history = prior_history[-4:]

        # Detect follow-up questions. Whole-word matching is required: as
        # substrings, 'it' and 'that' match almost every question ("benefit",
        # "with", ...).
        follow_up_indicators = [
            'elaborate', 'explain more', 'tell me more', 'expand', 'details',
            'that', 'it', 'this', 'further', 'more about', 'specific',
            'can you', 'what about', 'how about'
        ]
        follow_up_pattern = (
            r'\b(?:' + '|'.join(re.escape(phrase) for phrase in follow_up_indicators) + r')\b'
        )
        is_follow_up = re.search(follow_up_pattern, question_str.lower()) is not None

        if is_follow_up and len(recent_history) >= 2:
            # Enhanced follow-up handling
            last_question = recent_history[-2]['content'] if recent_history[-2]['role'] == 'user' else ""
            last_answer = recent_history[-1]['content'] if recent_history[-1]['role'] == 'assistant' else ""

            contextual_question = f"""Previous Question: {last_question}
Previous Answer: {last_answer}

User Follow-up Request: {question_str}

Please provide more detailed information, elaborate further, or answer the follow-up question about the same topic."""
        else:
            # Regular context for independent questions (each message is
            # truncated to 100 characters to keep the prompt short)
            context_str = "\n".join([
                f"{msg['role'].title()}: {msg['content'][:100]}..." if len(msg['content']) > 100 else f"{msg['role'].title()}: {msg['content']}"
                for msg in recent_history
            ])
            contextual_question = f"Context:\n{context_str}\n\nNew Question: {question_str}"
    else:
        contextual_question = question_str

    # Step 3: Enhanced prompting for better content extraction
    enhanced_contextual_question = contextual_question

    # For complex or summary questions, add specific instructions
    if question_type in ['summary', 'comparison'] or word_count > 10:
        enhanced_contextual_question = f"""{contextual_question}

Please provide specific details including:
- Exact timeframes, deadlines, and numerical values when mentioned
- Specific document sections, page references, or policy numbers
- Detailed procedures, requirements, and step-by-step processes
- Concrete examples rather than general statements
- Avoid generic advice like "contact the company" - extract specific policy information instead

Focus on extracting precise information directly from the insurance policy document."""

    # Capture anything the engines print (e.g. sub-question progress) so it
    # can be shown in the chat instead of leaking into the cell output.
    captured_output = StringIO()

    with contextlib.redirect_stdout(captured_output):
        if question_type in ['comparison', 'summary'] or word_count > 15:
            response = sub_question_engine.query(enhanced_contextual_question)
        else:
            response = hybrid_query_engine.query(enhanced_contextual_question)

    # Clean and format the captured sub-question output
    cleaned_sub_info = None
    lines = [line.strip() for line in captured_output.getvalue().split('\n') if line.strip()]
    if lines:
        cleaned_sub_info = ' | '.join(lines[:3])  # Take first 3 meaningful lines

    # Step 4: Calculate confidence
    source_nodes = getattr(response, 'source_nodes', None) or []
    # The response text may be None when the engine produced no answer.
    confidence, factors = calculate_confidence_score(response.response or "", source_nodes)

    processing_time = time.time() - start_time

    return {
        'response': response,
        'question_type': question_type,
        'confidence': confidence,
        'factors': factors,
        'processing_time': processing_time,
        'source_nodes': source_nodes,
        'context_used': bool(prior_history),
        'sub_questions_info': cleaned_sub_info
    }

def on_submit_enhanced(sender):
    """Handle a submitted question from the chat text box.

    Special inputs: ``exit`` disables the text box and ends the session;
    ``clear`` wipes the conversation history and redraws the interface. Any
    other non-empty input is answered through ``enhanced_query_processing``
    and appended to ``chat_history_v3_enhanced``.

    If processing fails, the error is shown in the output area and the
    unanswered question is removed from the history again, so the history
    always consists of complete question/answer pairs.

    Args:
        sender: The widget that triggered the callback (unused).
    """
    question = question_box_enhanced.value.strip()
    if not question:
        return

    command = question.lower()

    if command == 'exit':
        question_box_enhanced.disabled = True
        with output_area_enhanced:
            clear_output()
            display(Markdown("**🔚 Chat session ended. Run the cell again to restart.**"))
        return

    if command == 'clear':
        # Clear all conversation histories
        chat_history_v3_enhanced.clear()
        # Also clear the regular v3 history used by other components. It is
        # created in a different cell, so tolerate it not existing yet.
        legacy_history = globals().get('chat_history_v3')
        if legacy_history is not None:
            legacy_history.clear()

        # Clear ALL outputs including sub-question engine outputs
        clear_output(wait=True)

        # Re-display the interface
        display(Markdown("### 🚀 Enhanced RAG Chat (v3+)\n*Features: Persistent History, Better Follow-ups, Scrollable Output*"))
        display(question_box_enhanced)
        display(output_area_enhanced)

        # Reset the display with cleared message
        display_chat_history()
        question_box_enhanced.value = ''

        # Show confirmation message
        with output_area_enhanced:
            display(Markdown("✅ **Conversation history cleared!** All context has been reset."))
        return

    # Add user question to history
    user_entry = {'role': 'user', 'content': question}
    chat_history_v3_enhanced.append(user_entry)

    # Show processing message
    with output_area_enhanced:
        # Keep existing history and add processing message
        display(Markdown(f"**🤔 Q:** {question}"))
        display(Markdown("*🔄 Processing with enhanced v3 features...*"))

    try:
        # Process the question
        result = enhanced_query_processing(question)

        # Add assistant response with metadata to history
        chat_history_v3_enhanced.append({
            'role': 'assistant',
            'content': result['response'].response,
            'metadata': {
                'question_type': result['question_type'],
                'confidence': result['confidence'],
                'processing_time': result['processing_time'],
                'context_used': result['context_used'],
                'sub_questions_info': result.get('sub_questions_info'),
                'source_nodes': result.get('source_nodes', [])
            }
        })

        # Refresh the display with complete history
        display_chat_history()

    except Exception as e:
        # Roll back the unanswered question; leaving it in place would
        # misalign every later question/answer pair in the history.
        if chat_history_v3_enhanced and chat_history_v3_enhanced[-1] is user_entry:
            chat_history_v3_enhanced.pop()
        with output_area_enhanced:
            display(Markdown(f"**❌ Error:** {str(e)}"))

    question_box_enhanced.value = ''

# Set up the interface
# Register on_submit_enhanced as the text box's submit callback.
# NOTE(review): newer ipywidgets releases reportedly deprecate Text.on_submit in
# favour of observing `value` with continuous_update=False — confirm against
# the installed version before changing.
question_box_enhanced.on_submit(on_submit_enhanced)

# Display the enhanced interface
# Same heading and widget order that the 'clear' command re-displays.
display(Markdown("### 🚀 Enhanced RAG Chat (v3+)\n*Features: Persistent History, Better Follow-ups, Scrollable Output*"))
display(question_box_enhanced)
display(output_area_enhanced)

# Initialize with welcome message
# With an empty history this renders the "Start your conversation..." prompt.
display_chat_history()

### 🚀 Enhanced RAG Chat (v3+)
*Features: Persistent History, Better Follow-ups, Scrollable Output*

Text(value='clear', description='Question:', layout=Layout(width='700px'), placeholder='Ask about your insuran…

Output(layout=Layout(border='1px solid #ccc', height='400px', overflow_y='auto', width='100%'))