# TCS Annual Report RAG System

Learning about embeddings by building a simple question-answering system for the TCS Annual Report.

This notebook follows a step-by-step approach to understand how embeddings work in retrieval-augmented generation (RAG).

In [None]:
# Step 1: Helper function for readable text display
def word_wrap(text, width=80):
    """
    Reflow text into lines of at most ``width`` characters.

    The text is split on any whitespace (so existing line breaks and
    repeated spaces are discarded) and the words are re-joined with single
    spaces. A line is broken before the first word that would push it past
    ``width``. A single word longer than ``width`` is never split; it is
    placed on a line of its own.

    Args:
        text (str): Text to wrap.
        width (int): Maximum line length in characters.

    Returns:
        str: The wrapped lines joined with newline characters.
    """
    wrapped = []
    pending = []        # words collected for the line currently being built
    pending_chars = 0   # total length of the words in `pending`, spaces excluded

    for token in text.split():
        # len(pending) equals the number of joining spaces the line would
        # contain once `token` is appended to it.
        if pending_chars + len(token) + len(pending) <= width:
            pending.append(token)
            pending_chars += len(token)
            continue

        if not pending:
            # Nothing buffered: the word alone exceeds the limit, emit it as-is.
            wrapped.append(token)
            pending_chars = 0
            continue

        # Flush the finished line and start the next one with this word.
        wrapped.append(' '.join(pending))
        pending = [token]
        pending_chars = len(token)

    if pending:
        wrapped.append(' '.join(pending))

    return '\n'.join(wrapped)

# Smoke test: wrap one long sentence at the default width and show the result
test_text = (
    "This is a very long sentence that we will use to test our word wrapping function to make sure it works correctly and makes text readable."
)
wrapped_test_text = word_wrap(test_text)
print(wrapped_test_text)

In [None]:
# Step 2: PDF Reading - Extract text from TCS Annual Report
from pypdf import PdfReader

# Open the report and lazily produce the whitespace-stripped text of each page
reader = PdfReader("TCS_Annual_Report.pdf")
stripped_pages = (page.extract_text().strip() for page in reader.pages)

# Keep only pages whose stripped text is non-empty (drops blank pages)
pdf_texts = [page_text for page_text in stripped_pages if page_text]

print(f"Total pages with content: {len(pdf_texts)}")
print("\nFirst page content:")
print("=" * 50)
first_page = pdf_texts[0]
print(word_wrap(first_page))

In [None]:
# Step 3: Character Chunking - Split into 1000-character chunks with overlap
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Character-level splitter: 1000-character chunks that share 50 characters
# with their neighbour. Separators are listed from coarsest (blank line)
# to finest (empty string).
character_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=50,
    separators=["\n\n", "\n", ". ", " ", ""],
)

# Stitch the pages into one document (blank line between pages), then split it
full_report_text = '\n\n'.join(pdf_texts)
character_split_texts = character_splitter.split_text(full_report_text)

print("Sample chunk (index 10):")
print("=" * 40)
sample_character_chunk = character_split_texts[10]
print(word_wrap(sample_character_chunk))
print(f"\nTotal character chunks: {len(character_split_texts)}")
print(f"First chunk length: {len(character_split_texts[0])} characters")
print(f"Last chunk length: {len(character_split_texts[-1])} characters")

In [None]:
# Step 4: Token Chunking - Further split into 256-token chunks with overlap
from langchain.text_splitter import SentenceTransformersTokenTextSplitter

# Token-level splitter: at most 256 tokens per chunk, 20 tokens of overlap
token_splitter = SentenceTransformersTokenTextSplitter(
    tokens_per_chunk=256,
    chunk_overlap=20,
)

# Re-split every character chunk by tokens and flatten into a single list
token_split_texts = [
    token_chunk
    for character_chunk in character_split_texts
    for token_chunk in token_splitter.split_text(character_chunk)
]

print("Sample token chunk (index 10):")
print("=" * 40)
sample_token_chunk = token_split_texts[10]
print(word_wrap(sample_token_chunk))
print(f"\nTotal token chunks: {len(token_split_texts)}")

# Side-by-side counts for the two chunking passes
print(f"Character chunks: {len(character_split_texts)}")
print(f"Token chunks: {len(token_split_texts)}")
print(f"Ratio (token/char chunks): {len(token_split_texts)/len(character_split_texts):.1f}")

In [None]:
# Step 5: Embedding Generation - Convert text chunks to numerical vectors
import chromadb
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction

import numpy as np

# Embedding function built with the library's default arguments
embedding_function = SentenceTransformerEmbeddingFunction()

# Embed a single chunk to inspect what one embedding looks like
sample_embedding = embedding_function([token_split_texts[10]])
print("Sample embedding (first 10 values):")
print(sample_embedding[0][:10])
print(f"\nEmbedding dimensions: {len(sample_embedding[0])}")
print(f"Data type: {type(sample_embedding[0][0])}")

# Sanity check: the L2 norm of the sample vector is expected to be close to 1.0
magnitude = np.linalg.norm(sample_embedding[0])
print(f"Vector magnitude (should be ~1.0): {magnitude:.3f}")

In [12]:
# Step 6: ChromaDB Setup - Create collection and store all document chunks
# Use persistent storage in the repo directory
chroma_client = chromadb.PersistentClient(path="./chroma_db")

# Fetch the collection for the TCS annual report, creating it when missing.
# get_or_create_collection replaces the earlier try/bare-except, which treated
# *any* failure (including KeyboardInterrupt or a storage error) as
# "collection does not exist".
chroma_collection = chroma_client.get_or_create_collection(
    "tcs_annual_report_2024",
    embedding_function=embedding_function
)

# Decide whether to populate based on the actual contents rather than on
# whether the collection object already existed: a collection left empty by
# an interrupted earlier run is filled instead of being silently reused.
# NOTE: a non-empty collection is reused as-is, so delete ./chroma_db after
# changing the chunking parameters to force a rebuild.
skip_adding = chroma_collection.count() > 0

if skip_adding:
    print("📁 Using existing collection from disk")
else:
    print("📁 Created new persistent collection")

    # Create IDs for each chunk (simple sequential numbering)
    ids = [str(i) for i in range(len(token_split_texts))]

    # Add all chunks to the collection (this will generate embeddings for all chunks)
    print(f"Adding {len(token_split_texts)} chunks to ChromaDB...")
    chroma_collection.add(ids=ids, documents=token_split_texts)

# Verify the collection
count = chroma_collection.count()
print("✅ Collection ready!")
print(f"Total documents in collection: {count}")
print(f"Collection name: {chroma_collection.name}")
print("Storage location: ./chroma_db/")

📁 Using existing collection from disk
✅ Collection ready!
Total documents in collection: 1324
Collection name: tcs_annual_report_2024
Storage location: ./chroma_db/


In [13]:
# Step 7: Environment Setup & Retrieval Testing
import os
from dotenv import load_dotenv
from openai import OpenAI

# Read the local .env file (if any) into the process environment
load_dotenv()

# OpenAI client shared by the cells below; the key comes from OPENAI_API_KEY
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

def test_retrieval(query, n_results=3):
    """
    Print the chunks that the Chroma collection returns for a query.

    Intended as a learning aid: it shows which stored chunks the embedding
    search considers closest to ``query``.

    Args:
        query (str): Question or search text.
        n_results (int): Number of chunks to request from the collection.

    Returns:
        list: The retrieved chunk texts, or an empty list when the
        collection returned no documents.
    """
    print(f"🔍 Query: '{query}'")
    print("=" * 60)

    # A single query text is sent, so the matches live in the first result list
    results = chroma_collection.query(query_texts=[query], n_results=n_results)
    matched_docs = results['documents'][0]

    if not matched_docs:
        print("❌ No relevant chunks found!")
        return []

    print(f"📄 Found {len(matched_docs)} relevant chunks:")
    print()

    # Show each chunk under a 1-based heading
    for rank, doc in enumerate(matched_docs, start=1):
        print(f"--- Chunk {rank} ---")
        print(word_wrap(doc))
        print()

    return matched_docs

# Run one sample query through the retrieval helper and keep the chunks
print("🧪 Testing semantic search retrieval:")
print()
sample_retrieval_query = "What is TCS's revenue?"
chunks = test_retrieval(sample_retrieval_query)

🧪 Testing semantic search retrieval:

🔍 Query: 'What is TCS's revenue?'
📄 Found 3 relevant chunks:

--- Chunk 1 ---
( in fy 2021 ) and settlement ( in fy 2024 ) of legal claim tcs has consistently
grown its earnings per share ( eps ), achieving a cagr of 9. 3 % over the past
five financial years. this steady increase highlights the company ’ s growing
earnings and its commitment to delivering long - term value to shareholders.
earnings per share

--- Chunk 2 ---
capabilities in independent analyst reports. tcs has been ranked as a leader in
analyst competitive surveys across multiple firms covering areas such as ai,
genai, analytics, data and automation. 1includes multiple investors in group
meetings growth 12. 0 % $ 20. 2 $ 22. 7 fy 2021 fy 2025 fy 2021 fy 2025 228 298
130 64 101 48 us $ 100mn + us $ 50mn + us $ 20mn + 148 182 fy 2024 fy 2025 tcs '
global innovation network includes 11 pace ports and studios, fostering
collaboration on cutting - edge solutions through cxo discussions,

In [14]:
# Step 8: Complete RAG Implementation with OpenAI Responses API
def ask_tcs_report(question):
    """
    Answer a question about the TCS report with a basic RAG pipeline.

    Steps:
    1. Query the Chroma collection for the 3 chunks closest to the question
    2. Print the retrieved chunks (for learning purposes)
    3. Send question + chunks to the OpenAI Responses API (model "gpt-4.1")
    4. Print and return the generated answer

    Args:
        question (str): The user's question.

    Returns:
        str: The generated answer; "No relevant information found." when the
        collection returns no documents; or an "❌ Error calling OpenAI API: ..."
        message when the generation step raises (the exception is not re-raised).
    """
    print(f"🔍 Question: {question}")
    print("=" * 70)

    # Retrieval: one query text, so the matches are in the first result list
    results = chroma_collection.query(query_texts=[question], n_results=3)
    context_chunks = results['documents'][0]

    if not context_chunks:
        print("❌ Sorry, I couldn't find relevant information in the TCS report.")
        return "No relevant information found."

    # Show what was retrieved before asking the model
    print(f"📄 Retrieved {len(context_chunks)} relevant chunks from TCS report:")
    print()

    for rank, chunk in enumerate(context_chunks, start=1):
        print(f"--- Retrieved Chunk {rank} ---")
        print(word_wrap(chunk))
        print()

    # Chunks are separated by a blank line inside the prompt
    context = "\n\n".join(context_chunks)

    try:
        print("🤖 Generating AI response...")
        print("-" * 40)

        prompt = f"""Based on the following excerpts from the TCS Annual Report, please answer this question: {question}

Context from TCS Annual Report:
{context}

Please provide a clear, accurate answer based only on the information provided above. If the context doesn't contain enough information to fully answer the question, please say so."""
        response = client.responses.create(model="gpt-4.1", input=prompt)

        print("💡 Answer:")
        print("=" * 40)
        # Fall back to the string form of the response if output_text is absent
        if hasattr(response, 'output_text'):
            answer = response.output_text
        else:
            answer = str(response)
        print(word_wrap(answer))
        print()

        return answer

    except Exception as exc:
        error_msg = f"❌ Error calling OpenAI API: {str(exc)}"
        print(error_msg)
        return error_msg

# Run the full pipeline once with a first sample question
print("🚀 Testing Complete RAG System:")
print("=" * 70)
print()

first_question = "What is TCS's revenue?"
answer = ask_tcs_report(first_question)

🚀 Testing Complete RAG System:

🔍 Question: What is TCS's revenue?
📄 Retrieved 3 relevant chunks from TCS report:

--- Retrieved Chunk 1 ---
( in fy 2021 ) and settlement ( in fy 2024 ) of legal claim tcs has consistently
grown its earnings per share ( eps ), achieving a cagr of 9. 3 % over the past
five financial years. this steady increase highlights the company ’ s growing
earnings and its commitment to delivering long - term value to shareholders.
earnings per share

--- Retrieved Chunk 2 ---
capabilities in independent analyst reports. tcs has been ranked as a leader in
analyst competitive surveys across multiple firms covering areas such as ai,
genai, analytics, data and automation. 1includes multiple investors in group
meetings growth 12. 0 % $ 20. 2 $ 22. 7 fy 2021 fy 2025 fy 2021 fy 2025 228 298
130 64 101 48 us $ 100mn + us $ 50mn + us $ 20mn + 148 182 fy 2024 fy 2025 tcs '
global innovation network includes 11 pace ports and studios, fostering
collaboration on cutting - edge

In [15]:
# Step 9: Multi-Query Testing - Explore Semantic Search Capabilities
# Test different types of questions to see how embeddings handle various concepts

test_questions = [
    "Who leads the company?",                    # Leadership/Executive query
    "What services does TCS provide?",           # Business model/offerings query  
    "How did TCS perform financially?",          # Financial performance query
    "What are the main business segments?",      # Organizational structure query
]
# Derived from the list so the "TEST i/N" banner stays correct when questions
# are added or removed (it was previously hard-coded as 4).
total_tests = len(test_questions)

print("🧪 TESTING SEMANTIC SEARCH WITH DIFFERENT QUESTION TYPES")
print("=" * 80)
print()
print("This will demonstrate how embeddings capture different concepts and find")
print("relevant information even when exact keywords don't match!")
print()

for i, question in enumerate(test_questions, 1):
    print(f"\n{'🔸' * 40}")
    print(f"TEST {i}/{total_tests}")
    print(f"{'🔸' * 40}")
    print()

    try:
        answer = ask_tcs_report(question)
    except Exception as e:
        # ask_tcs_report handles OpenAI errors itself; this catches anything
        # raised earlier (e.g. by the collection query) so the loop continues.
        print(f"❌ Error with question '{question}': {str(e)}")
        continue

    # Separator between questions; skipped after the last one, where there is
    # no next question to move to.
    if i < total_tests:
        print("\n" + "🔹" * 80)
        print("MOVING TO NEXT QUESTION...")
        print("🔹" * 80)

print("\n" + "✅" * 30)
print("🎉 SEMANTIC SEARCH TESTING COMPLETE!")
print("✅" * 30)
print()
print("Key observations to note:")
print("• How different questions retrieve different chunks")
print("• How semantic similarity works beyond keyword matching") 
print("• How context from multiple chunks creates comprehensive answers")
print("• The power of embeddings in understanding meaning vs. exact words")

🧪 TESTING SEMANTIC SEARCH WITH DIFFERENT QUESTION TYPES

This will demonstrate how embeddings capture different concepts and find
relevant information even when exact keywords don't match!


🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸
TEST 1/4
🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸🔸

🔍 Question: Who leads the company?
📄 Retrieved 3 relevant chunks from TCS report:

--- Retrieved Chunk 1 ---
governance experience in developing governance practices, serving the best
interests of all stakeholders, maintaining board and management accountability,
building long - term effective stakeholder engagements and driving corporate
ethics and values. the eligibility of a person to be appointed as a director of
the company is dependent on whether the person possesses the requisite skill
sets identified by the board as above and whether the person is a proven leader
in running a business that is relevant to the company ’ s business or is a
proven academician in the field relevant to the company ’ s busi

In [None]:
# Step 10: Evaluation Question Set for Query Expansion Testing
# Creating 15 questions with varying difficulty levels to test our RAG system

# Easy: factual questions expected to be answerable from a single chunk
easy_questions = [
    "What is TCS's revenue for FY 2025?",
    "How many employees does TCS have?",
    "What is TCS's profit for FY 2024?",
    "In which countries does TCS operate?",
    "What is TCS's dividend per share?",
]

# Medium: questions expected to need 2-3 chunks combined
medium_questions = [
    "How has TCS's revenue grown over the past 3 years?",
    "What are TCS's main business segments and their performance?",
    "What digital transformation services does TCS offer to clients?",
    "What are the key risks TCS faces in its business operations?",
    "How does TCS invest in employee development and training?",
]

# Hard: questions expected to need several chunks plus reasoning
hard_questions = [
    "What strategic initiatives is TCS taking to compete with other IT services companies?",
    "How is TCS addressing sustainability and ESG concerns in its operations?",
    "What role does artificial intelligence and automation play in TCS's service delivery?",
    "How has TCS's client mix and geographic revenue distribution changed recently?",
    "What are TCS's long-term growth prospects and investment priorities?",
]

# Order matters: positions 1-5 are easy, 6-10 medium, 11-15 hard
evaluation_questions = easy_questions + medium_questions + hard_questions

print(f"📋 Created evaluation set with {len(evaluation_questions)} questions")
print()
print("Question Categories:")
print("🟢 Easy (1-5): Single-chunk factual answers")
print("🟡 Medium (6-10): Multi-chunk synthesis")  
print("🔴 Hard (11-15): Complex reasoning and analysis")
print()
print("Sample questions:")
for number, sample_question in enumerate(evaluation_questions[:3], start=1):
    print(f"  {number}. {sample_question}")
print("  ...")
print()
print("✅ Ready for query expansion testing!")

In [None]:
# Step 11: Query Expansion Implementation - Generate Hypothetical Answers
# Following L3-student.md approach: use LLM to generate hypothetical answer, then combine with original query

def augment_query_generated(query, model="gpt-4.1"):
    """
    Ask the LLM for a plausible, annual-report-style answer to ``query``.

    The text returned here is not used as the final answer; callers append
    it to the original question to build an expanded retrieval query.

    Args:
        query (str): The original user question
        model (str): OpenAI model name passed to the Responses API

    Returns:
        str: The stripped hypothetical answer, or "" if anything in the
        generation step raises (the error is printed, not re-raised)
    """
    try:
        prompt = f"""You are a helpful expert financial research assistant. Provide an example answer to the given question, that might be found in a document like an annual report.

Question: {query}

Generate a realistic, detailed answer that would typically appear in an annual report:"""
        reply = client.responses.create(model=model, input=prompt)

        # Fall back to the string form of the reply if output_text is absent
        if hasattr(reply, 'output_text'):
            generated = reply.output_text
        else:
            generated = str(reply)
        return generated.strip()

    except Exception as exc:
        print(f"❌ Error generating hypothetical answer: {str(exc)}")
        return ""

# Try the expansion helper on one sample query and show each intermediate text
print("🧪 Testing Query Expansion with Hypothetical Answer Generation")
print("=" * 70)
print()

sample_query = "What is TCS's revenue for FY 2025?"
print(f"🔍 Original Query: '{sample_query}'")
print()

hypothetical_answer = augment_query_generated(sample_query)
wrapped_hypothetical = word_wrap(hypothetical_answer)
print("🤖 Generated Hypothetical Answer:")
print("-" * 40)
print(wrapped_hypothetical)
print()

# The expanded query is simply "<question> <hypothetical answer>"
joint_query = " ".join((sample_query, hypothetical_answer))
print("🔗 Combined Query (Original + Hypothetical):")
print("-" * 50)
print(word_wrap(joint_query))
print()
print("✅ Query expansion function working! This combined query will be used for retrieval.")

In [None]:
# Step 12: Enhanced RAG with Query Expansion
# This function uses query expansion to improve retrieval before generating final answers

def ask_tcs_report_with_expansion(question, show_process=True):
    """
    Enhanced RAG function using query expansion:
    1. Generate hypothetical answer with GPT-4.1
    2. Combine original question + hypothetical answer for retrieval
    3. Use retrieved context (NOT hypothetical answer) for final response
    4. Generate answer using GPT-4.1 with only retrieved chunks
    
    Args:
        question (str): The user's question
        show_process (bool): Whether to display the expansion process
    
    Returns:
        str: The stripped AI-generated answer based on retrieved context;
             "No relevant information found." when retrieval returns no
             documents; or an "❌ Error calling OpenAI API: ..." message when
             the final generation call raises (the exception is not re-raised)
    """
    if show_process:
        print(f"🔍 Question: {question}")
        print("=" * 70)
    
    # Step 1: Generate hypothetical answer for query expansion
    # (augment_query_generated returns "" when generation fails)
    hypothetical_answer = augment_query_generated(question)
    
    if show_process and hypothetical_answer:
        print("🤖 Generated Hypothetical Answer:")
        print("-" * 40)
        print(word_wrap(hypothetical_answer))
        print()
    
    # Step 2: Create expanded query (original + hypothetical)
    if hypothetical_answer:
        joint_query = f"{question} {hypothetical_answer}"
        if show_process:
            print("🔗 Using Expanded Query for Retrieval")
            print("-" * 40)
            print(word_wrap(joint_query))
            print()
    else:
        # Fallback to original query if hypothetical generation fails
        joint_query = question
        if show_process:
            print("⚠️ Using original query (hypothetical generation failed)")
    
    # Step 3: Retrieve relevant chunks using expanded query
    results = chroma_collection.query(query_texts=[joint_query], n_results=3)
    
    # Handle case where no relevant chunks are found
    if not results['documents'][0]:
        if show_process:
            print("❌ Sorry, I couldn't find relevant information in the TCS report.")
        return "No relevant information found."
    
    context_chunks = results['documents'][0]
    
    # Step 4: Display retrieved chunks for educational purposes
    if show_process:
        print(f"📄 Retrieved {len(context_chunks)} relevant chunks using expanded query:")
        print()
        
        for i, chunk in enumerate(context_chunks):
            print(f"--- Retrieved Chunk {i+1} ---")
            print(word_wrap(chunk))
            print()
    
    # Step 5: Generate final answer using ONLY retrieved context (not hypothetical answer)
    # Chunks are separated by a real blank line. The previous "\\n\\n" literal
    # inserted the four characters backslash-n-backslash-n into the prompt
    # instead, unlike ask_tcs_report, which skewed the comparison.
    context = "\n\n".join(context_chunks)
    
    try:
        if show_process:
            print("🤖 Generating final answer using retrieved context...")
            print("-" * 40)
        
        response = client.responses.create(
            model="gpt-4.1",
            input=f"""Based on the following excerpts from the TCS Annual Report, please answer this question: {question}

Context from TCS Annual Report:
{context}

Please provide a clear, accurate answer based only on the information provided above. If the context doesn't contain enough information to fully answer the question, please say so."""
        )
        
        # Fall back to the string form of the response if output_text is absent
        answer = response.output_text if hasattr(response, 'output_text') else str(response)
        
        if show_process:
            print("💡 Final Answer (Query Expansion Method):")
            print("=" * 40)
            print(word_wrap(answer))
            print()
        
        return answer.strip()
        
    except Exception as e:
        error_msg = f"❌ Error calling OpenAI API: {str(e)}"
        if show_process:
            print(error_msg)
        return error_msg

# Test the query expansion approach with our sample question
print("🚀 Testing Query Expansion RAG System:")
print("=" * 70)
print()

test_question = "What is TCS's revenue for FY 2025?"
expanded_answer = ask_tcs_report_with_expansion(test_question)

# "\n" (not "\\n") so a blank line is printed rather than the literal text \n
print("\n" + "✅" * 40)
print("🎉 Query Expansion RAG implementation complete!")
print("✅" * 40)

In [None]:
# Step 13: Evaluation Framework - Compare Original vs Query Expansion
# Run both approaches on all 15 questions and export results to CSV for easy comparison

import pandas as pd
import time
from datetime import datetime

def _run_timed(rag_function, *args, **kwargs):
    """Call a RAG function and return (answer, elapsed_seconds, success)."""
    # Both RAG functions catch OpenAI failures themselves and *return* a
    # message starting with this prefix instead of raising, so a returned
    # error has to be detected from the answer text.
    error_prefix = "❌ Error calling OpenAI API"

    started = time.perf_counter()  # monotonic clock, suited to measuring durations
    try:
        answer = rag_function(*args, **kwargs)
        success = not str(answer).startswith(error_prefix)
    except Exception as e:
        # Anything raised outside the functions' own try blocks (e.g. retrieval)
        answer = f"Error: {str(e)}"
        success = False
    elapsed = time.perf_counter() - started

    return answer, elapsed, success


def evaluate_both_approaches():
    """
    Run every question in ``evaluation_questions`` through both RAG
    approaches and collect the results for comparison.

    A run counts as unsuccessful when the RAG function raises or when it
    returns its "❌ Error calling OpenAI API" message.

    Returns:
        pandas.DataFrame: One row per question with columns question_id,
                         question, difficulty, original_answer,
                         original_time_seconds, original_success,
                         expansion_answer, expansion_time_seconds,
                         expansion_success and timestamp
    """
    print("🧪 COMPREHENSIVE EVALUATION: Original vs Query Expansion")
    print("=" * 80)
    print(f"Testing {len(evaluation_questions)} questions with both approaches...")
    print()
    
    results = []
    
    for i, question in enumerate(evaluation_questions, 1):
        print(f"\n📋 Question {i}/{len(evaluation_questions)}")
        print("-" * 60)
        print(f"Q: {question}")
        print()
        
        # Test Original Approach
        print("🔵 Testing Original Approach...")
        original_answer, original_time, original_success = _run_timed(
            ask_tcs_report, question
        )
        print(f"⏱️ Original took {original_time:.1f}s")
        print()
        
        # Test Query Expansion Approach
        print("🟠 Testing Query Expansion Approach...")
        expansion_answer, expansion_time, expansion_success = _run_timed(
            ask_tcs_report_with_expansion, question, show_process=False
        )
        print(f"⏱️ Expansion took {expansion_time:.1f}s")
        
        # Store results; difficulty follows the question's 1-based position
        # in evaluation_questions (1-5 easy, 6-10 medium, the rest hard)
        results.append({
            'question_id': i,
            'question': question,
            'difficulty': 'Easy' if i <= 5 else ('Medium' if i <= 10 else 'Hard'),
            'original_answer': original_answer,
            'original_time_seconds': round(original_time, 1),
            'original_success': original_success,
            'expansion_answer': expansion_answer,
            'expansion_time_seconds': round(expansion_time, 1),
            'expansion_success': expansion_success,
            'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        })
        
        print("✅ Both approaches completed")
        print("🔹" * 60)
    
    # Convert to DataFrame
    df = pd.DataFrame(results)
    
    print("\n🎉 Evaluation Complete!")
    print(f"📊 Processed {len(df)} questions successfully")
    print(f"⚡ Average time - Original: {df['original_time_seconds'].mean():.1f}s, Expansion: {df['expansion_time_seconds'].mean():.1f}s")
    print(f"✅ Success rate - Original: {df['original_success'].mean()*100:.0f}%, Expansion: {df['expansion_success'].mean()*100:.0f}%")
    
    return df

# Run the evaluation (this will take a few minutes due to API calls)
print("🚀 Starting comprehensive evaluation...")
# The count comes from the list itself so the message stays accurate if the
# evaluation set is edited (it was previously hard-coded as 15).
print(f"This will test both approaches on all {len(evaluation_questions)} questions.")
print("⏳ Please wait while we process all questions...")
print()

evaluation_results = evaluate_both_approaches()

In [None]:
# Step 14: Export Results to CSV for Easy Comparison
# Save evaluation results in a structured format for manual review

# Timestamped file name (second resolution) for this run's export
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
csv_filename = f"tcs_rag_evaluation_{timestamp}.csv"
evaluation_results.to_csv(csv_filename, index=False, encoding='utf-8')

print(f"📄 Results exported to: {csv_filename}")
print()

# Display summary statistics
print("📊 EVALUATION SUMMARY")
print("=" * 50)
print()

# Per-difficulty mean of each success flag and timing, rounded to 2 decimals
summary_metrics = [
    'original_success',
    'expansion_success',
    'original_time_seconds',
    'expansion_time_seconds',
]
difficulty_summary = (
    evaluation_results
    .groupby('difficulty')
    .agg({metric: 'mean' for metric in summary_metrics})
    .round(2)
)

# Report in Easy -> Medium -> Hard order, skipping levels absent from the results
reported_levels = [
    level for level in ['Easy', 'Medium', 'Hard']
    if level in difficulty_summary.index
]

print("Success Rate by Difficulty:")
for difficulty in reported_levels:
    orig_rate = difficulty_summary.loc[difficulty, 'original_success'] * 100
    exp_rate = difficulty_summary.loc[difficulty, 'expansion_success'] * 100
    print(f"  {difficulty:6}: Original {orig_rate:3.0f}% | Expansion {exp_rate:3.0f}%")

print()
print("Average Response Time by Difficulty:")
for difficulty in reported_levels:
    orig_time = difficulty_summary.loc[difficulty, 'original_time_seconds']
    exp_time = difficulty_summary.loc[difficulty, 'expansion_time_seconds']
    print(f"  {difficulty:6}: Original {orig_time:4.1f}s | Expansion {exp_time:4.1f}s")

print()
print("📋 CSV Columns Available for Review:")
print("  • question_id, question, difficulty")
print("  • original_answer, expansion_answer") 
print("  • original_time_seconds, expansion_time_seconds")
print("  • original_success, expansion_success")
print("  • timestamp")
print()
print(f"✅ Open {csv_filename} in Excel/Google Sheets to compare answers side by side!")

# Plain-text preview of the identifying columns for the first three rows
print()
print("📋 Preview of Results (first 3 questions):")
print("-" * 80)
preview_cols = ['question_id', 'difficulty', 'question', 'original_success', 'expansion_success']
preview_rows = evaluation_results[preview_cols].head(3)
print(preview_rows.to_string(index=False))

In [None]:
# Step 15: Query Expansion Demonstration - See the Difference
# Compare how both approaches handle a few sample questions

def compare_approaches_demo(question):
    """
    Run both approaches on a single question and print their output one
    after the other for educational comparison.

    Args:
        question (str): The question to send through both RAG functions.

    Returns:
        tuple: (original_answer, expansion_answer) as returned by
        ask_tcs_report and ask_tcs_report_with_expansion respectively.
    """
    print(f"🔍 QUESTION: {question}")
    print("=" * 80)
    print()
    
    print("🔵 ORIGINAL APPROACH")
    print("-" * 40)
    original_answer = ask_tcs_report(question)
    
    # Real newlines here: the previous "\\n" literals printed the text \n
    # verbatim instead of leaving blank lines.
    print("\n\n🟠 QUERY EXPANSION APPROACH")  
    print("-" * 40)
    expansion_answer = ask_tcs_report_with_expansion(question)
    
    print("\n" + "🔹" * 80)
    print("✅ Comparison complete! Note the differences in:")
    print("  • Retrieved chunks (different context due to query expansion)")
    print("  • Answer quality and completeness")
    print("  • Specific details captured")
    print("🔹" * 80)
    
    return original_answer, expansion_answer

# Demo with a few strategic questions
demo_questions = [
    "What digital transformation services does TCS offer to clients?",
    "How is TCS addressing sustainability and ESG concerns?",
    "What are TCS's long-term growth prospects?"
]

print("🧪 QUERY EXPANSION DEMONSTRATION")
print("=" * 80)
print("Comparing Original vs Query Expansion approaches on sample questions")
print("This will show how query expansion affects retrieval and final answers")
print()

# All "\n" escapes below are real newlines; the previous "\\n" literals
# printed the two characters backslash and n instead of a line break.
for i, question in enumerate(demo_questions, 1):
    print(f"\n{'🎯' * 30}")
    print(f"DEMO {i}/{len(demo_questions)}")
    print('🎯' * 30)
    
    compare_approaches_demo(question)
    
    # Transition message only between questions, not after the last one
    if i < len(demo_questions):
        print("\n⏳ Moving to next question...\n")

print("\n" + "✅" * 50)
print("🎉 QUERY EXPANSION IMPLEMENTATION COMPLETE!")
print("✅" * 50)
print()
print("📋 What you've built:")
print("  ✅ 15-question evaluation set (easy/medium/hard)")
print("  ✅ Query expansion with GPT-4.1 hypothetical answers")
print("  ✅ Enhanced RAG system using expanded queries")
print("  ✅ Comprehensive evaluation framework")
print("  ✅ CSV export for easy manual comparison")
print("  ✅ Side-by-side demonstrations")
print()
print("🔍 Next steps:")
print("  📊 Review the CSV file to compare answer quality")
print("  🧠 Analyze which questions benefit most from expansion")
print("  📈 Consider when to use each approach in production")
print()
print("🎯 Key learning: Query expansion helps retrieval find more relevant")
print("   context by using hypothetical answers to guide semantic search!")