# Notebook 2: RAG Pipeline Testing

**Objective:** This notebook demonstrates how to use the complete RAG pipeline to ask questions and get answers based on the previously built vector store.

**Prerequisite:** Ensure that `01_data_preprocessing_and_embedding.ipynb` (or `python main.py build`) has been run successfully to create the `vector_store_index/faiss_index`.

**Steps:**
1. Setup: Import necessary libraries and configure paths.
2. Initialize RAG Pipeline: Create an instance of `RAGPipeline` from `src.rag_pipeline`.
3. Ask Questions: Use the `ask()` method of the pipeline.
4. Review Results: Examine the generated answer and retrieved source documents.

In [None]:
import sys
import os
from pathlib import Path

# Add project root to sys.path.
# The notebook is normally run from a direct subdirectory of the project, so the
# root is the parent of the working directory. Some front ends start the kernel
# in the project root instead; in that case the parent would be the wrong
# directory and `from src import ...` would fail. Pick whichever candidate
# actually contains the `src` package, falling back to the parent as before.
_cwd = Path(os.getcwd())
project_root = next(
    (candidate for candidate in (_cwd.parent, _cwd) if (candidate / "src").is_dir()),
    _cwd.parent,
)
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Imports from our project
from src import config # This will also load .env
from src.rag_pipeline import RAGPipeline

# Echo the resolved settings up front so a missing key or wrong path is obvious
# before any pipeline work starts. Only whether a key is set is shown, never its value.
api_key_status = 'Yes' if config.OPENAI_API_KEY else 'No (Please check .env file!)'
print(
    f"Project Root: {project_root}",
    f"OpenAI API Key Loaded: {api_key_status}",
    f"Vector Store Path: {config.VECTOR_STORE_PATH}",
    sep="\n",
)

## 1. Check the Vector Store and Initialize the RAG Pipeline

In [None]:
# Construct the pipeline only when the vector store path exists. On every
# failure path `rag_system` ends up as None, so this cell never raises and the
# rest of the notebook can check for None before querying.
if config.VECTOR_STORE_PATH.exists():
    print("Vector store found. Proceeding to initialize RAG pipeline.")
    try:
        rag_system = RAGPipeline()
        print("RAG Pipeline initialized successfully.")
    except RuntimeError as init_error:
        # RuntimeError gets its own message, distinct from the catch-all below.
        print(f"RuntimeError initializing RAGPipeline: {init_error}")
        rag_system = None
    except Exception as init_error:
        # Deliberately broad: report the problem and keep the notebook running.
        print(f"An unexpected error occurred during RAGPipeline initialization: {init_error}")
        rag_system = None
else:
    # No index at the configured path: explain how to build it, don't crash.
    print(f"ERROR: Vector store not found at {config.VECTOR_STORE_PATH}")
    print("Please run Notebook 01 or 'python main.py build' first to create it.")
    rag_system = None

## 2. Ask Questions using the RAG Pipeline

In [None]:
def ask_and_display(pipeline, query):
    """Send ``query`` to the RAG pipeline and print the answer and its sources.

    Args:
        pipeline: Object exposing ``ask(query)``. The result is read with
            ``.get("answer")`` and ``.get("source_documents")``. Pass ``None``
            when the pipeline failed to initialize; a notice is printed and
            nothing is asked.
        query: The question text forwarded to ``pipeline.ask``.

    Returns:
        None. Everything is written to stdout.
    """
    if pipeline is None:
        print("RAG system is not initialized. Cannot ask questions.")
        return

    print(f"\n{'='*20} QUERY {'='*20}")
    print(f"Question: {query}")

    response = pipeline.ask(query)

    print(f"\n{'--'*10} ANSWER {'--'*10}")
    print(response.get("answer", "No answer provided."))

    source_docs = response.get("source_documents", [])
    if source_docs:
        print(f"\n{'--'*10} SOURCES ({len(source_docs)}) {'--'*10}")
        for position, doc in enumerate(source_docs, start=1):
            source_name = doc.metadata.get('source', 'Unknown source')
            page_number = doc.metadata.get('page')
            source_info = f"{source_name}"
            # Compare against None rather than truthiness: a page index of 0 is
            # falsy but is still a real page and must be shown.
            if page_number is not None:
                source_info += f" (Page {page_number})"
            # Flatten newlines outside the f-string: a backslash inside an
            # f-string replacement field is a SyntaxError before Python 3.12.
            snippet = doc.page_content[:150].replace('\n', ' ')
            print(f"  [{position}] {source_info}")
            print(f"      Content snippet: {snippet}...")
    print(f"{'='*50}\n")

# Example Questions (assuming sample_document.txt is used).
# The final entry is deliberately off-topic, to show how the pipeline responds
# when the answer is probably not in the indexed document.
queries = [
    "What is RAG?",
    "How does Retrieval Augmented Generation help LLMs?",
    "What are the typical steps in a RAG process?",
    "What tools are mentioned for building RAG systems?",
    "What is the capital of France?",
]

# Run the questions only if an earlier cell produced a usable pipeline.
if not rag_system:
    print("Cannot run queries as RAG system failed to initialize.")
else:
    for question in queries:
        ask_and_display(rag_system, question)

--- End of Notebook 2 ---