# 03 - RAG Pipeline Implementation

This notebook covers:
- Building a complete RAG pipeline with LangChain
- Integrating vector search with LLM reasoning
- Implementing multimodal query understanding
- Creating custom retrieval chains
- Prompt engineering for better responses

In [None]:
import os
import sys
sys.path.append('../src')

import numpy as np
import pandas as pd
import faiss
from pathlib import Path
from dotenv import load_dotenv
from typing import List, Dict, Any

# LangChain imports
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain.schema import Document

# Read settings such as OPENAI_API_KEY from a local .env file into the process environment
load_dotenv()

# Data locations, relative to the notebook's working directory
PROCESSED_DIR = Path('..') / 'data' / 'processed'
INDEX_DIR = PROCESSED_DIR / 'indexes'

## 1. Load Vector Database and Metadata

In [None]:
# Load metadata
# Product table; later cells read its 'id', 'title', 'description',
# 'category' and 'image_path' columns by row position.
metadata_df = pd.read_csv(PROCESSED_DIR / 'metadata_processed.csv')
print(f"Loaded {len(metadata_df)} items")

# Load embeddings
# NOTE(review): presumably one embedding row per metadata row, in the same
# order, as produced by an earlier notebook — TODO confirm.
text_embeddings = np.load(PROCESSED_DIR / 'text_embeddings.npy')
# NOTE(review): in the cells visible here, clip_embeddings is only inspected
# (shape print below); no retriever is built on top of it.
clip_embeddings = np.load(PROCESSED_DIR / 'clip_text_embeddings.npy')

print(f"Text embeddings shape: {text_embeddings.shape}")
print(f"CLIP embeddings shape: {clip_embeddings.shape}")

# Last expression of the cell: rich preview of the first rows.
metadata_df.head()

## 2. Custom Retriever Class

In [None]:
class MultimodalRetriever:
    """
    Custom retriever that supports both text and image queries.

    Wraps a FAISS index read from disk and maps every hit back to a row of
    ``metadata_df`` by *position* (FAISS label ``i`` -> ``metadata_df.iloc[i]``),
    so the index and the metadata table must be in the same order.

    Args:
        index_path: Location of the serialized FAISS index.
        embeddings: Embedding matrix the index was built from. It is only
            stored on the instance; searching goes through the FAISS index.
        metadata_df: DataFrame with at least the columns ``id``, ``title``,
            ``description``, ``category`` and ``image_path``.
        embedding_model: Object exposing ``embed_query(str)``. Defaults to
            ``OpenAIEmbeddings(model="text-embedding-3-large")``.
    """
    def __init__(self, index_path, embeddings, metadata_df, embedding_model=None):
        self.index = faiss.read_index(str(index_path))
        self.embeddings = embeddings
        self.metadata_df = metadata_df
        self.embedding_model = embedding_model or OpenAIEmbeddings(model="text-embedding-3-large")

    def retrieve(self, query: str, k: int = 5, use_rerank: bool = False) -> List[Dict[str, Any]]:
        """
        Retrieve top-k relevant documents for a text query.

        Args:
            query: Free-text search query; embedded with ``embedding_model``.
            k: Maximum number of results. ``k <= 0`` yields an empty list.
            use_rerank: Accepted for interface compatibility; currently ignored
                (no re-ranking step is implemented).

        Returns:
            A list of result dicts (see ``_search``), best match first as
            ordered by FAISS. May contain fewer than ``k`` entries.

        Raises:
            ValueError: If the query embedding is all zeros or non-finite.
        """
        query_embedding = self.embedding_model.embed_query(query)
        return self._search(np.array(query_embedding, dtype='float32'), k)

    def retrieve_with_image(self, image_embedding: np.ndarray, k: int = 5) -> List[Dict[str, Any]]:
        """
        Retrieve top-k relevant documents for an image query.

        Args:
            image_embedding: Pre-computed embedding of the query image.
                NOTE(review): its dimensionality must match the loaded index;
                this class does not check that.
            k: Maximum number of results. ``k <= 0`` yields an empty list.

        Returns:
            A list of result dicts (see ``_search``). May contain fewer than
            ``k`` entries.

        Raises:
            ValueError: If the embedding is all zeros or non-finite.
        """
        return self._search(np.asarray(image_embedding), k)

    def _search(self, vector: np.ndarray, k: int) -> List[Dict[str, Any]]:
        """
        Normalize ``vector``, query the FAISS index and build result dicts.

        Each result has the keys ``id``, ``title``, ``description``,
        ``category``, ``image_path``, ``score`` and ``content``.
        """
        if k <= 0:
            return []

        norm = np.linalg.norm(vector)
        if not np.isfinite(norm) or norm == 0:
            # Dividing by a zero/NaN norm would send NaNs into the index and
            # return arbitrary rows; fail loudly instead.
            raise ValueError("Cannot search with a zero-length or non-finite embedding vector")

        # Normalize for cosine similarity; FAISS expects a (1, dim) float32 matrix
        query_vector = (vector / norm).reshape(1, -1).astype('float32')

        distances, indices = self.index.search(query_vector, k)

        n_items = len(self.metadata_df)
        results = []
        for idx, dist in zip(indices[0], distances[0]):
            # FAISS pads with label -1 when it finds fewer than k neighbours.
            # A negative label must be skipped explicitly, otherwise iloc[-1]
            # would silently return the last product as a bogus hit.
            if idx < 0 or idx >= n_items:
                continue
            item = self.metadata_df.iloc[int(idx)]
            results.append({
                'id': item['id'],
                'title': item['title'],
                'description': item['description'],
                'category': item['category'],
                'image_path': item['image_path'],
                # NOTE(review): "1 - dist" is kept from the original code; whether
                # it is a true similarity depends on the index metric — confirm.
                'score': float(1 - dist),
                'content': f"{item['title']}. {item['description']}"
            })

        return results

# Wire the FAISS text index, the text embedding matrix and the product metadata together
retriever = MultimodalRetriever(INDEX_DIR / 'faiss_text.index', text_embeddings, metadata_df)

print("Retriever initialized successfully")

## 3. Test Retrieval

In [None]:
# Smoke-test the retriever with a sample shopping query
test_query = "comfortable clothing for everyday wear"
results = retriever.retrieve(test_query, k=3)

print(f"Query: {test_query}\n")
print("=" * 80)

# One block per hit, ranked from 1
for rank, hit in enumerate(results, start=1):
    print(f"\nRank {rank} | Score: {hit['score']:.4f}")
    print(f"Title: {hit['title']}")
    print(f"Description: {hit['description']}")
    print(f"Category: {hit['category']}")
    print("-" * 80)

## 4. RAG Pipeline with LangChain

In [None]:
# Initialize LLM
# The API key is read from the environment (populated by load_dotenv() earlier).
# NOTE(review): os.getenv returns None when OPENAI_API_KEY is unset — confirm
# how the client behaves in that case.
# NOTE(review): a non-zero temperature presumably makes answers vary between
# runs; lower it if reproducible outputs are needed.
llm = ChatOpenAI(
    model="gpt-4-turbo-preview",
    temperature=0.7,
    api_key=os.getenv('OPENAI_API_KEY')
)

# Create RAG prompt template
# Two template variables are filled at invoke time: {context} (formatted
# retrieval results) in the system message and {question} in the human message.
# The indentation inside the triple-quoted string is part of the prompt text.
rag_prompt = ChatPromptTemplate.from_messages([
    ("system", """You are a helpful shopping assistant with expertise in product recommendations.
    Use the following retrieved product information to answer the user's question.
    
    If the question cannot be answered using the provided context, say so politely and suggest what information might help.
    
    Be concise, friendly, and focus on the most relevant products.
    
    Retrieved Products:
    {context}
    """),
    ("human", "{question}")
])

print("LLM and prompt template initialized")

In [None]:
def format_context(results: List[Dict[str, Any]]) -> str:
    """
    Render retrieved products as a numbered text block for the LLM prompt.

    Args:
        results: Result dicts carrying at least the keys ``title``,
            ``description``, ``category`` and a numeric ``score``.

    Returns:
        One "Product N:" section per result (N starts at 1), sections
        separated by a blank line. An empty input gives an empty string.
    """
    sections = [
        (
            f"Product {position}:\n"
            f"- Title: {product['title']}\n"
            f"- Description: {product['description']}\n"
            f"- Category: {product['category']}\n"
            f"- Relevance Score: {product['score']:.2f}\n"
        )
        for position, product in enumerate(results, start=1)
    ]
    return "\n".join(sections)

def rag_query(question: str, k: int = 5) -> Dict[str, Any]:
    """
    Answer a question with retrieval-augmented generation.

    Uses the notebook-level ``retriever``, ``rag_prompt`` and ``llm``.

    Args:
        question: The user's question; used both as the search query and as
            the human message of the prompt.
        k: Number of products to retrieve as context.

    Returns:
        Dict with ``question``, ``answer`` (LLM text), ``retrieved_products``
        (raw retrieval results) and ``context`` (the formatted prompt block).
    """
    # Retrieval: fetch the best-matching products for the question
    retrieved = retriever.retrieve(question, k=k)

    # Prompt assembly: turn the hits into the text block the system prompt expects
    prompt_context = format_context(retrieved)

    # Generation: prompt -> chat model -> plain string
    answer = (rag_prompt | llm | StrOutputParser()).invoke(
        {"context": prompt_context, "question": question}
    )

    return dict(
        question=question,
        answer=answer,
        retrieved_products=retrieved,
        context=prompt_context,
    )

print("RAG pipeline functions defined")

## 5. Test RAG Pipeline

In [None]:
# Test query 1: General product search
question1 = "What comfortable clothing items do you have for casual wear?"

result1 = rag_query(question1, k=3)

print(f"Question: {result1['question']}")
# Banner, heading, answer and closing banner, each on its own line
print("\n" + "=" * 80, "\nAI Assistant Response:", result1['answer'], "\n" + "=" * 80, sep="\n")
print("\nRetrieved Products:")
for rank, product in enumerate(result1['retrieved_products'], start=1):
    print(f"\n{rank}. {product['title']} (Score: {product['score']:.3f})")
    print(f"   {product['description']}")

In [None]:
# Test query 2: Specific product recommendation
question2 = "I'm looking for something to wear with blue jeans. What would you recommend?"

result2 = rag_query(question2, k=3)

print(f"Question: {result2['question']}")
# Banner, heading, answer and closing banner, each on its own line
print("\n" + "=" * 80, "\nAI Assistant Response:", result2['answer'], "\n" + "=" * 80, sep="\n")

## 6. Advanced: Multi-Query RAG

In [None]:
# Multi-query generation prompt
# Asks the model for 3 alternative search queries, one per line; the caller
# splits the reply on newlines. {question} is filled in at invoke time.
# The indentation inside the triple-quoted string is part of the prompt text.
multi_query_prompt = ChatPromptTemplate.from_messages([
    ("system", """You are an AI assistant that helps generate multiple search queries.
    Given a user question, generate 3 different search queries that could help find relevant products.
    Each query should approach the question from a different angle.
    
    Return ONLY the queries, one per line, without numbering or explanation."""),
    ("human", "{question}")
])

def multi_query_rag(question: str, k: int = 3) -> Dict[str, Any]:
    """
    Advanced RAG with multi-query expansion.

    Asks the LLM for alternative phrasings of ``question``, retrieves products
    for the original question and each alternative, merges the hits per
    product id (keeping the best score) and generates one final answer from
    the top-``k`` merged products. Uses the notebook-level ``retriever``,
    ``llm``, ``multi_query_prompt`` and ``rag_prompt``.

    Args:
        question: The user's question.
        k: Number of products retrieved per query and kept after merging.

    Returns:
        Dict with ``question``, ``generated_queries`` (original question first,
        followed by up to 3 distinct generated queries), ``answer`` and
        ``retrieved_products`` (merged, best score first).

    Side effects:
        Prints the list of queries that will be searched.
    """
    import re  # local import keeps this cell runnable on its own

    max_generated = 3  # the prompt asks for 3 alternatives

    # Step 1: Generate multiple queries
    query_chain = multi_query_prompt | llm | StrOutputParser()
    generated_queries = query_chain.invoke({"question": question})

    # The prompt asks for bare lines, but models sometimes still answer with
    # "1. ..." or "- ..."; strip such markers so they don't pollute the
    # embedding. A leading number without "." or ")" (e.g. "3 piece suit")
    # is left untouched.
    list_marker = re.compile(r"^\s*(?:[-*\u2022]|\d+[.)])\s+")

    queries = [question]  # always search the original question first
    seen = {question.strip().lower()}
    for line in generated_queries.split('\n'):
        candidate = list_marker.sub("", line).strip()
        key = candidate.lower()
        if not candidate or key in seen:
            # Skip blanks and repeats: a duplicate query costs an extra
            # embedding call and can never add new products.
            continue
        seen.add(key)
        queries.append(candidate)
        if len(queries) == 1 + max_generated:
            break

    print(f"Generated {len(queries)} queries:")
    for i, q in enumerate(queries):
        print(f"  {i+1}. {q}")

    # Step 2: Retrieve for each query and combine results
    all_results = {}
    for query in queries:
        for result in retriever.retrieve(query, k=k):
            doc_id = result['id']
            best = all_results.get(doc_id)
            # Keep the single best score per product. Repeated hits are NOT
            # boosted; a product found by several queries simply keeps its
            # highest score.
            if best is None or result['score'] > best['score']:
                all_results[doc_id] = result

    # Step 3: Sort by score and take top-k
    combined_results = sorted(all_results.values(), key=lambda x: x['score'], reverse=True)[:k]

    # Step 4: Generate final response
    context = format_context(combined_results)
    chain = rag_prompt | llm | StrOutputParser()

    response = chain.invoke({
        "context": context,
        "question": question
    })

    return {
        "question": question,
        "generated_queries": queries,
        "answer": response,
        "retrieved_products": combined_results
    }

print("Multi-query RAG pipeline defined")

In [None]:
# Test multi-query RAG
question3 = "I need outfit suggestions for a casual weekend"

# Section banner
print("\n" + "=" * 80, "MULTI-QUERY RAG TEST", "=" * 80 + "\n", sep="\n")

result3 = multi_query_rag(question3, k=5)

# Answer banner followed by the generated answer
print("\n" + "=" * 80, "AI Assistant Response:", "=" * 80, result3['answer'], sep="\n")

## 7. Conversational RAG with Memory

In [None]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

class ConversationalRAG:
    """
    RAG system with conversation history.

    Keeps every exchange in ``conversation_history`` and feeds the most recent
    ``history_window`` exchanges back to the LLM on each turn.

    NOTE(review): retrieval uses only the current question text, so follow-ups
    such as "the first item" are searched without the earlier turns — confirm
    whether that is intended.

    Args:
        retriever: Object exposing ``retrieve(question, k=...)`` that returns
            result dicts accepted by ``format_context``.
        llm: Chat model placed in the ``prompt | llm | parser`` chain.
        history_window: How many of the latest exchanges are included in the
            prompt. Defaults to 3; 0 disables history in the prompt.

    Raises:
        ValueError: If ``history_window`` is negative.
    """
    def __init__(self, retriever, llm, history_window: int = 3):
        if history_window < 0:
            raise ValueError("history_window must be >= 0")

        self.retriever = retriever
        self.llm = llm
        self.history_window = history_window
        # One dict per completed turn: question, answer, retrieved_products
        self.conversation_history = []
        
        self.prompt = ChatPromptTemplate.from_messages([
            ("system", """You are a helpful shopping assistant. Use the conversation history and retrieved products to provide personalized recommendations.
            
            Conversation History:
            {history}
            
            Retrieved Products:
            {context}
            """),
            ("human", "{question}")
        ])

    def _format_history(self) -> str:
        """Render the latest ``history_window`` exchanges as Human/Assistant lines."""
        window = self.history_window
        # A bare [-0:] slice would return the whole list, so 0 is handled explicitly
        recent = self.conversation_history[-window:] if window > 0 else []
        return "\n".join(
            f"Human: {h['question']}\nAssistant: {h['answer']}"
            for h in recent
        )

    def chat(self, question: str, k: int = 3) -> str:
        """
        Process a conversational query.

        Args:
            question: The user's message for this turn.
            k: Number of products to retrieve as context.

        Returns:
            The assistant's reply. The exchange (including the retrieved
            products) is appended to ``conversation_history``.
        """
        # Retrieve relevant products
        results = self.retriever.retrieve(question, k=k)
        context = format_context(results)

        # Format conversation history
        history = self._format_history()

        # Generate response
        chain = self.prompt | self.llm | StrOutputParser()
        response = chain.invoke({
            "history": history,
            "context": context,
            "question": question
        })

        # Update conversation history
        self.conversation_history.append({
            "question": question,
            "answer": response,
            "retrieved_products": results
        })

        return response

    def reset(self):
        """Clear conversation history"""
        self.conversation_history = []

# Bind the shared retriever and chat model into a stateful assistant
conv_rag = ConversationalRAG(retriever=retriever, llm=llm)

print("Conversational RAG initialized")

In [None]:
# Test conversational flow
print("\n" + "=" * 80, "CONVERSATIONAL RAG TEST", "=" * 80 + "\n", sep="\n")

# Turn 1: opening request
response1 = conv_rag.chat("Show me some casual clothing")
print(
    "User: Show me some casual clothing",
    f"\nAssistant: {response1}",
    "\n" + "-" * 80 + "\n",
    sep="\n",
)

# Turn 2: follow-up that refers back to the previous answer
response2 = conv_rag.chat("What would go well with the first item?")
print(
    "User: What would go well with the first item?",
    f"\nAssistant: {response2}",
    "\n" + "-" * 80 + "\n",
    sep="\n",
)

## 8. Save RAG Pipeline for API

In [None]:
import pickle

# Save RAG components
# Bundles the live objects created in this notebook into one pickle file.
# NOTE(review): pickle stores class instances by module path. MultimodalRetriever
# is defined inside this notebook, so a separate API process can only unpickle
# 'retriever' if an identical class is importable under the same module name —
# confirm how the API loads this file.
# NOTE(review): 'llm' is a configured client object; verify that it pickles
# cleanly and that the API key is not written into the file.
# NOTE(review): only load this file from trusted locations — unpickling
# executes arbitrary code.
rag_components = {
    'retriever': retriever,
    'llm': llm,
    'rag_prompt': rag_prompt,
    'multi_query_prompt': multi_query_prompt
}

# Binary write; the context manager closes the file even if dumping fails.
with open(PROCESSED_DIR / 'rag_pipeline.pkl', 'wb') as f:
    pickle.dump(rag_components, f)

print("RAG pipeline components saved successfully")

## Summary

In this notebook, we:
1. Built a custom multimodal retriever supporting text and image queries
2. Created a complete RAG pipeline with LangChain
3. Implemented prompt engineering for better responses
4. Developed multi-query expansion for improved retrieval
5. Added conversational capabilities with memory
6. Saved pipeline components for API integration

Next step: Notebook 04 - Evaluation and Testing