In [1]:
!python -m pip install pypdf faiss-cpu --quiet
!python -m pip install langchain langchain-core langchain-community langchain-experimental --quiet
!python -m pip install langchain-openai --quiet
!python -m pip install langchain-community langchainhub langchain-chroma langchain langchain-experimental --quiet

In [None]:


# Step 2: Import Libraries
import os
import json
import mlflow
from typing import Dict, List, TypedDict, Literal
from langgraph.graph import StateGraph, END
from pinecone import Pinecone, ServerlessSpec
from openai import AzureOpenAI
import google.generativeai as genai
from pydantic import BaseModel
import getpass



# Ask for the Gemini key without echoing it, then expose it through the
# environment so Config can pick it up with os.getenv.
gemini_key = os.environ["GEMINI_API_KEY"] = getpass.getpass("Enter your Gemini API key: ")

# Same for the Pinecone key. Azure OpenAI settings are not prompted for here;
# Config reads them from environment variables that must already be set.
pinecone_key = os.environ["PINECONE_API_KEY"] = getpass.getpass("Enter your Pinecone API key: ")

# Step 4: Configuration Class
class Config:
    """Central settings for the notebook, resolved once when the class is defined.

    Values backed by ``os.getenv`` are read at class-definition time, so the
    corresponding environment variables must be set before this cell runs.
    """

    # Azure OpenAI Configuration (key and endpoint are None when the variables are unset)
    AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
    AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
    AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION", "2024-08-01-preview")

    # GPT-4 deployment name
    GPT4_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT", "gpt4o")

    # Embeddings configuration: overridable like the chat deployment, because
    # Azure deployment names are chosen per resource; the default is unchanged.
    EMBEDDINGS_DEPLOYMENT_NAME = os.getenv(
        "AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT", "text-embedding-3-small"
    )

    # Pinecone Configuration
    PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
    # NOTE(review): not referenced by the code visible in this notebook, which
    # creates the index with a ServerlessSpec instead - confirm before removing.
    PINECONE_ENVIRONMENT = "us-east4-gcp"
    PINECONE_INDEX_NAME = "agentic-rag-kb"

    # MLflow Configuration: environment override with the original server as default
    MLFLOW_TRACKING_URI = os.getenv("MLFLOW_TRACKING_URI", "http://20.75.92.162:5000")

    # Gemini Configuration (for critique)
    GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

print("Configuration loaded successfully!")

# Step 5: Create Sample Knowledge Base Data
# No real dataset is available, so a small in-memory one is used instead.
# Each row is (id, title, content); the rows are expanded into dicts below.
_SAMPLE_KB_ROWS = (
    (
        "KB001",
        "Caching Best Practices",
        "Implement cache expiration policies to ensure data freshness. Use distributed caching for scalable applications. Consider cache-aside pattern for read-heavy workloads.",
    ),
    (
        "KB002",
        "CI/CD Pipeline Setup",
        "Use infrastructure as code for environment consistency. Implement automated testing at each stage. Ensure proper rollback mechanisms are in place.",
    ),
    (
        "KB003",
        "Performance Tuning Tips",
        "Monitor application metrics regularly. Use database indexing for frequent queries. Implement lazy loading for better initial load times.",
    ),
    (
        "KB004",
        "API Versioning Strategies",
        "Use URL versioning for clear API endpoints. Maintain backward compatibility when possible. Document breaking changes thoroughly.",
    ),
    (
        "KB005",
        "Error Handling Guidelines",
        "Implement structured logging for errors. Use appropriate HTTP status codes. Provide meaningful error messages to clients.",
    ),
    (
        "KB006",
        "Database Optimization",
        "Use connection pooling for database connections. Implement query optimization techniques. Regular database maintenance is essential.",
    ),
    (
        "KB007",
        "Security Best Practices",
        "Implement proper authentication and authorization. Use HTTPS for all communications. Regular security audits are recommended.",
    ),
    (
        "KB008",
        "Microservices Architecture",
        "Design services around business capabilities. Implement circuit breakers for fault tolerance. Use API gateways for request routing.",
    ),
    (
        "KB009",
        "Containerization Benefits",
        "Provides environment consistency across deployments. Enables scalable application deployment. Simplifies dependency management.",
    ),
    (
        "KB010",
        "Monitoring and Observability",
        "Implement comprehensive logging strategies. Use metrics collection for performance monitoring. Set up alerting for critical issues.",
    ),
)

# One dict per entry, keyed exactly as the indexer reads them.
sample_kb_data = [
    {"id": kb_id, "title": kb_title, "content": kb_content}
    for kb_id, kb_title, kb_content in _SAMPLE_KB_ROWS
]

print("Sample knowledge base created with 10 entries")

# Step 6: Knowledge Base Indexing
class KBIndexer:
    """Embed knowledge-base entries with Azure OpenAI and upsert them into Pinecone.

    All public methods are best-effort: service errors are printed rather than
    raised. ``create_index`` and ``index_knowledge_base`` report their outcome
    through their return value so ``run_indexing`` can print an honest summary.
    """

    # Vector size used for both the Pinecone index and the random fallback vectors.
    EMBEDDING_DIMENSION = 1536  # text-embedding-3-small dimension
    # Number of documents embedded and upserted per request.
    BATCH_SIZE = 5  # Smaller batch for demo
    # Seconds to wait for a newly created index to report that it is ready.
    INDEX_READY_TIMEOUT = 60

    def __init__(self):
        """Create the Azure OpenAI and Pinecone clients from ``Config``."""
        self.azure_client = AzureOpenAI(
            api_key=Config.AZURE_OPENAI_API_KEY,
            api_version=Config.AZURE_OPENAI_API_VERSION,
            azure_endpoint=Config.AZURE_OPENAI_ENDPOINT
        )

        self.pinecone = Pinecone(api_key=Config.PINECONE_API_KEY)
        self.index_name = Config.PINECONE_INDEX_NAME

    def create_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Generate embeddings using Azure OpenAI.

        Args:
            texts: Strings to embed, one vector is returned per string.

        Returns:
            One embedding per input text. If the request fails, random vectors
            of ``EMBEDDING_DIMENSION`` floats are returned instead so the demo
            can continue; those vectors carry no semantic meaning.
        """
        if not texts:
            # Nothing to embed; skip the request instead of triggering the fallback.
            return []
        try:
            response = self.azure_client.embeddings.create(
                input=texts,
                model=Config.EMBEDDINGS_DEPLOYMENT_NAME
            )
            return [item.embedding for item in response.data]
        except Exception as e:
            print(f"Error creating embeddings: {e}")
            print("Falling back to random vectors; search results for these texts will be meaningless")
            # Fallback: return random embeddings (for demo purposes)
            import numpy as np
            return [np.random.rand(self.EMBEDDING_DIMENSION).tolist() for _ in texts]

    def _wait_until_ready(self, poll_interval: float = 1.0) -> bool:
        """Poll the index status until it is ready or the timeout elapses."""
        import time

        deadline = time.monotonic() + self.INDEX_READY_TIMEOUT
        while True:
            if self.pinecone.describe_index(self.index_name).status['ready']:
                return True
            if time.monotonic() >= deadline:
                return False
            time.sleep(poll_interval)

    def create_index(self):
        """Create the Pinecone index if it doesn't exist.

        After creating a new index this waits for it to become ready, because
        upserting into an index that is still initializing can fail.

        Returns:
            True when the index exists or was created, False when an error
            was printed instead.
        """
        try:
            if self.index_name in self.pinecone.list_indexes().names():
                print(f"Index {self.index_name} already exists")
                return True

            self.pinecone.create_index(
                name=self.index_name,
                dimension=self.EMBEDDING_DIMENSION,
                metric='cosine',
                spec=ServerlessSpec(
                    cloud='aws',
                    region='us-east-1'
                )
            )
            print(f"Created index: {self.index_name}")
            if not self._wait_until_ready():
                print(f"Index {self.index_name} not ready after {self.INDEX_READY_TIMEOUT}s; upserts may fail")
            return True
        except Exception as e:
            print(f"Error creating index: {e}")
            return False

    def index_knowledge_base(self, kb_data: List[Dict]):
        """Embed and upsert knowledge-base documents in batches.

        Args:
            kb_data: Dicts with ``id``, ``title`` and ``content`` keys. A
                missing id falls back to ``KB###`` based on the number of
                documents accepted so far. Entries with blank content are
                skipped, since an empty string cannot be embedded and would
                push the whole batch onto the random-vector fallback.

        Returns:
            The number of documents actually upserted. Errors are printed and
            stop the run, so the count may be lower than ``len(kb_data)``.
        """
        indexed = 0
        try:
            index = self.pinecone.Index(self.index_name)

            # Each record is (doc_id, text to embed, metadata stored with the vector).
            records = []
            for item in kb_data:
                doc_id = item.get('id') or f"KB{len(records):03d}"
                content = item.get('content', '')
                title = item.get('title', '')

                if not content or not str(content).strip():
                    print(f"Skipping {doc_id}: empty content cannot be embedded")
                    continue

                records.append((doc_id, content, {
                    'title': title,
                    'content': content,
                    'doc_id': doc_id
                }))

            batch_size = self.BATCH_SIZE
            total_batches = (len(records) + batch_size - 1) // batch_size
            for batch_number, start in enumerate(range(0, len(records), batch_size), 1):
                batch = records[start:start + batch_size]
                embeddings = self.create_embeddings([text for _, text, _ in batch])
                if len(embeddings) != len(batch):
                    # Pairing by position would attach vectors to the wrong documents.
                    raise ValueError(
                        f"expected {len(batch)} embeddings, got {len(embeddings)}"
                    )

                # Prepare vectors for upsert
                vectors = [
                    {'id': doc_id, 'values': embedding, 'metadata': metadata}
                    for (doc_id, _, metadata), embedding in zip(batch, embeddings)
                ]

                # Upsert to Pinecone
                index.upsert(vectors=vectors)
                indexed += len(vectors)
                print(f"Indexed batch {batch_number}/{total_batches}")

            print(f"Successfully indexed {indexed} documents")
        except Exception as e:
            print(f"Error indexing knowledge base: {e}")
        return indexed

    def run_indexing(self, kb_data: List[Dict]):
        """Run the complete workflow: ensure the index exists, then index ``kb_data``.

        The closing message reports success only when every entry was indexed.
        """
        print("Starting knowledge base indexing...")
        print(f"Processing {len(kb_data)} KB entries")

        print("Creating Pinecone index...")
        # Indexing is still attempted if this fails: the index may already exist.
        self.create_index()

        print("Indexing knowledge base...")
        indexed = self.index_knowledge_base(kb_data)

        if indexed == len(kb_data):
            print("Indexing completed successfully!")
        else:
            print(f"Indexing incomplete: {indexed} of {len(kb_data)} documents were indexed")

# Run indexing
# Builds the indexer (which constructs the Azure OpenAI and Pinecone clients
# from Config) and pushes the sample knowledge base through the full workflow.
# This must run before the RAG system below queries the same index.
print("Starting KB indexing process...")
indexer = KBIndexer()
indexer.run_indexing(sample_kb_data)

# Step 7: Define the Agentic RAG System
# State dictionary passed between the workflow nodes. Each node returns a copy
# with its own keys filled in:
#   question        - the user's question
#   retrieved_docs  - documents fetched from the vector index
#   initial_answer  - first answer generated from the retrieved documents
#   critique_result - critique verdict, "COMPLETE" or "REFINE"
#   refined_answer  - answer regenerated with extra context, if any
#   final_answer    - the answer handed back to the caller
#   citations       - list of cited identifiers
GraphState = TypedDict(
    "GraphState",
    {
        "question": str,
        "retrieved_docs": List[Dict],
        "initial_answer": str,
        "critique_result": Literal["COMPLETE", "REFINE"],
        "refined_answer": str,
        "final_answer": str,
        "citations": List[str],
    },
)

class AgenticRAGSystem:
    """Question-answering pipeline: retrieve, answer, critique, optionally refine.

    Documents come from the Pinecone index named by
    ``Config.PINECONE_INDEX_NAME``, answers are written by the Azure OpenAI
    chat deployment, and a Gemini model decides whether the first answer is
    complete. The steps are wired together as a LangGraph workflow over
    ``GraphState``.

    The nodes that call external services catch their own exceptions, print
    the error and return a fallback value, so a failing service degrades the
    answer instead of raising out of ``query``.
    """

    # Number of snippets fetched for the first answer attempt.
    INITIAL_TOP_K = 5

    def __init__(self):
        """Create the Azure OpenAI, Pinecone and Gemini clients from ``Config``."""
        # Initialize Azure OpenAI client
        self.azure_client = AzureOpenAI(
            api_key=Config.AZURE_OPENAI_API_KEY,
            api_version=Config.AZURE_OPENAI_API_VERSION,
            azure_endpoint=Config.AZURE_OPENAI_ENDPOINT
        )

        # Initialize Pinecone
        self.pinecone = Pinecone(api_key=Config.PINECONE_API_KEY)
        self.index = self.pinecone.Index(Config.PINECONE_INDEX_NAME)

        # Compiled workflow; built on the first query() call and then reused.
        self._graph = None

        # Initialize Gemini for critique
        try:
            genai.configure(api_key=Config.GEMINI_API_KEY)
            # The model id can be overridden through the environment.
            # NOTE(review): confirm the default id is still served by the API.
            self.gemini_model = genai.GenerativeModel(
                os.getenv("GEMINI_MODEL_NAME", 'gemini-pro')
            )
            self.gemini_available = True
        except Exception as e:
            print(f"Gemini initialization failed: {e}")
            self.gemini_available = False

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _embed_question(self, question: str) -> List[float]:
        """Return the embedding vector of ``question``."""
        response = self.azure_client.embeddings.create(
            input=[question],
            model=Config.EMBEDDINGS_DEPLOYMENT_NAME
        )
        return response.data[0].embedding

    @staticmethod
    def _match_to_doc(match) -> Dict:
        """Convert one Pinecone match into the dict shape stored in the state."""
        # A vector stored without metadata must not abort the whole retrieval.
        metadata = match.metadata or {}
        return {
            'id': match.id,
            'content': metadata.get('content', ''),
            'title': metadata.get('title', ''),
            'score': match.score
        }

    def _search(self, question: str, top_k: int) -> List[Dict]:
        """Return the ``top_k`` nearest documents for ``question``."""
        results = self.index.query(
            vector=self._embed_question(question),
            top_k=top_k,
            include_metadata=True
        )
        return [self._match_to_doc(match) for match in results.matches]

    @staticmethod
    def _format_context(docs: List[Dict]) -> str:
        """Render documents as ``[id] content`` paragraphs for the prompt."""
        return "\n\n".join(f"[{doc['id']}] {doc['content']}" for doc in docs)

    def _chat(self, system_prompt: str, user_prompt: str) -> str:
        """Run one chat completion and return the message text."""
        response = self.azure_client.chat.completions.create(
            model=Config.GPT4_DEPLOYMENT_NAME,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0,
            max_tokens=1000
        )
        return response.choices[0].message.content

    @staticmethod
    def _parse_critique(raw_text) -> str:
        """Map the critique model's free-form reply to "COMPLETE" or "REFINE".

        The first verdict word found wins, so punctuation, markdown emphasis
        or a short explanation around the verdict do not hide it. A reply
        without any verdict word defaults to "COMPLETE".
        """
        import re

        for word in re.findall(r"[A-Z]+", (raw_text or "").upper()):
            if word in ("REFINE", "REFINEMENT", "INCOMPLETE"):
                return "REFINE"
            if word == "COMPLETE":
                return "COMPLETE"
        return "COMPLETE"  # Default to complete

    @staticmethod
    def _extract_citations(answer, docs: List[Dict]) -> List[str]:
        """Return ids of ``docs`` mentioned in ``answer``, in retrieval order.

        Only ids of documents that were actually retrieved are reported, so
        an id invented by the model is never listed as a citation.
        """
        import re

        text = answer or ""
        cited = []
        for doc in docs:
            doc_id = str(doc['id'])
            if doc_id in cited:
                continue
            # Whole-token match so that e.g. KB001 does not match inside KB0010.
            if re.search(rf"(?<!\w){re.escape(doc_id)}(?!\w)", text):
                cited.append(doc_id)
        return cited

    # ------------------------------------------------------------------
    # Graph nodes
    # ------------------------------------------------------------------
    def retrieve_documents(self, state: GraphState) -> GraphState:
        """Retrieve the top ``INITIAL_TOP_K`` documents for the question.

        Returns the state with ``retrieved_docs`` set; on any error the list
        is empty and the error is printed.
        """
        print(" Retrieving documents...")
        try:
            retrieved_docs = self._search(state["question"], self.INITIAL_TOP_K)
            print(f"Retrieved {len(retrieved_docs)} documents")
            return {**state, "retrieved_docs": retrieved_docs}
        except Exception as e:
            print(f"Error in retrieval: {e}")
            return {**state, "retrieved_docs": []}

    def generate_initial_answer(self, state: GraphState) -> GraphState:
        """Generate the first answer from the retrieved documents.

        Returns the state with ``initial_answer`` set; on any error a fixed
        apology string is stored instead.
        """
        print(" Generating initial answer...")
        try:
            # Prepare context from retrieved documents
            context = self._format_context(state["retrieved_docs"])

            system_prompt = """You are a helpful assistant that answers questions based on the provided knowledge base snippets. 
            Always cite the relevant snippets using [KBxxx] format. If information comes from multiple snippets, cite all relevant ones.
            Be concise and accurate in your responses."""

            user_prompt = f"""Question: {state['question']}

            Knowledge Base Context:
            {context}

            Please provide a comprehensive answer citing relevant snippets:"""

            initial_answer = self._chat(system_prompt, user_prompt)
            print("Initial answer generated successfully")
            return {**state, "initial_answer": initial_answer}
        except Exception as e:
            print(f"Error generating answer: {e}")
            return {**state, "initial_answer": "Unable to generate answer at this time."}

    def self_critique(self, state: GraphState) -> GraphState:
        """Ask Gemini whether the initial answer is complete.

        Returns the state with ``critique_result`` set to "COMPLETE" or
        "REFINE". The result is "COMPLETE" when Gemini is unavailable, fails,
        or gives a reply that contains no verdict.
        """
        print(" Performing self-critique...")

        if not self.gemini_available:
            print("Gemini not available, defaulting to COMPLETE")
            return {**state, "critique_result": "COMPLETE"}

        try:
            critique_prompt = f"""
            Analyze the following question and answer pair. Determine if the answer is COMPLETE or needs REFINEMENT.
            
            Question: {state['question']}
            Answer: {state['initial_answer']}
            
            Consider:
            1. Does the answer fully address the question?
            2. Are there any gaps in the information provided?
            3. Is the answer well-supported with citations?
            
            Respond with only one word: either "COMPLETE" or "REFINE".
            """

            response = self.gemini_model.generate_content(critique_prompt)
            # Tolerant parsing: strict equality would turn "REFINE." into COMPLETE.
            critique_result = self._parse_critique(response.text)

            print(f"Critique result: {critique_result}")
            return {**state, "critique_result": critique_result}

        except Exception as e:
            print(f"Gemini critique failed: {e}")
            return {**state, "critique_result": "COMPLETE"}  # Default to complete on error

    def refine_answer(self, state: GraphState) -> GraphState:
        """Retrieve one more document and regenerate the answer with it.

        On success the state gets the extended ``retrieved_docs`` plus
        ``refined_answer`` and ``final_answer``. If no new document exists or
        anything fails, ``final_answer`` falls back to the initial answer.
        """
        print(" Refining answer...")
        try:
            known_docs = state["retrieved_docs"]
            existing_ids = {doc['id'] for doc in known_docs}

            # One more match than we already hold must include an unseen
            # document whenever the index contains one.
            candidates = self._search(state["question"], len(existing_ids) + 1)
            additional_doc = next(
                (doc for doc in candidates if doc['id'] not in existing_ids),
                None
            )

            if additional_doc:
                print(f"Found additional document: {additional_doc['id']}")
                # Add to retrieved docs
                updated_docs = known_docs + [additional_doc]

                # Regenerate answer with additional context
                context = self._format_context(updated_docs)

                system_prompt = """You are a helpful assistant that answers questions based on the provided knowledge base snippets. 
                Always cite the relevant snippets using [KBxxx] format. If information comes from multiple snippets, cite all relevant ones.
                Be concise and accurate in your responses. This is a refined answer with additional context."""

                user_prompt = f"""Question: {state['question']}

                Knowledge Base Context (including additional snippet):
                {context}

                Please provide a refined comprehensive answer citing relevant snippets:"""

                refined_answer = self._chat(system_prompt, user_prompt)
                print("Refined answer generated successfully")
                return {
                    **state,
                    # Record the extra snippet so callers see every document used.
                    "retrieved_docs": updated_docs,
                    "refined_answer": refined_answer,
                    "final_answer": refined_answer
                }
            else:
                print("No additional documents found, using initial answer")
                return {**state, "final_answer": state["initial_answer"]}

        except Exception as e:
            print(f"Error in refinement: {e}")
            return {**state, "final_answer": state["initial_answer"]}

    def should_continue(self, state: GraphState) -> Literal["refine", "end"]:
        """Route to "refine" when the critique asked for it, otherwise "end"."""
        if state["critique_result"] == "REFINE":
            return "refine"
        return "end"

    def finalize_answer(self, state: GraphState) -> GraphState:
        """Settle ``final_answer`` and fill ``citations``.

        When the critique said "COMPLETE" the initial answer becomes final;
        otherwise the value set by ``refine_answer`` is kept.
        """
        print(" Finalizing answer...")
        if state["critique_result"] == "COMPLETE":
            final_answer = state["initial_answer"]
        else:
            final_answer = state.get("final_answer", "")
        citations = self._extract_citations(final_answer, state.get("retrieved_docs", []))
        return {**state, "final_answer": final_answer, "citations": citations}

    def build_graph(self) -> StateGraph:
        """Build and compile the LangGraph workflow.

        Flow: retriever -> answer_generator -> critique, then either
        refinement -> finalizer or straight to finalizer, then END. Note that
        the compiled workflow is what gets returned.
        """
        workflow = StateGraph(GraphState)

        # Add nodes
        workflow.add_node("retriever", self.retrieve_documents)
        workflow.add_node("answer_generator", self.generate_initial_answer)
        workflow.add_node("critique", self.self_critique)
        workflow.add_node("refinement", self.refine_answer)
        workflow.add_node("finalizer", self.finalize_answer)

        # Set entry point
        workflow.set_entry_point("retriever")

        # Define edges
        workflow.add_edge("retriever", "answer_generator")
        workflow.add_edge("answer_generator", "critique")
        workflow.add_conditional_edges(
            "critique",
            self.should_continue,
            {
                "refine": "refinement",
                "end": "finalizer"
            }
        )
        workflow.add_edge("refinement", "finalizer")
        workflow.add_edge("finalizer", END)

        return workflow.compile()

    def query(self, question: str) -> Dict:
        """Execute the RAG pipeline for a question.

        Returns:
            A dict with ``question``, ``final_answer``, ``retrieved_docs``,
            ``critique_result``, ``initial_answer``, ``refined_answer`` and
            ``citations``.
        """
        print(f"\n Processing question: {question}")
        print("-" * 50)

        # Initialize state
        initial_state = GraphState(
            question=question,
            retrieved_docs=[],
            initial_answer="",
            critique_result="COMPLETE",
            refined_answer="",
            final_answer="",
            citations=[]
        )

        # The workflow is the same for every question, so compile it only once.
        graph = getattr(self, "_graph", None)
        if graph is None:
            graph = self._graph = self.build_graph()
        final_state = graph.invoke(initial_state)

        return {
            "question": question,
            "final_answer": final_state["final_answer"],
            "retrieved_docs": final_state["retrieved_docs"],
            "critique_result": final_state["critique_result"],
            "initial_answer": final_state.get("initial_answer", ""),
            "refined_answer": final_state.get("refined_answer", ""),
            "citations": final_state.get("citations", [])
        }

# Step 8: Initialize the RAG System
# Constructing the system connects to Azure OpenAI, Pinecone and Gemini.
print("Initializing Agentic RAG System...")
rag_system = AgenticRAGSystem()
print("RAG System initialized successfully!")

# Step 9: Test with Sample Queries
# One question per topic covered by the first five knowledge-base entries.
sample_queries = [
    "What are best practices for caching?",
    "How should I set up CI/CD pipelines?",
    "What are performance tuning tips?",
    "How do I version my APIs?",
    "What should I consider for error handling?",
]

# Banner announcing the demo run.
print(f"\n{'=' * 60}")
print("STARTING AGENTIC RAG DEMO")
print(f"{'=' * 60}")

In [None]:
# Run every sample query through the pipeline and print a short report.
for i, query in enumerate(sample_queries, 1):
    # The total comes from the list itself so the counter stays right
    # when queries are added or removed.
    print(f"\n Query {i}/{len(sample_queries)}: {query}")
    result = rag_system.query(query)

    print(f"\n Final Answer:")
    print(result['final_answer'])
    print(f"\n Critique Result: {result['critique_result']}")
    print(f" Retrieved Documents: {len(result['retrieved_docs'])}")

    if result['critique_result'] == 'REFINE':
        # A REFINE verdict alone does not mean a refined answer exists: the
        # pipeline falls back to the initial answer when refinement fails.
        if result.get('refined_answer'):
            print(" Refinement was applied")
        else:
            print(" Refinement was requested but could not be applied")

    print("-" * 80)


In [None]:
# Step 10: Interactive Query Section
print("\n" + "="*60)
print("INTERACTIVE QUERY MODE")
print("="*60)
print("You can now ask your own questions! (Type 'quit' to exit)")

# Read questions until the user types 'quit' or the input stream ends.
while True:
    try:
        # Strip first so that "quit " or " QUIT" still ends the session.
        user_question = input("\n Enter your question: ").strip()
    except EOFError:
        # No more input is available; leave the loop instead of crashing.
        break

    if user_question.lower() == 'quit':
        break

    if not user_question:
        # Blank line: ask again without calling the pipeline.
        continue

    result = rag_system.query(user_question)
    print(f"\n Answer: {result['final_answer']}")
    print(f" Critique: {result['critique_result']}")
    print(f" Documents used: {len(result['retrieved_docs'])}")

print("\n Thank you for using the Agentic RAG System!")

# Step 11: MLflow Integration (Optional)
# Any failure here (import, connection, logging) is reported and skipped so
# the demo still finishes.
try:
    import mlflow

    # Point the client at the configured server and select the experiment.
    mlflow.set_tracking_uri(Config.MLFLOW_TRACKING_URI)
    mlflow.set_experiment("agentic-rag-system")

    # Record one run describing the models used and the demo size.
    with mlflow.start_run():
        for param_name, param_value in (
            ("model", Config.GPT4_DEPLOYMENT_NAME),
            ("embedding_model", Config.EMBEDDINGS_DEPLOYMENT_NAME),
        ):
            mlflow.log_param(param_name, param_value)
        mlflow.log_metric("sample_queries_processed", len(sample_queries))

        print("\n MLflow logging completed!")

except Exception as e:
    print(f"MLflow setup skipped: {e}")

# Closing banner.
print(f"\n{'=' * 60}")
print("AGENTIC RAG SYSTEM DEMO COMPLETED SUCCESSFULLY!")
print(f"{'=' * 60}")