In [2]:
# Load all necessary packages

import pandas as pd
import os
from typing import List, Dict
from pathlib import Path
from openai import AzureOpenAI
from langchain_openai import ChatOpenAI, OpenAIEmbeddings, AzureOpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader, TextLoader, PyPDFDirectoryLoader
from langchain_classic.agents import AgentExecutor, create_tool_calling_agent, create_react_agent, create_openai_tools_agent
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.tools import Tool
from dotenv import load_dotenv
import time
from tqdm import tqdm



In [3]:
load_dotenv()

True

In [4]:
file_path =r"C:\Users\yannik_sassmann\Documents\YASA\Fortbildungen\Data_Science_Bootcamp\Final_Project\Ironhack_Capstone_Project\pdfs\giz"
documents_path=r"C:\Users\yannik_sassmann\Documents\YASA\Fortbildungen\Data_Science_Bootcamp\Final_Project\Ironhack_Capstone_Project\pdfs\giz"

In [None]:
class DeepResearchAgent:
    SYSTEM_PROMPT = """You are a Senior Development Cooperation Analyst specializing in evidence-based project design and institutional learning. You work for a ministry planning new sustainable development projects globally, and your role is to extract actionable, non-obvious lessons from past project documentation.

    CORE MISSION:
    Extract specific, contextual insights from past development cooperation projects that can meaningfully inform the critical review of new project proposals. Your insights must go beyond generic development wisdom to reveal nuanced patterns, context-specific success factors, and overlooked risks.

    RESEARCH PROTOCOL:

    1. INITIAL EXPLORATION (Use deep_search):
    - Cast a wide net to understand the landscape of similar projects
    - Identify patterns across multiple projects, countries, and sectors
    - Look for both successes AND failures - failures often teach more

    2. DEEP DIVE (Use search_documents or diverse_search):
    - Investigate specific mechanisms: WHY did something work or fail?
    - Identify contextual factors: political economy, timing, stakeholder dynamics
    - Look for implementation details, not just outcomes
    - Search for quantitative data, timelines, budget allocations

    3. CRITICAL SYNTHESIS:
    - Compare contradictory findings across different projects
    - Identify conditions under which similar approaches succeeded or failed
    - Distinguish correlation from causation
    - Note what the documents DON'T say (gaps in reporting, avoided topics)

    QUALITY STANDARDS FOR INSIGHTS:

    ‚úÖ GOOD INSIGHTS (Aim for these):
    - "In 3 water projects in Sub-Saharan Africa (Tanzania 2018, Kenya 2019, Uganda 2020), community ownership was only sustained when local government budget allocations for maintenance were secured BEFORE project handover. Projects that relied on community fees alone saw 65% infrastructure failure within 2 years." [Source: X, Page Y]

    - "Agricultural training programs in South Asia showed 40% higher adoption rates when conducted by local farmer-trainers rather than external consultants, BUT only in communities with existing farmer cooperatives. In communities without cooperatives, external trainers performed better." [Source: X, Page Y]

    ‚ùå POOR INSIGHTS (Avoid these):
    - "Stakeholder engagement is important for project success"
    - "Local ownership matters"
    - "Context is crucial"
    - "Monitoring and evaluation should be robust"

    INSIGHT CHARACTERISTICS YOU MUST AIM FOR:

    1. **Specificity**: Include numbers, timeframes, locations, project names
    2. **Conditionality**: "X worked when Y was true, but failed when Z"
    3. **Mechanisms**: Explain HOW and WHY, not just WHAT
    4. **Actionability**: Clear implications for proposal reviewers
    5. **Evidence-based**: Always cite specific sources with page numbers
    6. **Non-obvious**: Would surprise an experienced development practitioner

    ANALYTICAL FRAMEWORK - Always consider:

    - **Design Phase**: What assumptions proved wrong? What baseline data was missing?
    - **Implementation**: What institutional bottlenecks emerged? Which partnerships worked?
    - **Sustainability**: What happened after project end? What cost structures proved unrealistic?
    - **Context**: Political economy factors, timing, cultural dynamics, institutional capacity
    - **Scale**: What worked at pilot but failed at scale? Or vice versa?
    - **Unintended Consequences**: Negative spillovers, market distortions, dependency creation

   SEARCH STRATEGY:

    Phase 1 - LANDSCAPE MAPPING:
    - Use deep_search to understand the topic broadly
    - Use compare_projects if looking at different regions/approaches

    Phase 2 - CRITICAL ANALYSIS:
    - Use find_failure_cases to understand what went wrong
    - Use find_longterm_outcomes to check sustainability

    Phase 3 - EVIDENCE GATHERING:
    - Use analyze_context_factors to understand WHY
    - Use find_implementation_details to understand HOW

    Phase 4 - RISK ASSESSMENT:
    - Use identify_risk_patterns to flag concerns for new proposals

    Phase 5 - TARGETED FOLLOW-UP:
    - Use search_documents for specific clarifications

    OUTPUT FORMAT:

    Structure your response as:

    **Key Findings:**
    [3-5 specific, actionable insights with full citations]

    **Critical Success Factors:**
    [What conditions needed to be in place - be specific about sequence, timing, institutional requirements]

    **Common Failure Modes:**
    [What went wrong and why - include early warning signs]

    **Red Flags for Proposal Review:**
    [Specific things to look for in new proposals based on past failures]

    **Evidence Gaps:**
    [What information you couldn't find that would be valuable]

    CRITICAL GUIDELINES:

    - Prioritize project evaluations, mid-term reviews, and lessons-learned documents
    - Value negative findings as much as positive ones
    - If you find contradictory evidence, present both sides with context
    - Distinguish between project outputs (delivered) and outcomes (sustained change)
    - Be skeptical of self-reported success without independent verification
    - Always consider the political economy: Who benefited? Who lost? What power dynamics existed?
    - Flag when sample sizes are small or evidence is thin
    - Note when "success" was measured too early (before sustainability could be assessed)

    TONE:
    Professional, analytical, constructively critical. Do not use abbreviations. Always write out abbreviations in full. You serve the ministry's learning mission, not project advocacy. Your job is to prevent repeating past mistakes and amplify proven approaches."""

    def __init__(self, persist_directory: str = "./chroma_db", documents_path: str = None):
        """
        Initialize the deep research agent.
        
        Args:
            documents_path: Path to a directory containing PDF files
            persist_directory: Where to store the vector database
        """
        
        # Using GPT-5
        self.llm = ChatOpenAI(model="gpt-5",
                              base_url="https://bootcampai.openai.azure.com/openai/v1/",
                              api_key=os.environ["OPENAI_AZURE_API_KEY"])
        
        
        # Create/load vector store
        # Check if vector store already exists
        if os.path.exists(persist_directory):
            print(f"‚úì Found existing vector store at {persist_directory}")
            # Initialize embeddings (needed for Chroma to work)
            embeddings = OpenAIEmbeddings(
                model="text-embedding-3-large",
                base_url="https://bootcampai.openai.azure.com/openai/v1/",
                api_key=os.environ["OPENAI_AZURE_API_KEY"]
            )
            # Load existing vector store directly
            self.vectorstore = Chroma(
                persist_directory=persist_directory,
                embedding_function=embeddings
            )
            print("‚úì Vector store loaded successfully!\n")
        else:
            # Vector store doesn't exist, create it
            if documents_path is None:
                raise ValueError(
                    f"Vector store not found at {persist_directory} and no documents_path provided. "
                    "Please provide documents_path to create the vector store."
                )
            print(f"Vector store not found. Creating new one from {documents_path}...")
            vector_store_creator = VectorStoreCreator(
                documents_path=documents_path,
                persist_directory=persist_directory
            )
            self.vectorstore = vector_store_creator.vectorstore
            print("‚úì Vector store created!\n")


        # Create tools
        self.tools = self._create_tools()
        
        # Create agent
        self.agent_executor = self._create_agent()
    
    def _format_results(self, docs: List, prefix: str = "RESULTS") -> str:
        """Format search results consistently."""
        if not docs:
            return "No relevant documents found."
            
        results = [f"=== {prefix} ===\n"]
        for i, doc in enumerate(docs, 1):
            source = doc.metadata.get('source', 'Unknown')
            filename = os.path.basename(source)
            page = doc.metadata.get('page', 'N/A')
            results.append(
                f"\nResult {i} (Source: {filename}, Page: {page}):\n"
                f"{doc.page_content}\n"
            )
        return "\n".join(results)

    def _create_tools(self) -> List[Tool]:
        """Create tools for the agent."""
        
        def search_documents(query: str) -> str:
            """Search the document collection for relevant information."""
            docs = self.vectorstore.similarity_search(query, k=20)
            
            results = []
            for i, doc in enumerate(docs, 1):
                source = doc.metadata.get('source', 'Unknown')
                filename = os.path.basename(source)
                page = doc.metadata.get('page', 'N/A')
                results.append(f"Result {i} (Source: {filename}, Page: {page}):\n{doc.page_content}\n")
            
            return "\n".join(results) if results else "No relevant documents found."
        
        def deep_search(query: str) -> str:
            """Perform a deep search by generating multiple query variations."""
            variations_prompt = f"""Given this research question: "{query}"

            Generate 3 different search queries that would help gather comprehensive information.
            Focus on different aspects or angles of the question.

            Format your response as a numbered list:
            1. [query 1]
            2. [query 2]
            3. [query 3]"""
                        
            response = self.llm.invoke(variations_prompt)
            queries = [line.split('. ', 1)[1] for line in response.content.split('\n') 
                      if line.strip() and line[0].isdigit()]
            
            all_queries = [query] + queries[:3]
            
            all_results = {}
            for q in all_queries:
                docs = self.vectorstore.similarity_search(q, k=10)
                for doc in docs:
                    all_results[doc.page_content] = doc
            
            results = []
            for i, doc in enumerate(list(all_results.values())[:10], 1):
                source = doc.metadata.get('source', 'Unknown')
                filename = os.path.basename(source)
                page = doc.metadata.get('page', 'N/A')
                results.append(f"Result {i} (Source: {filename}, Page: {page}):\n{doc.page_content}\n")
            
            return "\n".join(results) if results else "No relevant documents found."
        
        def compare_projects(query: str) -> str:
            """
            Compare similar projects across different contexts to identify 
            what worked where and why. Useful for finding patterns and 
            context-specific success factors.
            
            Example: "Compare water projects in East Africa vs West Africa"
            """
            # Generate comparison queries
            comparison_prompt = f"""Given this comparison request: "{query}"

            Generate 4 search queries to compare projects effectively:
            1. A query for the first group/context
            2. A query for the second group/context  
            3. A query for common challenges across both
            4. A query for divergent outcomes/approaches

            Format as:
            1. [query 1]
            2. [query 2]
            3. [query 3]
            4. [query 4]"""

            response = self.llm.invoke(comparison_prompt)
            queries = [line.split('. ', 1)[1] for line in response.content.split('\n') 
                    if line.strip() and line[0].isdigit()]
            
            # Search and organize by context
            results_by_context = {}
            for q in queries[:4]:
                docs = self.vectorstore.similarity_search(q, k=8)
                for doc in docs:
                    source = doc.metadata.get('source', 'Unknown')
                    if source not in results_by_context:
                        results_by_context[source] = []
                    results_by_context[source].append(doc)
            
            # Format comparative results
            formatted = ["=== COMPARATIVE ANALYSIS ===\n"]
            for source, docs in list(results_by_context.items())[:10]:
                formatted.append(f"\n--- {os.path.basename(source)} ---")
                for doc in docs[:2]:  # Top 2 excerpts per source
                    page = doc.metadata.get('page', 'N/A')
                    formatted.append(f"Page {page}: {doc.page_content[:500]}...\n")
            
            return "\n".join(formatted)

        def find_failure_cases(query: str) -> str:
            """
            Specifically search for project failures, challenges, problems,
            and lessons learned from negative outcomes. Critical for learning
            what NOT to do.
            
            Example: "What caused agricultural projects to fail in the Sahel?"
            """
            # Augment query with failure-related terms
            failure_keywords = [
                f"{query} failures",
                f"{query} challenges problems",
                f"{query} lessons learned difficulties",
                f"{query} sustainability issues abandonment",
                f"{query} unintended consequences negative impacts"
            ]
            
            all_results = {}
            for fail_query in failure_keywords:
                docs = self.vectorstore.similarity_search(fail_query, k=6)
                for doc in docs:
                    # Prioritize documents mentioning failure-related terms
                    content_lower = doc.page_content.lower()
                    if any(term in content_lower for term in 
                        ['fail', 'challenge', 'problem', 'difficult', 'unsustainable', 
                            'abandon', 'discontinue', 'not work', 'ineffective']):
                        all_results[doc.page_content] = doc
            
            results = []
            for i, doc in enumerate(list(all_results.values())[:12], 1):
                source = doc.metadata.get('source', 'Unknown')
                filename = os.path.basename(source)
                page = doc.metadata.get('page', 'N/A')
                results.append(f"Failure Case {i} (Source: {filename}, Page: {page}):\n{doc.page_content}\n")
            
            return "\n".join(results) if results else "No failure cases found."

        def find_longterm_outcomes(query: str) -> str:
            """
            Search for long-term sustainability outcomes, post-project results,
            and what happened after project completion. Essential for understanding
            true impact vs. short-term outputs.
            
            Example: "What happened to health clinics 3 years after project end?"
            """
            temporal_queries = [
                f"{query} post-project sustainability",
                f"{query} after completion follow-up",
                f"{query} long-term outcomes impact",
                f"{query} years later evaluation",
                f"{query} maintained discontinued abandoned"
            ]
            
            all_results = {}
            for temp_query in temporal_queries:
                docs = self.vectorstore.similarity_search(temp_query, k=6)
                for doc in docs:
                    all_results[doc.page_content] = doc
            
            return self._format_results(list(all_results.values())[:15], 
                                    prefix="LONG-TERM OUTCOMES")

        def analyze_context_factors(query: str) -> str:
            """
            Search for political economy, institutional capacity, cultural factors,
            and contextual conditions that influenced project success/failure.
            
            Example: "What political factors affected governance projects in Myanmar?"
            """
            context_queries = [
                f"{query} political economy stakeholders",
                f"{query} institutional capacity governance",
                f"{query} cultural social norms",
                f"{query} enabling environment constraints",
                f"{query} local context conditions"
            ]
            
            all_results = {}
            for ctx_query in context_queries:
                docs = self.vectorstore.similarity_search(ctx_query, k=6)
                for doc in docs:
                    all_results[doc.page_content] = doc
            
            return self._format_results(list(all_results.values())[:15], 
                                        prefix="CONTEXTUAL FACTORS")

        def identify_risk_patterns(query: str) -> str:
            """
            Find early warning signs, risk factors, and red flags from past projects.
            Use to identify what to watch out for in new proposals.
            
            Example: "What were early warning signs of project delays in infrastructure?"
            """
            risk_queries = [
                f"{query} risks challenges constraints",
                f"{query} warnings signs indicators",
                f"{query} delays bottlenecks obstacles",
                f"{query} assumptions proved wrong",
                f"{query} underestimated overlooked"
            ]
            
            all_results = {}
            for risk_query in risk_queries:
                docs = self.vectorstore.similarity_search(risk_query, k=6)
                for doc in docs:
                    all_results[doc.page_content] = doc
            
            return self._format_results(list(all_results.values())[:15], 
                                    prefix="RISK PATTERNS & RED FLAGS")

        def find_implementation_details(query: str) -> str:
            """
            Search for HOW things were implemented: partnerships, management structures,
            procurement, staffing, training approaches. The mechanics that made things work.
            
            Example: "How were community health workers recruited and trained in Ethiopia?"
            """
            implementation_queries = [
                f"{query} implementation approach methodology",
                f"{query} partnership coordination mechanisms",
                f"{query} procurement management systems",
                f"{query} training capacity building",
                f"{query} monitoring supervision quality control"
            ]
            
            all_results = {}
            for impl_query in implementation_queries:
                docs = self.vectorstore.similarity_search(impl_query, k=6)
                for doc in docs:
                    all_results[doc.page_content] = doc
            
            return self._format_results(list(all_results.values())[:15], 
                                    prefix="IMPLEMENTATION DETAILS")

        def synthesize_insights(topic: str) -> str:
            """
            Meta-tool that runs multiple searches and synthesizes findings into
            actionable insights. Use for complex analysis requiring multiple perspectives.
            
            Example: "Synthesize insights on rural electrification sustainability"
            """
            print(f"üîç Running comprehensive synthesis on: {topic}")
            
            # Run multiple specialized searches
            searches = []
            
            # 1. Get comprehensive overview
            try:
                print("  ‚Üí Running deep_search...")
                deep_results = deep_search(f"{topic} successful projects")
                searches.append(("Overview & Successes", deep_results))
            except Exception as e:
                searches.append(("Overview & Successes", f"Error: {str(e)}"))
            
            # 2. Find failures
            try:
                print("  ‚Üí Finding failure cases...")
                failure_results = find_failure_cases(topic)
                searches.append(("Failures & Challenges", failure_results))
            except Exception as e:
                searches.append(("Failures & Challenges", f"Error: {str(e)}"))
            
            # 3. Get long-term outcomes
            try:
                print("  ‚Üí Analyzing long-term outcomes...")
                longterm_results = find_longterm_outcomes(topic)
                searches.append(("Long-term Sustainability", longterm_results))
            except Exception as e:
                searches.append(("Long-term Sustainability", f"Error: {str(e)}"))
            
            # 4. Understand context
            try:
                print("  ‚Üí Analyzing context factors...")
                context_results = analyze_context_factors(topic)
                searches.append(("Contextual Factors", context_results))
            except Exception as e:
                searches.append(("Contextual Factors", f"Error: {str(e)}"))
            
            # 5. Implementation details
            try:
                print("  ‚Üí Gathering implementation details...")
                impl_results = find_implementation_details(topic)
                searches.append(("Implementation", impl_results))
            except Exception as e:
                searches.append(("Implementation", f"Error: {str(e)}"))
            
            # Combine all findings
            combined_results = "\n\n=== COMPREHENSIVE SYNTHESIS REQUEST ===\n"
            combined_results += f"Topic: {topic}\n\n"
            
            for label, results in searches:
                # Truncate each section to avoid context overflow
                truncated = results[:2000] if len(results) > 2000 else results
                combined_results += f"\n{'='*60}\n"
                combined_results += f"{label.upper()}\n"
                combined_results += f"{'='*60}\n"
                combined_results += f"{truncated}\n"
            
            # Ask LLM to synthesize
            synthesis_prompt = f"""Based on the following comprehensive research findings about "{topic}", 
                                synthesize 3-5 specific, actionable insights following our quality standards.

    {combined_results}

            Remember to provide insights with:
            - Specific numbers, locations, timeframes, and examples
            - Clear conditions (when X works, when it doesn't)  
            - Mechanisms (WHY it works, not just WHAT worked)
            - Full source citations with document names and page numbers
            - Non-obvious insights that would surprise experienced practitioners

            Structure your synthesis according to the OUTPUT FORMAT specified in your system instructions."""
            
            print("  ‚Üí Synthesizing findings with LLM...")
            synthesis = self.llm.invoke(synthesis_prompt)
            
            return synthesis.content

        return [
            # Core search
            Tool(
                name="search_documents",
                func=search_documents,
                description="Quick search for specific information or follow-up queries."
            ),
            Tool(
                name="deep_search",
                func=deep_search,
                description="Comprehensive multi-angle search. Use first for new topics."
            ),
            
            # Comparative & analytical
            Tool(
                name="compare_projects",
                func=compare_projects,
                description="Compare similar projects across contexts to identify patterns and context-specific factors."
            ),
            Tool(
                name="find_failure_cases",
                func=find_failure_cases,
                description="Find project failures, challenges, and negative outcomes. Critical for learning what to avoid."
            ),
            
            # Temporal analysis
            Tool(
                name="find_longterm_outcomes",
                func=find_longterm_outcomes,
                description="Find post-project sustainability and long-term results. Essential for true impact assessment."
            ),
            
            # Context & risks
            Tool(
                name="analyze_context_factors",
                func=analyze_context_factors,
                description="Find political economy, institutional, and cultural factors that influenced outcomes."
            ),
            Tool(
                name="identify_risk_patterns",
                func=identify_risk_patterns,
                description="Identify early warning signs and red flags from past projects."
            ),
            
            # Implementation
            Tool(
                name="find_implementation_details",
                func=find_implementation_details,
                description="Find HOW things were implemented: partnerships, training, management approaches."
            ),

            # Meta-analysis tool
            Tool(
            name="synthesize_insights",
            func=synthesize_insights,
            description="Run comprehensive multi-tool analysis and synthesize findings into actionable insights. Use when you need deep, multi-faceted analysis of a complex topic. This tool automatically searches for successes, failures, long-term outcomes, context, and implementation details."
            ),
        ]
    
    def _create_agent(self) -> AgentExecutor:
        """Create the research agent."""
        
        prompt = ChatPromptTemplate.from_messages([
    ("system", self.SYSTEM_PROMPT),
            ("human", "{input}"),
            MessagesPlaceholder(variable_name="agent_scratchpad"),
        ])
        
        agent = create_openai_tools_agent(
            llm=self.llm,
            tools=self.tools,
            prompt=prompt
        )
        
        return AgentExecutor(
            agent=agent,
            tools=self.tools,
            verbose=False, # Set to True to show reasoning
            max_iterations=5, # Beyond 5 iterations, gains were negligible despite significant increases in token consumption
            return_intermediate_steps=True # Keep True to see tool calls
        )
    
    def research(self, question: str) -> Dict:
        """Conduct research on a question."""
        result = self.agent_executor.invoke({"input": question})
        return result

In [18]:
# Example usage
if __name__ == "__main__":

    # Path to chroma db
    persist_directory = "./chroma_db"
    # documents_path = "C:\Users\yannik_sassmann\Documents\YASA\Fortbildungen\Data_Science_Bootcamp\Final_Project\Ironhack_Capstone_Project\pdfs\giz"

    # Initialize agent
    print("Initializing Deep Research Agent...")
    agent = DeepResearchAgent(persist_directory=persist_directory)
    print("Agent ready!\n")

    # Conduct research
    while True:
        question = input("\nYour question (or 'quit' to exit): ")
        if question.lower() in ['quit', 'exit', 'q']:
            break
        
        print(f"\n{'='*80}")
        print(f"RESEARCHING: {question}")
        print(f"{'='*80}\n")
        
        result = agent.research(question)
    
        print(f"\n{'='*80}")
        print(f"RESEARCH QUESTION: {question}")
        print(f"{'='*80}\n")
        
        result = agent.research(question)
        
        print(f"\n{'='*80}")
        print("FINAL ANSWER:")
        print(f"{'='*80}\n")
        print(result['output'])


Initializing Deep Research Agent...
‚úì Found existing vector store at ./chroma_db
‚úì Vector store loaded successfully!

Agent ready!


RESEARCHING: I need to approve an agriculture project in Africa. What should i know or need to be aware off before approving the project?


RESEARCH QUESTION: I need to approve an agriculture project in Africa. What should i know or need to be aware off before approving the project?


FINAL ANSWER:

Key Findings:
- Input subsidy programs rarely deliver sustained productivity gains without tight targeting, on-time delivery, and exit plans. In Malawi‚Äôs FISP and Ghana‚Äôs fertilizer subsidy, evaluations documented high leakage to non-poor farmers, private-sector crowding out, and late input delivery that neutralized yield effects; digital e-voucher pilots improved redemption and targeting in the first 1‚Äì2 years but degraded when fiscal arrears and ‚Äúghost‚Äù registrants proliferated. Actionable implication: if your proposal includes input subsidies 

In [None]:
### Integrate best practices into the deep agents behaviour and tools.
### Include sources in the output.
### Create UI/UX and deploy
### Make downloadabe report