# LangGraph Agent Model Documentation

This notebook demonstrates how to build sophisticated agents using LangGraph with:
- Multiple tools and conditional routing
- State management and memory
- Error handling and validation
- Integration with ValidMind for testing and monitoring



## Setup and Imports


In [1]:
from typing import TypedDict, List, Annotated, Sequence, Optional, Dict, Any
from langchain.tools import tool
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage
from langchain_openai import ChatOpenAI
from langgraph.graph import StateGraph, END, START
from langgraph.prebuilt import ToolNode
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph.message import add_messages
import json

# Load environment variables from a .env file when python-dotenv is installed.
# The dependency is optional: if the import fails, only a reminder is printed
# and the notebook continues.
try:
    from dotenv import load_dotenv
    load_dotenv()
except ImportError:
    print("dotenv not installed. Make sure OPENAI_API_KEY is set in your environment.")


In [None]:
import validmind as vm

# Initialize the ValidMind client for this notebook.
# NOTE(review): the "..." values look like placeholders — replace them with the
# real connection settings before running.
vm.init(
    api_host="...",
    api_key="...",
    api_secret="...",
    model="...",
)

## LLM-Powered Tool Selection Router

This section demonstrates how to create an intelligent router that uses an LLM to select the most appropriate tool based on user input and tool docstrings.

### Benefits of LLM-Based Tool Selection:
- **Intelligent Routing**: Understanding of natural language intent
- **Dynamic Selection**: Can handle complex, multi-step requests  
- **Context Awareness**: Considers conversation history and context
- **Flexible Matching**: Not limited to keyword patterns
- **Tool Documentation**: Uses actual tool docstrings for decision making


## Enhanced Tools with Rich Docstrings


In [None]:
# Advanced Calculator Tool
@tool
def advanced_calculator(expression: str) -> str:
    """
    Perform mathematical calculations and solve arithmetic expressions.
    
    This tool can handle:
    - Basic arithmetic: addition (+), subtraction (-), multiplication (*), division (/)
    - Mathematical functions: sqrt, sin, cos, tan, log, exp
    - Constants: pi, e
    - Parentheses for order of operations
    - Decimal numbers and scientific notation
    
    Args:
        expression (str): Mathematical expression to evaluate (e.g., "2 + 3 * 4", "sqrt(16)", "sin(pi/2)")
    
    Returns:
        str: Result of the calculation or error message
        
    Examples:
        - "Calculate 15 * 7 + 23"
        - "What is the square root of 144?"
        - "Solve 2^8"
        - "What's 25% of 200?"
    """
    import ast
    import math

    # Names an expression may reference. Anything else is rejected rather than
    # silently stripped: stripping letters turned "sqrt(16)" into "(16)" and
    # "1e5" into "15", i.e. a confidently wrong answer.
    allowed_names = {
        "sqrt": math.sqrt,
        "sin": math.sin,
        "cos": math.cos,
        "tan": math.tan,
        "log": math.log,
        "exp": math.exp,
        "pi": math.pi,
        "e": math.e,
    }

    # Syntax-tree node types that make up plain arithmetic. Attribute access,
    # subscripts, tuples, strings, lambdas, comparisons etc. are not listed
    # and therefore refused before anything is evaluated.
    allowed_nodes = (
        ast.Expression,
        ast.BinOp,
        ast.UnaryOp,
        ast.Constant,
        ast.Call,
        ast.Name,
        ast.Load,
        ast.Add,
        ast.Sub,
        ast.Mult,
        ast.Div,
        ast.FloorDiv,
        ast.Pow,
        ast.UAdd,
        ast.USub,
    )

    try:
        # Accept the common "^" spelling for exponentiation.
        normalized = expression.replace('^', '**').strip()
        tree = ast.parse(normalized, mode="eval")

        for node in ast.walk(tree):
            if not isinstance(node, allowed_nodes):
                raise ValueError(f"unsupported syntax '{type(node).__name__}'")
            if isinstance(node, ast.Constant) and (
                isinstance(node.value, bool)
                or not isinstance(node.value, (int, float))
            ):
                raise ValueError(f"unsupported literal {node.value!r}")
            if isinstance(node, ast.Name) and node.id not in allowed_names:
                raise ValueError(f"unknown name '{node.id}'")
            if isinstance(node, ast.Call) and (
                not isinstance(node.func, ast.Name) or node.keywords
            ):
                raise ValueError("only direct calls to the supported functions are allowed")

        # The tree has been validated above, so evaluation can only touch the
        # whitelisted math names. NOTE: very large exponents (e.g. 9**9**9) can
        # still be slow; no size limit is enforced here.
        result = eval(
            compile(tree, "<expression>", "eval"),
            {"__builtins__": {}},
            dict(allowed_names),
        )
        return f"The result is: {result}"
    except Exception as e:
        # The tool contract is to report problems as text, never to raise.
        return f"Error calculating '{expression}': {str(e)}"

# Weather Service Tool
@tool
def weather_service(location: str, forecast_days: Optional[int] = 1) -> str:
    """
    Get current weather conditions and forecasts for any city worldwide.
    
    This tool provides:
    - Current temperature, humidity, and weather conditions
    - Multi-day weather forecasts (up to 7 days)
    - Weather alerts and warnings
    - Historical weather data
    - Seasonal weather patterns
    
    Args:
        location (str): City name, coordinates, or location identifier
        forecast_days (int, optional): Number of forecast days (1-7). Defaults to 1.
    
    Returns:
        str: Weather information for the specified location
        
    Examples:
        - "What's the weather in Tokyo?"
        - "Give me a 3-day forecast for London"
        - "Is it going to rain in New York tomorrow?"
        - "What's the temperature in Paris right now?"
    """
    import random

    # NOTE: this is a mock implementation — conditions and temperatures are
    # drawn at random; no weather data source is queried.

    # The signature allows None, which would fail the "> 1" comparison below,
    # so treat it as the documented default. Clamp to the documented 1-7 range
    # so an out-of-range request cannot produce an arbitrarily long forecast.
    days = 1 if forecast_days is None else max(1, min(forecast_days, 7))

    conditions = ["sunny", "cloudy", "partly cloudy", "rainy", "stormy", "snowy"]
    temp = random.randint(-10, 35)
    condition = random.choice(conditions)

    lines = [
        f"Weather in {location}:",
        f"Current: {condition}, {temp}°C",
    ]

    if days > 1:
        # Blank line separates the current conditions from the forecast table.
        lines.append("")
        lines.append(f"{days}-day forecast:")
        for day in range(1, days + 1):
            day_temp = temp + random.randint(-5, 5)
            day_condition = random.choice(conditions)
            lines.append(f"Day {day}: {day_condition}, {day_temp}°C")

    # Every line, including the last, is newline-terminated.
    return "\n".join(lines) + "\n"

# Document Search Engine Tool
@tool
def document_search_engine(query: str, document_type: Optional[str] = "all") -> str:
    """
    Search through internal documents, policies, and knowledge base.
    
    This tool can search for:
    - Company policies and procedures
    - Technical documentation and manuals
    - Compliance and regulatory documents
    - Historical records and reports
    - Product specifications and requirements
    - Legal documents and contracts
    
    Args:
        query (str): Search terms or questions about documents
        document_type (str, optional): Type of document to search ("policy", "technical", "legal", "all")
    
    Returns:
        str: Relevant document excerpts and references
        
    Examples:
        - "Find our data privacy policy"
        - "Search for loan approval procedures"
        - "What are the security guidelines for API access?"
        - "Show me compliance requirements for financial reporting"
    """
    # In-memory stand-in for a document store, keyed by document type.
    document_db = {
        "policy": [
            "Data Privacy Policy: All personal data must be encrypted...",
            "Remote Work Policy: Employees may work remotely up to 3 days...",
            "Security Policy: All systems require multi-factor authentication..."
        ],
        "technical": [
            "API Documentation: REST endpoints available at /api/v1/...",
            "Database Schema: User table contains id, name, email...",
            "Deployment Guide: Use Docker containers with Kubernetes..."
        ],
        "legal": [
            "Terms of Service: By using this service, you agree to...",
            "Privacy Notice: We collect information to provide services...",
            "Compliance Framework: SOX requirements mandate quarterly audits..."
        ]
    }

    # The signature allows None, and callers may vary the casing ("Policy");
    # normalise so both behave like the documented values instead of silently
    # matching nothing.
    requested_type = (document_type or "all").strip().lower()
    search_types = list(document_db) if requested_type == "all" else [requested_type]

    # Lower-case the query terms once instead of once per document.
    terms = [term.lower() for term in query.split()]

    results = []
    for doc_type in search_types:
        # Unknown document types simply contribute no documents.
        for doc in document_db.get(doc_type, []):
            doc_lower = doc.lower()
            # A document matches when any single query term occurs in it.
            if any(term in doc_lower for term in terms):
                results.append(f"[{doc_type.upper()}] {doc}")

    if not results:
        results.append(f"No documents found matching '{query}'")

    return "\n\n".join(results)

# Smart Validator Tool
@tool
def smart_validator(input_data: str, validation_type: str = "auto") -> str:
    """
    Validate and verify various types of data and inputs.
    
    This tool can validate:
    - Email addresses (format, domain, deliverability)
    - Phone numbers (format, country code, carrier info)
    - URLs and web addresses
    - Credit card numbers (format, type, checksum)
    - Social security numbers and tax IDs
    - Postal codes and addresses
    - Date formats and ranges
    - File formats and data integrity
    
    Args:
        input_data (str): Data to validate
        validation_type (str): Type of validation ("email", "phone", "url", "auto")
    
    Returns:
        str: Validation results with detailed feedback
        
    Examples:
        - "Validate this email: user@example.com"
        - "Is this a valid phone number: +1-555-123-4567?"
        - "Check if this URL is valid: https://example.com"
        - "Verify this credit card format: 4111-1111-1111-1111"
    """
    import re

    # NOTE: only email, phone and URL formats are actually checked below; any
    # other validation type falls through to the generic message.

    if validation_type == "auto":
        # Auto-detect validation type. URLs are tested first because they
        # routinely contain digits, hyphens or "@" (e.g. "https://my-site.com/v2"),
        # which the email/phone heuristics would otherwise claim.
        if input_data.startswith(("http://", "https://", "www.")):
            validation_type = "url"
        elif "@" in input_data and "." in input_data:
            validation_type = "email"
        elif any(char.isdigit() for char in input_data) and any(char in "+-() " for char in input_data):
            validation_type = "phone"
        else:
            validation_type = "general"

    # Display label and anchored format pattern for each supported type.
    format_checks = {
        "email": (
            "Email",
            r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$',
        ),
        "phone": (
            "Phone number",
            r'^\+?1?[-.\s]?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}$',
        ),
        "url": (
            "URL",
            r'^https?://(?:[-\w.])+(?:\:[0-9]+)?(?:/(?:[\w/_.])*(?:\?(?:[\w&=%.])*)?(?:\#(?:[\w.])*)?)?$',
        ),
    }

    if validation_type in format_checks:
        label, pattern = format_checks[validation_type]
        is_valid = re.match(pattern, input_data) is not None
        return f"{label} '{input_data}' is {'valid' if is_valid else 'invalid'}"

    return f"Performed general validation on '{input_data}' - appears to be safe text input"

# Task Assistant Tool
@tool
def task_assistant(task_description: str, context: Optional[str] = None) -> str:
    """
    General-purpose task assistance and problem-solving tool.
    
    This tool can help with:
    - Breaking down complex tasks into steps
    - Providing guidance and recommendations
    - Answering questions and explaining concepts
    - Suggesting solutions to problems
    - Planning and organizing activities
    - Research and information gathering
    
    Args:
        task_description (str): Description of the task or question
        context (str, optional): Additional context or background information
    
    Returns:
        str: Helpful guidance, steps, or information for the task
        
    Examples:
        - "How do I prepare for a job interview?"
        - "What are the steps to deploy a web application?"
        - "Help me plan a team meeting agenda"
        - "Explain machine learning concepts for beginners"
    """
    # NOTE: `context` is accepted for interface compatibility but is not used
    # by this canned-response implementation.

    # Canned guidance keyed by a keyword expected in the task description.
    responses = {
        "meeting": "For planning meetings: 1) Define objectives, 2) Create agenda, 3) Invite participants, 4) Prepare materials, 5) Set time limits",
        "interview": "Interview preparation: 1) Research the company, 2) Practice common questions, 3) Prepare examples, 4) Plan your outfit, 5) Arrive early",
        "deploy": "Deployment steps: 1) Test in staging, 2) Backup production, 3) Deploy code, 4) Run health checks, 5) Monitor performance",
        "learning": "Learning approach: 1) Start with basics, 2) Practice regularly, 3) Build projects, 4) Join communities, 5) Stay updated"
    }

    # First keyword (in dictionary order) found in the description wins.
    task_lower = task_description.lower()
    for key, response in responses.items():
        if key in task_lower:
            return f"Task assistance for '{task_description}':\n\n{response}"

    # Generic fallback. Built from adjacent literals so the returned text is a
    # single clean line; a triple-quoted literal here leaked a line break and
    # source indentation into the response.
    return (
        f"For the task '{task_description}', I recommend: "
        "1) Break it into smaller steps, 2) Gather necessary resources, "
        "3) Create a timeline, 4) Start with the most critical parts, "
        "5) Review and adjust as needed."
    )

# Collect all tools for the LLM router
AVAILABLE_TOOLS = [
    advanced_calculator,
    weather_service,
    document_search_engine,
    smart_validator,
    task_assistant,
]

print("Enhanced tools with rich docstrings created!")
print(f"Available tools: {len(AVAILABLE_TOOLS)}")
# The loop variable must not be called `tool`: at module level that would
# rebind the imported `tool` decorator, so re-running the tool-definition cell
# afterwards would fail on `@tool`.
for available_tool in AVAILABLE_TOOLS:
    print(f"   - {available_tool.name}: {available_tool.description[:50]}...")


### Tool Selection Router

In [None]:
def create_llm_tool_router(available_tools: List, llm_model: str = "gpt-4o-mini"):
    """
    Create an intelligent router that uses LLM to select appropriate tools.

    The returned router sends the tool catalogue, recent conversation context
    and the user request to the LLM, parses the JSON reply, and keeps only the
    selections that name a known tool (near-miss names are corrected by fuzzy
    matching). Any failure degrades to a "general_conversation" decision
    instead of raising.

    Args:
        available_tools: List of LangChain tools with docstrings
        llm_model: LLM model to use for routing decisions

    Returns:
        Function that routes user input to appropriate tools
    """
    import re
    from difflib import get_close_matches

    # Initialize LLM for routing decisions
    routing_llm = ChatOpenAI(model=llm_model, temperature=0.1)

    def generate_tool_descriptions(tools: List) -> str:
        """Generate formatted tool descriptions for the LLM."""
        descriptions = []
        for candidate in tools:
            tool_info = {
                "name": candidate.name,
                "description": candidate.description,
                "args": candidate.args if hasattr(candidate, 'args') else {},
                "examples": []
            }

            # Extract examples from docstring if available
            if hasattr(candidate, 'func') and candidate.func.__doc__:
                docstring = candidate.func.__doc__
                if "Examples:" in docstring:
                    examples_section = docstring.split("Examples:")[1]
                    examples = [line.strip().replace("- ", "") for line in examples_section.split("\n")
                                if line.strip() and line.strip().startswith("-")]
                    tool_info["examples"] = examples[:3]  # Limit to 3 examples

            descriptions.append(tool_info)

        return json.dumps(descriptions, indent=2)

    def extract_json_text(content: Any) -> Any:
        """Remove a surrounding Markdown code fence from an LLM reply, if present."""
        if not isinstance(content, str):
            # Leave non-string content untouched; json.loads reports the problem.
            return content
        text = content.strip()
        fenced = re.match(r"^```(?:json)?\s*(.*?)\s*```$", text, re.DOTALL | re.IGNORECASE)
        return fenced.group(1) if fenced else text

    def validate_routing_decision(decision: Dict, tools: List) -> Dict:
        """Validate and enhance the routing decision."""

        # Get available tool names
        tool_names = [candidate.name for candidate in tools]

        # Validate selected tools exist
        valid_tools = []
        for tool_selection in decision.get("selected_tools") or []:
            # Skip malformed entries instead of letting one bad item raise and
            # discard the whole decision (get_close_matches fails on None).
            if not isinstance(tool_selection, dict):
                continue
            tool_name = tool_selection.get("tool_name")
            if not isinstance(tool_name, str):
                continue

            if tool_name in tool_names:
                valid_tools.append(tool_selection)
                continue

            # Find closest match
            matches = get_close_matches(tool_name, tool_names, n=1, cutoff=0.6)
            if matches:
                tool_selection["tool_name"] = matches[0]
                tool_selection["corrected"] = True
                valid_tools.append(tool_selection)

        # Update the decision
        decision["selected_tools"] = valid_tools
        decision["execution_order"] = [selection["tool_name"] for selection in valid_tools]

        # Add tool count
        decision["tool_count"] = len(valid_tools)

        # Callers read this key directly, so make sure it is always present.
        decision.setdefault("routing_decision", "general_conversation")

        return decision

    def intelligent_router(user_input: str, conversation_history: Optional[List] = None) -> Dict[str, Any]:
        """
        Use LLM to intelligently select the most appropriate tool(s).

        Args:
            user_input: User's request/question
            conversation_history: Previous conversation context

        Returns:
            Dict with routing decision and reasoning. On failure the dict
            carries "fallback": True (unparseable reply) or "error": True
            (any other exception) and routes to general conversation.
        """

        # Generate tool descriptions
        tool_descriptions = generate_tool_descriptions(available_tools)

        # Build context from conversation history
        context = ""
        if conversation_history and len(conversation_history) > 0:
            recent_messages = conversation_history[-4:]  # Last 4 messages for context
            context = "\n".join([f"{msg.type}: {msg.content[:100]}..."
                                for msg in recent_messages if hasattr(msg, 'content')])

        # Create the routing prompt
        routing_prompt = f"""You are an intelligent tool router. Your job is to analyze user requests and select the most appropriate tool(s) to handle them.

            AVAILABLE TOOLS:
            {tool_descriptions}

            CONVERSATION CONTEXT:
            {context if context else "No previous context"}

            USER REQUEST: "{user_input}"

            Analyze the user's request and determine:
            1. Which tool(s) would best handle this request
            2. If multiple tools are needed, what's the order?
            3. What parameters should be passed to each tool?
            4. If no tools are needed, should this go to general conversation?

            Respond in this JSON format:
            {{
                "routing_decision": "tool_required" | "general_conversation" | "help_request",
                "selected_tools": [
                    {{
                        "tool_name": "tool_name",
                        "confidence": 0.95,
                        "parameters": {{"param": "value"}},
                        "reasoning": "Why this tool was selected"
                    }}
                ],
                "execution_order": ["tool1", "tool2"],
                "overall_reasoning": "Overall analysis of the request"
            }}

            IMPORTANT: Be precise with tool selection. Consider the tool descriptions and examples carefully."""

        try:
            # Get LLM routing decision
            response = routing_llm.invoke([
                SystemMessage(content="You are a precise tool routing specialist. Always respond with valid JSON."),
                HumanMessage(content=routing_prompt)
            ])

            print(f"Conversation history: {conversation_history}")
            print(f"Routing response: {response}")
            # Parse the response. Chat models frequently wrap JSON in a
            # ```json ... ``` fence, which json.loads cannot read directly.
            routing_result = json.loads(extract_json_text(response.content))
            print(f"Routing result: {routing_result}")

            # Validate and enhance the result
            validated_result = validate_routing_decision(routing_result, available_tools)

            return validated_result

        except json.JSONDecodeError as e:
            # Fallback to simple routing if JSON parsing fails
            return {
                "routing_decision": "general_conversation",
                "selected_tools": [],
                "execution_order": [],
                "overall_reasoning": f"Failed to parse LLM response: {e}",
                "fallback": True
            }
        except Exception as e:
            # General error fallback
            return {
                "routing_decision": "general_conversation",
                "selected_tools": [],
                "execution_order": [],
                "overall_reasoning": f"Router error: {e}",
                "error": True
            }

    return intelligent_router

# Build the module-level router that the agent's router node calls
intelligent_tool_router = create_llm_tool_router(AVAILABLE_TOOLS)

# Announce the router and its capabilities in a single write.
print("\n".join([
    "LLM-Powered Tool Router Created!",
    "Router Features:",
    "   - Uses LLM for intelligent tool selection",
    "   - Analyzes tool docstrings and examples",
    "   - Considers conversation context",
    "   - Provides confidence scores and reasoning",
    "   - Handles multi-tool requests",
    "   - Validates tool selections",
]))


## Complete LangGraph Agent with Intelligent Router


In [None]:

# Enhanced Agent State
class IntelligentAgentState(TypedDict):
    """State dictionary passed between the nodes of the intelligent agent graph."""
    # Conversation messages; annotated with add_messages as the reducer for this key.
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # Raw text of the current user request; read by the router node.
    user_input: str
    # Caller-supplied session identifier (agent_fn also uses it as the thread id).
    session_id: str
    # Free-form context; initialised to {} by agent_fn and not read by the nodes shown here.
    context: dict
    routing_result: dict  # Store LLM routing decision
    # Tool selections copied from routing_result["selected_tools"] by the router node.
    selected_tools: list
    # Initialised to {} by agent_fn; not written by any node visible in this notebook section.
    tool_results: dict

def create_intelligent_langgraph_agent():
    """Create a LangGraph agent with LLM-powered tool selection.

    Graph layout:
        START -> router -> help -> END          (when the router reports "help_request")
        START -> router -> llm -> tools -> llm  (while the LLM requests tool calls)
                           llm -> END           (once no tool call is requested)

    Each node returns only the state keys it changes; new messages are merged
    into the conversation by the ``add_messages`` reducer declared on
    ``IntelligentAgentState.messages``.

    Returns:
        The compiled graph, using an in-memory checkpointer.
    """

    # Initialize the main LLM for responses
    main_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.7)

    # Bind tools to the main LLM
    llm_with_tools = main_llm.bind_tools(AVAILABLE_TOOLS)

    def intelligent_router_node(state: IntelligentAgentState) -> dict:
        """Router node that uses LLM to select appropriate tools."""

        user_input = state["user_input"]
        messages = state.get("messages", [])

        print(f"Router analyzing: '{user_input}'")

        # Use the intelligent router to analyze the request
        routing_result = intelligent_tool_router(user_input, messages)
        selected = routing_result.get("selected_tools", [])

        # .get(): the decision comes from parsed LLM output, so the key is not
        # guaranteed; a missing key must not crash the graph run.
        print(f"Routing decision: {routing_result.get('routing_decision', 'general_conversation')}")
        print(f"Selected tools: {[selection['tool_name'] for selection in selected]}")

        # Store routing result in state
        return {
            "routing_result": routing_result,
            "selected_tools": selected,
        }

    def llm_node(state: IntelligentAgentState) -> dict:
        """Main LLM node that processes requests and decides on tool usage."""

        messages = state["messages"]
        routing_result = state.get("routing_result", {})

        # Create a system message based on routing analysis
        system_context = f"""You are a helpful AI assistant with access to specialized tools.
        ROUTING ANALYSIS:
        - Decision: {routing_result.get('routing_decision', 'unknown')}
        - Reasoning: {routing_result.get('overall_reasoning', 'No analysis available')}
        - Selected Tools: {[tool['tool_name'] for tool in routing_result.get('selected_tools', [])]}
        Based on the routing analysis, use the appropriate tools to help the user. If tools were recommended, use them. If not, respond conversationally.
        """

        # Add system context to messages (for this call only; it is not stored in state)
        enhanced_messages = [SystemMessage(content=system_context)] + list(messages)

        # Get LLM response
        response = llm_with_tools.invoke(enhanced_messages)

        # Return just the new message; the add_messages reducer appends it.
        return {"messages": [response]}

    def should_continue(state: IntelligentAgentState) -> str:
        """Decide whether to use tools or end the conversation."""
        last_message = state["messages"][-1]

        # Check if the LLM wants to use tools
        if hasattr(last_message, 'tool_calls') and last_message.tool_calls:
            return "tools"

        return END

    def help_node(state: IntelligentAgentState) -> dict:
        """Provide help information about available capabilities."""

        help_message = """🤖 **AI Assistant Capabilities**
        
            I'm an intelligent assistant with access to specialized tools. Here's what I can help you with:

            🧮 **Advanced Calculator** - Mathematical calculations and expressions
            Examples: "Calculate the square root of 144", "What's 25% of 200?"

            🌤️ **Weather Service** - Current weather and forecasts worldwide  
            Examples: "Weather in Tokyo", "3-day forecast for London"

            🔍 **Document Search** - Find information in internal documents
            Examples: "Find privacy policy", "Search for API documentation"

            ✅ **Smart Validator** - Validate emails, phone numbers, URLs, etc.
            Examples: "Validate user@example.com", "Check this phone number"

            🎯 **Task Assistant** - General guidance and problem-solving
            Examples: "How to prepare for an interview", "Help plan a meeting"

            Just describe what you need in natural language, and I'll automatically select the right tools to help you!"""

        # Return just the new message; the add_messages reducer appends it.
        return {"messages": [AIMessage(content=help_message)]}

    # Conditional routing from router based on LLM analysis
    def route_after_analysis(state: IntelligentAgentState) -> str:
        """Route based on the LLM's analysis."""
        routing_result = state.get("routing_result", {})
        decision = routing_result.get("routing_decision", "general_conversation")

        if decision == "help_request":
            return "help"
        return "llm"  # Let LLM handle both tool usage and general conversation

    # Create the state graph
    workflow = StateGraph(IntelligentAgentState)

    # Add nodes
    workflow.add_node("router", intelligent_router_node)
    workflow.add_node("llm", llm_node)
    workflow.add_node("tools", ToolNode(AVAILABLE_TOOLS))
    workflow.add_node("help", help_node)

    # Set entry point
    workflow.add_edge(START, "router")

    workflow.add_conditional_edges(
        "router",
        route_after_analysis,
        {"help": "help", "llm": "llm"}
    )

    # From LLM, decide whether to use tools or end
    workflow.add_conditional_edges(
        "llm",
        should_continue,
        {"tools": "tools", END: END}
    )

    # Tool execution flows back to LLM for final response
    workflow.add_edge("tools", "llm")

    # Help goes to end
    workflow.add_edge("help", END)

    # Set up memory
    memory = MemorySaver()

    # Compile the graph
    agent = workflow.compile(checkpointer=memory)

    return agent

# Compile the agent graph once; agent_fn invokes this module-level instance
intelligent_agent = create_intelligent_langgraph_agent()

# Announce the agent and its capabilities in a single write.
print("\n".join([
    "Intelligent LangGraph Agent Created!",
    "Features:",
    "   - LLM-powered tool selection",
    "   - Analyzes tool docstrings and examples",
    "   - Context-aware routing decisions",
    "   - Automatic tool parameter extraction",
    "   - Confidence scoring and reasoning",
    "   - Fallback handling for edge cases",
]))


## ValidMind model

In [6]:
def agent_fn(input):
    """
    Run the intelligent LangGraph agent on a single request.

    Args:
        input: Mapping with an "input" key (the user's text) and a
            "session_id" key, which is stored in the state and also used as
            the checkpointer thread id.

    Returns:
        Whatever ``intelligent_agent.invoke`` returns for the request.
    """
    user_text = input["input"]
    request_message = HumanMessage(content=user_text)
    session_id = input["session_id"]

    # Every state key is supplied up front, with empty containers for the
    # values that the graph fills in later.
    return intelligent_agent.invoke(
        {
            "user_input": user_text,
            "messages": [request_message],
            "session_id": session_id,
            "context": {},
            "routing_result": {},
            "selected_tools": [],
            "tool_results": {},
        },
        config={"configurable": {"thread_id": session_id}},
    )


# Register agent_fn with ValidMind as an agent model under the input id "financial_model".
vm_intelligent_model = vm.init_agent(input_id="financial_model", agent_fcn=agent_fn)
# Attach the compiled LangGraph agent so tests can reach it through `.model`
# (the graph visualization test reads `model.model`).
vm_intelligent_model.model = intelligent_agent

In [None]:
vm_intelligent_model.model

## Prepare sample dataset

In [8]:
import pandas as pd
import uuid

# Sample evaluation set. Each row holds:
#   input            - the user request sent to the agent
#   expected_tools   - tool names the agent is expected to call
#   possible_outputs - keywords; the accuracy test passes a row when any one of
#                      them appears (case-insensitively) in the response
#   session_id       - a fresh UUID, so every row gets its own conversation thread
test_dataset = pd.DataFrame([
    {
        "input": "Calculate the square root of 256 plus 15",
        "expected_tools": ["advanced_calculator"],
        # NOTE(review): sqrt(256) + 15 = 31 and sqrt(256 + 15) ≈ 16.46; 271 is
        # 256 + 15 and matches neither reading — confirm the expected value.
        "possible_outputs": [271],
        "session_id": str(uuid.uuid4())
    },
    {
        "input": "What's the weather like in Barcelona today?", 
        "expected_tools": ["weather_service"],
        "possible_outputs": ["sunny", "rainy", "cloudy"],
        "session_id": str(uuid.uuid4())
    },
    {
        "input": "Find our company's data privacy policy",
        "expected_tools": ["document_search_engine"],
        # NOTE(review): none of these file names occur in the documents that
        # document_search_engine returns — confirm the intended keywords.
        "possible_outputs": ["privacy_policy.pdf", "data_protection.doc", "company_privacy_guidelines.txt"],
        "session_id": str(uuid.uuid4())
    },
    {
        "input": "Validate this email address: john.doe@company.com",
        "expected_tools": ["smart_validator"],
        "possible_outputs": ["valid", "invalid"],
        "session_id": str(uuid.uuid4())
    },
    {
        "input": "How should I prepare for a technical interview?",
        "expected_tools": ["task_assistant"],
        "possible_outputs": ["algorithms", "data structures", "system design", "coding practice"],
        "session_id": str(uuid.uuid4())
    },
    {
        "input": "What's 25% of 480 and show me the weather in Tokyo",
        "expected_tools": ["advanced_calculator", "weather_service"],
        "possible_outputs": [120, "sunny", "rainy", "cloudy", "20°C", "68°F"],
        "session_id": str(uuid.uuid4())
    },
    {
        "input": "Help me understand machine learning basics",
        "expected_tools": ["task_assistant"],
        "possible_outputs": ["supervised", "unsupervised", "neural networks", "training", "testing"],
        "session_id": str(uuid.uuid4())
    },
    {
        "input": "What can you do for me?",
        # NOTE(review): if the router classifies this as "help_request", the
        # graph goes to the help node, which makes no tool calls — confirm that
        # "task_assistant" is the intended expectation.
        "expected_tools": ["task_assistant"],
        "possible_outputs": ["calculator", "weather", "email validator", "document search", "general assistance"],
        "session_id": str(uuid.uuid4())
    },
    {
        "input": "Calculate 5+3 and check the weather in Paris",
        "expected_tools": ["advanced_calculator", "weather_service"],
        "possible_outputs": [8, "sunny", "rainy", "cloudy", "22°C", "72°F"],
        "session_id": str(uuid.uuid4())
    }
])


### Initialize ValidMind dataset


In [9]:
# Register the sample DataFrame with ValidMind under the input id "test_dataset".
# "possible_outputs" (the expected-keyword lists) is declared as the target column.
vm_test_dataset = vm.init_dataset(
    input_id="test_dataset",
    dataset=test_dataset,
    target_column="possible_outputs"
)

### Run agent and assign predictions

In [None]:
# Attach the agent's outputs to the dataset as its prediction column.
# NOTE(review): presumably this calls agent_fn once per row (one LLM round-trip
# or more each) — confirm against the ValidMind documentation.
vm_test_dataset.assign_predictions(vm_intelligent_model)

#### Dataframe display settings

In [None]:
# Widen the console output and show full cell contents. max_colwidth is set
# once: an earlier setting of 40 was immediately overridden by None and never
# took effect.
pd.set_option('display.width', 120)
pd.set_option('display.max_colwidth', None)
vm_test_dataset._df

#### Agent prediction column adjustment in dataset

In [None]:
# Keep the raw agent state in 'output' and reduce the prediction column to the
# text of the final message.
# Re-running this cell must not fail: after the first run the prediction column
# already holds plain strings, so the raw states are read back from 'output'.
if 'output' in vm_test_dataset._df.columns:
    output = vm_test_dataset._df['output']
else:
    output = vm_test_dataset._df['financial_model_prediction']
predictions = [row['messages'][-1].content for row in output]

vm_test_dataset._df['output'] = output
vm_test_dataset._df['financial_model_prediction'] = predictions
vm_test_dataset._df.head(2)

## Visualization

In [None]:
import langgraph

@vm.test("my_custom_tests.LangGraphVisualization")
def LangGraphVisualization(model):
    """
    Visualizes the LangGraph workflow structure using Mermaid diagrams.
    
    ### Purpose
    Creates a visual representation of the LangGraph agent's workflow using Mermaid diagrams
    to show the connections and flow between different components. This helps validate that
    the agent's architecture is properly structured.
    
    ### Test Mechanism
    1. Retrieves the graph representation from the model using get_graph()
    2. Attempts to render it as a Mermaid diagram
    3. Returns the visualization and validation results
    
    ### Signs of High Risk
    - Failure to generate graph visualization indicates potential structural issues
    - Missing or broken connections between components
    - Invalid graph structure that cannot be rendered
    """
    try:
        # Import the class explicitly. `import langgraph` on its own does not
        # load the `langgraph.graph.state` submodule; the attribute chain only
        # resolves when some other import has already pulled it in.
        from langgraph.graph.state import CompiledStateGraph

        # A missing `.model` attribute and a wrong type are reported the same way.
        compiled_graph = getattr(model, 'model', None)
        if not isinstance(compiled_graph, CompiledStateGraph):
            return {
                'test_results': False,
                'summary': {
                    'status': 'FAIL', 
                    'details': 'Model must have a LangGraph Graph object as model attribute'
                }
            }

        # On success the rendered Mermaid PNG itself is the test result.
        return compiled_graph.get_graph(xray=False).draw_mermaid_png()
    except Exception as e:
        # Any rendering problem is reported as a failed test rather than raised.
        return {
            'test_results': False, 
            'summary': {
                'status': 'FAIL',
                'details': f'Failed to generate graph visualization: {str(e)}'
            }
        }

# Run the custom visualization test against the agent model and log the result to ValidMind.
vm.tests.run_test(
    "my_custom_tests.LangGraphVisualization",
    inputs = {
        "model": vm_intelligent_model
    }
).log()

## Accuracy Test

In [None]:
import pandas as pd
import validmind as vm

@vm.test("my_custom_tests.accuracy_test")
def accuracy_test(model, dataset, list_of_columns):
    """
    Check whether each model response mentions at least one expected keyword.

    A row passes when any keyword from the dataset's target column occurs,
    case-insensitively, as a substring of the model's prediction for that row.

    Args:
        model: Model whose predictions are read via ``dataset.y_pred(model)``.
        dataset: Dataset providing the targets (``dataset.y``) and the
            underlying DataFrame.
        list_of_columns: Dataset columns to copy into the result table; each is
            emitted with a ``_details`` suffix.

    Returns:
        DataFrame with the copied columns plus ``actual``, ``expected``,
        ``passed`` and ``error`` (``None`` for passing rows, otherwise a message
        listing that row's expected keywords).
    """
    df = dataset._df

    # Pre-compute responses for all tests
    y_true = dataset.y.tolist()
    y_pred = dataset.y_pred(model).tolist()

    test_results = []
    errors = []
    for response, keywords in zip(y_pred, y_true):
        # Lower-case the response once per row rather than once per keyword.
        response_text = str(response).lower()
        passed = any(str(keyword).lower() in response_text for keyword in keywords)
        test_results.append(passed)
        # The error is decided per row. Testing the truthiness of the whole
        # result list reported "no error" for every row of a non-empty dataset.
        errors.append(
            None if passed
            else f'Response did not contain any expected keywords: {keywords}'
        )

    results = pd.DataFrame()
    column_names = [col + "_details" for col in list_of_columns]
    results[column_names] = df[list_of_columns]
    results["actual"] = y_pred
    results["expected"] = y_true
    results["passed"] = test_results
    results["error"] = errors

    return results
   
# Run the keyword-based accuracy test on the test dataset; the "input" column
# is passed as `list_of_columns`, so it is copied into the result table.
result = vm.tests.run_test(
    "my_custom_tests.accuracy_test",
    inputs={
        "dataset": vm_test_dataset,
        "model": vm_intelligent_model
    },
    params={
        "list_of_columns": ["input"]
    }
)
# Log the test result.
result.log()

## Tool Call Accuracy Test

In [None]:
import validmind as vm

# Test with a real LangGraph result instead of creating mock objects
@vm.test("my_custom_tests.tool_call_accuracy")
def tool_call_accuracy(dataset, agent_output_column, expected_tools_column):
    """
    Score, per dataset row, how many of the expected tools the agent called.

    For each row of ``dataset._df`` the messages stored under
    ``row[agent_output_column]['messages']`` are scanned for tool calls, and
    the called tool names are compared with ``row[expected_tools_column]``.

    Returns a list with one dict per row containing ``accuracy`` (matched
    expected tools divided by the number of expected tools, or 0.0 when none
    are expected), ``expected_tools``, ``found_tools``, ``matches`` and
    ``total_expected``.
    """
    def _called_tool_names(messages):
        """Collect the names of all tool calls found on the given messages."""
        names = []
        for msg in messages:
            calls = getattr(msg, 'tool_calls', None)
            if not calls:
                continue
            # A tool call may be a dict or an object exposing `.name`.
            names.extend(
                call['name'] if isinstance(call, dict) else call.name
                for call in calls
            )
        return names

    def _score(messages, expected_tools):
        """Compare the tools called in `messages` with `expected_tools`."""
        found = _called_tool_names(messages)

        if expected_tools:
            hits = len([tool for tool in expected_tools if tool in found])
            total = len(expected_tools)
            ratio = hits / total
        else:
            hits, total, ratio = 0, 0, 0.0

        return {
            'accuracy': ratio,
            'expected_tools': expected_tools,
            'found_tools': found,
            'matches': hits,
            'total_expected': total
        }

    return [
        _score(row[agent_output_column]['messages'], row[expected_tools_column])
        for _, row in dataset._df.iterrows()
    ]

# Run the tool-call accuracy test, reading agent results from the "output"
# column and the expected tool names from the "expected_tools" column.
# NOTE(review): unlike the other test runs in this notebook, this result is
# not followed by .log() - confirm whether that is intentional.
vm.tests.run_test(
    "my_custom_tests.tool_call_accuracy",
    inputs = {
        "dataset": vm_test_dataset,
    },
    params = {
        "agent_output_column": "output",
        "expected_tools_column": "expected_tools"
    }
)

## RAGAS Tests


### Dataset preparation - Extract context from the agent's state

In [23]:
from utils import capture_tool_output_messages, extract_tool_results_only, get_final_agent_response, format_tool_outputs_for_display

# Build one "tool_messages" entry per dataset row from the agent's tool outputs.
# Each entry is a one-element list holding the concatenated content of all tool
# outputs for that row; the column is later passed to the RAGAS tests as
# `retrieved_contexts_column`.
tool_messages = []
for _, row in vm_test_dataset._df.iterrows():
    # Capture all tool outputs and metadata from this row's agent result.
    captured_data = capture_tool_output_messages(row['output'])

    # Join in a single pass instead of growing the string with repeated `+=`.
    tool_message = "".join(
        output['content'] for output in captured_data["tool_outputs"]
    )
    tool_messages.append([tool_message])

vm_test_dataset._df['tool_messages'] = tool_messages

In [None]:
# Preview the first two rows, which now include the 'tool_messages' column.
vm_test_dataset._df.head(2)

### Faithfulness

In [None]:
# Run the RAGAS Faithfulness test: the question comes from "input", the
# response from "financial_model_prediction", and the retrieved contexts from
# the "tool_messages" column built above.
# Each `param_grid` entry is a single-element list, i.e. one value per parameter.
vm.tests.run_test(
    "validmind.model_validation.ragas.Faithfulness",
    inputs={"dataset": vm_test_dataset},
    param_grid={
        "user_input_column": ["input"],
        "response_column": ["financial_model_prediction"],
        "retrieved_contexts_column": ["tool_messages"],
    },
).log()

### Response Relevancy

In [None]:
# Run the RAGAS ResponseRelevancy test on the same input / response / context
# columns. This call passes plain values through `params`, whereas the other
# RAGAS runs in this notebook use `param_grid` with single-element lists.
vm.tests.run_test(
    "validmind.model_validation.ragas.ResponseRelevancy",
    inputs={"dataset": vm_test_dataset},
    params={
        "user_input_column": "input",
        "response_column": "financial_model_prediction",
        "retrieved_contexts_column": "tool_messages",
    }
).log()

### Context Recall

In [None]:
# Run the RAGAS ContextRecall test with "tool_messages" as the retrieved
# contexts.
# NOTE(review): the reference column is "financial_model_prediction", i.e. the
# model's own prediction - confirm that a ground-truth column is not intended.
vm.tests.run_test(
    "validmind.model_validation.ragas.ContextRecall",
    inputs={"dataset": vm_test_dataset},
    param_grid={
        "user_input_column": ["input"],
        "retrieved_contexts_column": ["tool_messages"],
        "reference_column": ["financial_model_prediction"],
    },
).log()

### AspectCritic

In [None]:
# Run the RAGAS AspectCritic test on the input / response / context columns
# and log the result. Only column names are supplied here; no other test
# parameters are passed.
vm.tests.run_test(
    "validmind.model_validation.ragas.AspectCritic",
    inputs={"dataset": vm_test_dataset},
    param_grid={
        "user_input_column": ["input"],
        "response_column": ["financial_model_prediction"],
        "retrieved_contexts_column": ["tool_messages"],
    },
).log()