# Day 7 - Lab 2: Agent Interoperability with A2A Protocol (SOLUTION)

This solution notebook contains complete implementations for building a multi-agent onboarding system.

## Key Concepts Demonstrated:
1. **Agent Discovery**: Automatic capability registration
2. **Message Bus**: Asynchronous agent communication
3. **Orchestration**: Coordinator-driven workflows

In [None]:
# Complete environment setup
import sys
import os
import json
import uuid
import time
import asyncio
from datetime import datetime, timedelta
from dataclasses import dataclass, field, asdict
from typing import List, Dict, Optional, Any, Callable
from enum import Enum
from collections import defaultdict
import queue
import threading

# Resolve the directory two levels above the current working directory and
# put it at the front of sys.path (only once, so re-running the cell does
# not add duplicates).
# NOTE(review): presumably this is the repository root that contains the
# `utils` module imported below - confirm against the project layout.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Project-local helper; must be imported after the sys.path tweak above.
from utils import save_artifact

print('✅ Environment ready - Pure Python A2A implementation')

In [None]:
# Load all onboarding data.
# The files are opened with an explicit UTF-8 encoding: JSON text is UTF-8 by
# specification, whereas open() without `encoding` falls back to the
# platform's locale encoding and can mis-decode non-ASCII content.
assets_path = 'assets'

with open(f'{assets_path}/onboarding_docs.json', 'r', encoding='utf-8') as f:
    onboarding_docs = json.load(f)
with open(f'{assets_path}/roles_access_matrix.json', 'r', encoding='utf-8') as f:
    roles_access = json.load(f)
with open(f'{assets_path}/training_catalog.json', 'r', encoding='utf-8') as f:
    training_catalog = json.load(f)
with open(f'{assets_path}/new_hires_sample.json', 'r', encoding='utf-8') as f:
    new_hires = json.load(f)

print(f'Loaded data for {len(new_hires["new_hires"])} new hires')

In [None]:
# Complete A2A Protocol Implementation
class MessageType(Enum):
    """Kinds of messages that can travel over the A2A bus."""
    ANNOUNCE = 'announce'
    REQUEST = 'request'
    RESPONSE = 'response'
    ERROR = 'error'
    EVENT = 'event'
    DISCOVERY = 'discovery'

@dataclass
class A2AMessage:
    """Envelope for a single agent-to-agent message.

    Every field has a default, so a message can be built from any subset of
    keyword arguments. `msg_id` and `timestamp` are generated per instance.
    """
    protocol: str = 'a2a.v1'
    # Unique per message (random UUID4 string).
    msg_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    # Responders set this to the `msg_id` of the message they are answering.
    corr_id: Optional[str] = None
    type: MessageType = MessageType.REQUEST
    sender: str = ''
    # An agent id, or '*' for a broadcast.
    recipient: str = ''
    capabilities: List[str] = field(default_factory=list)
    payload: Dict[str, Any] = field(default_factory=dict)
    # NOTE(review): presumably a time-to-live in milliseconds; nothing in this
    # file enforces it.
    ttl_ms: int = 10000
    # Local (naive) creation time in ISO-8601 format.
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    
    def to_dict(self) -> Dict:
        """Return a plain-dict copy of the message with `type` as its string value."""
        d = asdict(self)
        d['type'] = self.type.value
        return d
    
    @classmethod
    def from_dict(cls, data: Dict) -> 'A2AMessage':
        """Build a message from a dict such as the one produced by `to_dict`.

        Args:
            data: Mapping of field names to values. `type` may be the string
                value of a `MessageType` (or a `MessageType` member); when it
                is absent the message defaults to a request.

        Returns:
            A new `A2AMessage`.

        Raises:
            ValueError: If `type` is not a valid `MessageType` value.
            TypeError: If `data` contains keys that are not message fields.
        """
        # Work on a shallow copy: the caller's dict (for example an entry of
        # a message log) must not have its 'type' string replaced by an enum.
        fields_by_name = dict(data)
        fields_by_name['type'] = MessageType(fields_by_name.get('type', 'request'))
        return cls(**fields_by_name)

print('✅ A2A Protocol defined')

## SOLUTION: Challenge 1 - Message Bus and Service Registry

In [None]:
class ServiceRegistry:
    """Directory of agents and the capabilities they offer.

    Every method takes `self.lock` before touching the two lookup tables.
    """

    def __init__(self):
        self.lock = threading.Lock()
        self.agents = {}  # agent_id -> info dict supplied at registration
        self.capabilities = defaultdict(set)  # capability name -> agent ids

    def register_agent(self, agent_id: str, capabilities: List[str], info: Dict = None):
        """Record `agent_id` (with optional `info`) as a provider of each capability."""
        with self.lock:
            self.agents[agent_id] = info if info else {}
            for name in capabilities:
                self.capabilities[name].add(agent_id)

    def unregister_agent(self, agent_id: str):
        """Forget `agent_id` everywhere; unknown ids are ignored."""
        with self.lock:
            self.agents.pop(agent_id, None)
            for providers in self.capabilities.values():
                providers.discard(agent_id)

    def find_providers(self, capability: str) -> List[str]:
        """Return the ids of all agents registered for `capability` (possibly empty)."""
        with self.lock:
            # Membership test first so that a lookup never creates an empty
            # entry in the defaultdict.
            if capability not in self.capabilities:
                return []
            return list(self.capabilities[capability])

class A2ABus:
    """In-process message bus that delivers messages on a background thread.

    `publish` enqueues a message; a worker thread started by the constructor
    dequeues it and calls the handler of the addressed agent, or of every
    subscriber except the sender when the recipient is '*'. Handlers therefore
    run on the worker thread, not on the publisher's thread.
    """
    def __init__(self):
        self.registry = ServiceRegistry()
        self.handlers = {}  # agent_id -> handler function
        self.message_log = []  # For debugging: dict form of every published message
        self.running = True
        self.message_queue = queue.Queue()
        # Guards `handlers`, which is written by subscribers and read by the
        # worker thread.
        self._handlers_lock = threading.Lock()
        # Daemon thread: a bus that is never shut down must not keep the
        # interpreter alive at exit.
        self.executor = threading.Thread(target=self._process_messages, daemon=True)
        self.executor.start()
    
    def publish(self, message: A2AMessage):
        """Log `message` and queue it for asynchronous delivery."""
        self.message_log.append(message.to_dict())
        self.message_queue.put(message)
    
    def subscribe(self, agent_id: str, handler: Callable):
        """Register `handler` to receive messages addressed to `agent_id`.

        A later subscription for the same id replaces the earlier handler.
        """
        with self._handlers_lock:
            self.handlers[agent_id] = handler
    
    def _recipients(self, message: A2AMessage):
        """Return a snapshot list of (agent_id, handler) pairs that should get `message`."""
        # The snapshot is taken under the lock so that a concurrent subscribe()
        # cannot raise "dictionary changed size during iteration" inside the
        # worker loop, which would silently kill the dispatcher thread.
        with self._handlers_lock:
            if message.recipient == '*':
                return [
                    (agent_id, handler)
                    for agent_id, handler in self.handlers.items()
                    if agent_id != message.sender
                ]
            if message.recipient in self.handlers:
                return [(message.recipient, self.handlers[message.recipient])]
        # Messages for unknown recipients are dropped.
        return []
    
    def _process_messages(self):
        """Worker loop: deliver queued messages until `running` is cleared."""
        while self.running:
            try:
                # Short timeout so the loop notices `running = False` promptly.
                message = self.message_queue.get(timeout=0.1)
            except queue.Empty:
                continue
            
            # Handlers are invoked outside the lock so they may subscribe or
            # publish without deadlocking.
            for agent_id, handler in self._recipients(message):
                try:
                    handler(message)
                except Exception as e:
                    # One failing handler must not stop delivery to the others.
                    print(f'Error handling message in {agent_id}: {e}')
    
    def shutdown(self):
        """Stop the worker thread and wait for it to finish.

        Messages still in the queue when the loop exits are not delivered.
        """
        self.running = False
        # A thread cannot join itself; this matters if a handler calls shutdown().
        if threading.current_thread() is not self.executor:
            self.executor.join()

# Test implementation: smoke-test the registry through a bus instance.
bus = A2ABus()
try:
    bus.registry.register_agent('test-agent', ['test.capability'])
    providers = bus.registry.find_providers('test.capability')
    assert 'test-agent' in providers
finally:
    # A2ABus starts its worker thread in the constructor; stop it here so the
    # smoke test does not leave a background thread polling forever. The
    # registry on `bus` remains usable after shutdown.
    bus.shutdown()
print('✅ Bus and Registry fully implemented')

## SOLUTION: Challenge 2 - Specialized Agents

In [None]:
class BaseAgent:
    """Common lifecycle and message routing for all agents.

    Subclasses override the `_handle_*` hooks they care about; the defaults
    ignore announcements, requests, responses and events, print errors, and
    answer discovery requests for capabilities the agent provides.
    """
    def __init__(self, agent_id: str, capabilities: List[str], bus: A2ABus):
        self.agent_id = agent_id
        self.capabilities = capabilities
        self.bus = bus
        self.running = False
        # Dispatch table covering every MessageType, so that no valid protocol
        # message is reported as "unknown".
        self.message_handlers = {
            MessageType.ANNOUNCE: self._handle_announce,
            MessageType.REQUEST: self._handle_request,
            MessageType.RESPONSE: self._handle_response,
            MessageType.ERROR: self._handle_error,
            MessageType.EVENT: self._handle_event,
            MessageType.DISCOVERY: self._handle_discovery
        }
    
    def start(self):
        """Start the agent and announce capabilities."""
        self.running = True
        self.bus.subscribe(self.agent_id, self.handle_message)
        self.bus.registry.register_agent(self.agent_id, self.capabilities)
        
        # Announce capabilities to every other subscriber
        announce = A2AMessage(
            type=MessageType.ANNOUNCE,
            sender=self.agent_id,
            recipient='*',
            capabilities=self.capabilities,
            payload={'status': 'online'}
        )
        self.bus.publish(announce)
        print(f'Agent {self.agent_id} started with capabilities: {self.capabilities}')
    
    def stop(self):
        """Stop the agent and remove it from the service registry.

        The bus subscription itself stays in place (the bus offers no
        unsubscribe), so `handle_message` drops anything that arrives while
        the agent is stopped.
        """
        self.running = False
        self.bus.registry.unregister_agent(self.agent_id)
    
    def handle_message(self, message: A2AMessage):
        """Route a message to the handler for its type.

        Messages received while the agent is not running (before `start()` or
        after `stop()`) are ignored.
        """
        if not self.running:
            return
        handler = self.message_handlers.get(message.type, self._handle_unknown)
        handler(message)
    
    def _handle_announce(self, message: A2AMessage):
        pass  # Override in subclass if needed
    
    def _handle_request(self, message: A2AMessage):
        pass  # Override in subclass
    
    def _handle_response(self, message: A2AMessage):
        pass  # Override in subclass
    
    def _handle_error(self, message: A2AMessage):
        print(f'Error from {message.sender}: {message.payload}')
    
    def _handle_event(self, message: A2AMessage):
        pass  # Override in subclass if needed
    
    def _handle_discovery(self, message: A2AMessage):
        """Respond to capability discovery requests."""
        requested_cap = message.payload.get('capability')
        # Stay silent unless this agent actually provides the capability.
        if requested_cap in self.capabilities:
            response = A2AMessage(
                type=MessageType.RESPONSE,
                sender=self.agent_id,
                recipient=message.sender,
                corr_id=message.msg_id,
                payload={'provides': requested_cap}
            )
            self.bus.publish(response)
    
    def _handle_unknown(self, message: A2AMessage):
        print(f'{self.agent_id} received unknown message type: {message.type}')

In [None]:
class DocumentationAgent(BaseAgent):
    """Serves the onboarding policies that apply to a given role."""

    def __init__(self, bus: A2ABus):
        super().__init__('agent://documentation', ['onboarding.docs', 'policy.lookup'], bus)
        self.docs = onboarding_docs

    def _handle_request(self, message: A2AMessage):
        """Answer 'get_policies' requests; any other request type is ignored."""
        if message.payload.get('type') != 'get_policies':
            return

        role = message.payload.get('role')

        def applies_to_role(policy):
            # A policy applies when it targets everyone or names this role.
            audience = policy.get('required_for_roles', [])
            return 'all' in audience or role in audience

        matching = [entry for entry in self.docs['policies'] if applies_to_role(entry)]

        # Reply to the requester, correlated with the original message id.
        reply = A2AMessage(
            type=MessageType.RESPONSE,
            sender=self.agent_id,
            recipient=message.sender,
            corr_id=message.msg_id,
            payload={'policies': matching},
        )
        self.bus.publish(reply)

class AccessSetupAgent(BaseAgent):
    """Turns a role's required systems into pending access requests."""

    def __init__(self, bus: A2ABus):
        super().__init__('agent://access', ['onboarding.access', 'access.setup'], bus)
        self.roles_data = roles_access

    def _handle_request(self, message: A2AMessage):
        """Answer 'setup_access' requests; any other request type is ignored."""
        body = message.payload
        if body.get('type') != 'setup_access':
            return

        hire_id = body.get('hire_id')
        role = body.get('role')

        # Roles missing from the matrix yield an empty list of systems.
        systems = self.roles_data['roles'].get(role, {}).get('required_systems', [])
        pending = [
            {'system': name, 'hire_id': hire_id, 'role': role, 'status': 'pending'}
            for name in systems
        ]

        # Reply to the requester, correlated with the original message id.
        reply = A2AMessage(
            type=MessageType.RESPONSE,
            sender=self.agent_id,
            recipient=message.sender,
            corr_id=message.msg_id,
            payload={'access_requests': pending},
        )
        self.bus.publish(reply)

class TrainingScheduleAgent(BaseAgent):
    """Selects the training courses required for a given role."""

    def __init__(self, bus: A2ABus):
        super().__init__('agent://training', ['onboarding.training', 'training.schedule'], bus)
        self.training = training_catalog

    def _handle_request(self, message: A2AMessage):
        """Answer 'schedule_training' requests; any other request type is ignored."""
        if message.payload.get('type') != 'schedule_training':
            return

        role = message.payload.get('role')
        # The payload may also carry 'start_date'; it does not influence the
        # selection below.

        selected = []
        for entry in self.training['courses']:
            audience = entry.get('required_for_roles', [])
            if 'all' not in audience and role not in audience:
                continue
            selected.append({
                'course_id': entry['id'],
                'title': entry['title'],
                'duration': entry['duration'],
                # Courses without an explicit deadline default to 30 days.
                'due_within_days': entry.get('due_within_days', 30),
            })

        # Reply to the requester, correlated with the original message id.
        reply = A2AMessage(
            type=MessageType.RESPONSE,
            sender=self.agent_id,
            recipient=message.sender,
            corr_id=message.msg_id,
            payload={'training_courses': selected},
        )
        self.bus.publish(reply)

print('✅ All specialized agents implemented')

## SOLUTION: Challenge 3 - Orchestration

In [None]:
class CoordinatorAgent(BaseAgent):
    """Orchestrates onboarding by fanning requests out to the specialist agents.

    For each hire the coordinator sends one request each to the documentation,
    access and training agents, then merges their responses into a single plan
    dict stored in `onboarding_plans`.
    """

    # request kind -> (key in the plan, key in the response payload)
    _RESPONSE_FIELDS = {
        'documentation': ('documentation', 'policies'),
        'access': ('access_requests', 'access_requests'),
        'training': ('training_schedule', 'training_courses'),
    }

    def __init__(self, bus: A2ABus):
        super().__init__('agent://coordinator', ['onboarding.orchestration'], bus)
        self.pending_requests = {}  # msg_id -> (request kind, hire_id)
        self.onboarding_plans = {}  # hire_id -> plan
        self.response_timeout = 5  # seconds to wait for all responses to a plan
    
    def _send_request(self, recipient: str, payload: Dict, kind: str, hire_id) -> str:
        """Publish one request and return its message id."""
        request = A2AMessage(
            type=MessageType.REQUEST,
            sender=self.agent_id,
            recipient=recipient,
            payload=payload
        )
        # Register before publishing: the response is handled on the bus
        # thread and may arrive before publish() even returns.
        self.pending_requests[request.msg_id] = (kind, hire_id)
        self.bus.publish(request)
        return request.msg_id
    
    async def _wait_for_responses(self, request_ids: List[str], poll_interval: float = 0.05) -> bool:
        """Wait until every request was answered or `response_timeout` elapsed.

        Returns:
            True if all responses arrived, False on timeout. On timeout the
            unanswered requests are dropped from `pending_requests` so the
            table does not grow without bound.
        """
        deadline = time.monotonic() + self.response_timeout
        while any(request_id in self.pending_requests for request_id in request_ids):
            if time.monotonic() >= deadline:
                for request_id in request_ids:
                    self.pending_requests.pop(request_id, None)
                return False
            # Yield to the event loop; responses are processed on the bus thread.
            await asyncio.sleep(poll_interval)
        return True
    
    async def create_onboarding_plan(self, hire_data: Dict) -> Dict:
        """Orchestrate complete onboarding plan.

        Args:
            hire_data: Must contain 'id', 'role' and 'name'; 'start_date' is
                forwarded to the training agent when present.

        Returns:
            The plan dict for this hire. It is returned as soon as all three
            agents have answered, or after `response_timeout` seconds with
            whatever sections were filled in by then (missing sections stay
            empty lists).

        Raises:
            KeyError: If a required key is missing from `hire_data`.
        """
        hire_id = hire_data['id']
        role = hire_data['role']
        
        plan = {
            'hire_id': hire_id,
            'hire_name': hire_data['name'],
            'role': role,
            'documentation': [],
            'access_requests': [],
            'training_schedule': [],
            'created_at': datetime.now().isoformat()
        }
        
        # Store the plan before sending anything so responses can find it
        self.onboarding_plans[hire_id] = plan
        
        request_ids = [
            self._send_request(
                'agent://documentation',
                {'type': 'get_policies', 'role': role},
                'documentation', hire_id
            ),
            self._send_request(
                'agent://access',
                {'type': 'setup_access', 'hire_id': hire_id, 'role': role},
                'access', hire_id
            ),
            self._send_request(
                'agent://training',
                {
                    'type': 'schedule_training',
                    'role': role,
                    'start_date': hire_data.get('start_date')
                },
                'training', hire_id
            ),
        ]
        
        await self._wait_for_responses(request_ids)
        
        return self.onboarding_plans[hire_id]
    
    def _handle_response(self, message: A2AMessage):
        """Merge a response from a specialist agent into the matching plan.

        Responses whose `corr_id` is not pending (unknown, duplicate, or
        arriving after the timeout) are ignored.
        """
        pending = self.pending_requests.get(message.corr_id)
        if pending is None:
            return
        kind, hire_id = pending
        plan = self.onboarding_plans.get(hire_id)
        
        if plan is not None and kind in self._RESPONSE_FIELDS:
            plan_key, payload_key = self._RESPONSE_FIELDS[kind]
            plan[plan_key] = message.payload.get(payload_key, [])
        
        # Clean up only after the plan was updated: the waiter treats a
        # missing entry as "answered". pop() tolerates a concurrent timeout
        # having removed the entry already.
        self.pending_requests.pop(message.corr_id, None)

print('✅ Coordinator orchestration implemented')

## Integration Test: Complete Multi-Agent Onboarding

In [None]:
async def test_multi_agent_onboarding():
    """End-to-end test of the multi-agent system.

    Starts all four agents on a fresh bus, builds an onboarding plan for every
    entry in `new_hires['new_hires']` and asserts that each plan has at least
    one policy, access request and training course.

    Returns:
        The list of plan dicts, in the order of the hires.

    Raises:
        AssertionError: If a plan is incomplete.
    """
    # Create bus
    bus = A2ABus()
    
    coordinator = CoordinatorAgent(bus)
    agents = [
        DocumentationAgent(bus),
        AccessSetupAgent(bus),
        TrainingScheduleAgent(bus),
        coordinator
    ]
    plans = []
    
    try:
        # Start all agents
        for agent in agents:
            agent.start()
        
        # Wait for agents to announce
        await asyncio.sleep(0.5)
        
        # Create onboarding plans for all hires
        for hire in new_hires['new_hires']:
            print(f"\nCreating onboarding plan for {hire['name']} ({hire['role']})...")
            plan = await coordinator.create_onboarding_plan(hire)
            plans.append(plan)
            
            # Verify plan completeness
            assert plan['hire_id'] == hire['id']
            assert len(plan['documentation']) > 0, 'Should have policies'
            assert len(plan['access_requests']) > 0, 'Should have access requests'
            assert len(plan['training_schedule']) > 0, 'Should have training'
            
            print(f"  ✓ {len(plan['documentation'])} policies")
            print(f"  ✓ {len(plan['access_requests'])} access requests")
            print(f"  ✓ {len(plan['training_schedule'])} training courses")
    finally:
        # Cleanup runs even when an assertion fails, so the bus worker thread
        # is never left running after a failed test.
        for agent in agents:
            agent.stop()
        bus.shutdown()
    
    print(f"\n✅ Successfully created {len(plans)} onboarding plans")
    return plans

# Run the test
# NOTE: the bare top-level `await` below is only valid where an event loop is
# already running for the cell (e.g. Jupyter/IPython); as a plain script this
# line is a syntax error.
import asyncio
plans = await test_multi_agent_onboarding()

# Save results: the plans are serialized to pretty-printed JSON and handed to
# the project's save_artifact helper.
# NOTE(review): the output location is decided by save_artifact; the message
# below assumes it writes under artifacts/ - confirm against utils.
save_artifact(json.dumps(plans, indent=2), 'onboarding_plans.json')
print("\n📄 Plans saved to artifacts/onboarding_plans.json")

## Advanced Features: Resilience and Performance

In [None]:
class ResilientCoordinatorAgent(CoordinatorAgent):
    """Enhanced coordinator with retry and batch processing."""
    
    # Sections that must be non-empty for a plan to count as complete.
    _REQUIRED_SECTIONS = ('documentation', 'access_requests', 'training_schedule')
    
    async def create_onboarding_plan_with_retry(self, hire_data: Dict, max_retries: int = 3) -> Dict:
        """Create a plan, retrying while it is incomplete or creation fails.

        Args:
            hire_data: Hire record passed through to `create_onboarding_plan`.
            max_retries: Maximum number of attempts; must be at least 1.

        Returns:
            The first complete plan, or the most recent (incomplete) plan if
            the last attempt still produced an incomplete one.

        Raises:
            ValueError: If `max_retries` is less than 1.
            Exception: Whatever the final attempt raised, if it failed.
        """
        if max_retries < 1:
            raise ValueError('max_retries must be at least 1')
        
        plan = None
        for attempt in range(max_retries):
            is_last_attempt = attempt == max_retries - 1
            try:
                plan = await self.create_onboarding_plan(hire_data)
            except Exception as e:
                print(f"Attempt {attempt + 1} failed: {e}")
                if is_last_attempt:
                    raise
                continue
            
            # Verify completeness
            if all(plan.get(section) for section in self._REQUIRED_SECTIONS):
                return plan
            
            if is_last_attempt:
                # No attempt left: report honestly and skip the pointless delay.
                print(f"Attempt {attempt + 1}: Plan incomplete, giving up")
            else:
                print(f"Attempt {attempt + 1}: Plan incomplete, retrying...")
                await asyncio.sleep(1)
        
        return plan
    
    async def process_batch(self, hires: List[Dict]) -> List[Dict]:
        """Process multiple hires concurrently.

        Args:
            hires: Hire records, each handled by
                `create_onboarding_plan_with_retry`.

        Returns:
            The plans of the hires that succeeded, in input order. Failures
            are counted and reported but do not abort the batch.
        """
        tasks = [self.create_onboarding_plan_with_retry(hire) for hire in hires]
        
        results = await asyncio.gather(*tasks, return_exceptions=True)
        
        # Filter out failures. With return_exceptions=True, gather can return
        # any BaseException (for example CancelledError), not just Exception.
        successful_plans = [r for r in results if not isinstance(r, BaseException)]
        failed = len(results) - len(successful_plans)
        
        if failed:
            print(f"⚠️ {failed} plans failed")
        
        return successful_plans

print('✅ Resilient coordinator implemented with retry and batch processing')

## Key Insights

### A2A Protocol Benefits:

1. **Decoupling**: Agents don't need to know about each other's internals
2. **Discovery**: Dynamic capability discovery enables flexibility
3. **Scalability**: New agents can be added without modifying existing ones
4. **Resilience**: Message-based communication with retry capabilities

### Production Considerations:

- **Message Persistence**: Use a message queue (RabbitMQ, Kafka) for durability
- **Service Discovery**: Use a dedicated service registry (e.g., Consul or etcd) instead of the in-memory registry for production
- **Monitoring**: Track message flow, latency, and error rates
- **Security**: Add authentication and encryption for agent communication
- **Load Balancing**: Multiple instances of each agent type

### Combined with MCP:

- **MCP**: Gives each agent a standardized, predictable way to access its tools and context
- **A2A**: Enables agents to work together
- **Together**: Create robust, scalable AI systems