# Agentic Migration Planning System

This notebook implements a multi-agent system for Databricks migration planning. It is built with DSPy and Vector Search, and it is packaged and registered for deployment with MLflow.

## System Architecture:
- **Goal Clarification Agent**: Helps users articulate migration goals
- **Question Generation Agent**: Dynamically generates contextual questions
- **Information Completeness Scorer**: Evaluates information quality
- **Plan Generation Agent**: Creates structured migration plans
- **Judge Agent**: Evaluates plan quality using Databricks Judge LLMs
- **Excel Export Agent**: Generates structured Excel outputs
- **Vector Search**: RAG-based document retrieval
- **Lakehouse Storage**: Conversation history persistence

## Features:
- Multi-agent orchestration with DSPy
- Dynamic question prioritization
- Information completeness scoring
- Structured plan generation with tables
- Plan quality evaluation
- Excel export with MCP integration
- Conversation history in Delta Lake
- Vector search for document retrieval


## 1. Setup and Configuration


In [None]:
# Install the third-party packages this notebook depends on.
# `%pip` is a notebook magic command, not Python syntax.
%pip install dspy-ai databricks-vectorsearch openpyxl markitdown

# Restart the Python process so the freshly installed packages are loaded.
# NOTE(review): a restart presumably discards any Python state defined before
# this point, so this cell should stay ahead of all other code — confirm.
dbutils.library.restartPython()


In [None]:
# Import required libraries
import dspy
import mlflow
import json
import logging
from typing import Dict, Any, List, Optional
from datetime import datetime

# Databricks specific imports
from databricks.vector_search import VectorSearchClient
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, lit, current_timestamp

# System imports
import sys
import os
# Add the project repo to the import path.
# NOTE(review): presumably this is where the project-local packages imported
# later in the notebook live — confirm the path for your workspace.
# The membership check keeps the cell idempotent: re-running it must not
# append the same entry to sys.path again.
_repo_root = '/Workspace/Repos/varun.bhandary@databricks.com/use-case-delivery-agent'
if _repo_root not in sys.path:
    sys.path.append(_repo_root)

# Configure logging: INFO level on the root logger, plus a named logger
# for messages emitted by this notebook.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("migration_planning_system")


In [None]:
# Notebook-wide settings. Every later cell reads its resource names from here.
CONFIG = dict(
    # Unity Catalog location and Vector Search resources
    catalog_name="vbdemos",
    schema_name="usecase_agent",
    vector_search_endpoint="use-case-planning-agent",
    vector_search_index="migration_planning_documents",

    # Serving endpoints: the planning LLM and the judge model
    llm_model="databricks/databricks-claude-sonnet-4",
    judge_model="databricks-dbrx-instruct",

    # Source documents and the table they are processed into
    pptx_volume_path="/Volumes/vbdemos/dbdemos_autoloader/raw_data/usecase-planning-agent-pdf/",
    processed_table="vbdemos.usecase_agent.migration_documents",

    # Tables used for conversation history and evaluations
    conversations_table="vbdemos.usecase_agent.conversation_sessions",
    messages_table="vbdemos.usecase_agent.conversation_messages",
    interactions_table="vbdemos.usecase_agent.agent_interactions",
    evaluations_table="vbdemos.usecase_agent.plan_evaluations",
)

print("Configuration loaded successfully")


## 2. Initialize DSPy and Vector Search


In [None]:
# Point DSPy at the Databricks-served LLM.
# The API token and workspace host are both read from the notebook context,
# which is fetched once and reused.
_notebook_ctx = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
api_key = _notebook_ctx.apiToken().get()
workspace_url = _notebook_ctx.browserHostName().get()

# Chat-type LM addressed through the workspace's serving-endpoints base URL
lm = dspy.LM(
    model=CONFIG["llm_model"],
    api_key=api_key,
    api_base=f"https://{workspace_url}/serving-endpoints",
    model_type="chat",
)

# Register the LM as DSPy's default
dspy.configure(lm=lm)
print("DSPy configured successfully with Databricks LLM")


In [None]:
# Initialize Vector Search Client
vsc = VectorSearchClient(disable_notice=True)

# Smoke-test the connection by listing the indexes on the configured endpoint.
# Failures are reported, not raised, so the notebook can continue.
try:
    # The endpoint name is passed positionally so the call does not depend on
    # the SDK's parameter name.
    indexes = vsc.list_indexes(CONFIG["vector_search_endpoint"])

    # NOTE(review): the SDK is understood to return the REST payload, a dict
    # holding the indexes under "vector_indexes"; len() of that dict would count
    # its keys, not the indexes. A plain list is also accepted — confirm
    # against the installed databricks-vectorsearch version.
    if isinstance(indexes, dict):
        index_entries = indexes.get("vector_indexes") or []
    else:
        index_entries = indexes or []

    print(f"Vector Search connected. Found {len(index_entries)} indexes.")
except Exception as e:
    print(f"Vector Search connection failed: {str(e)}")
    print("Please ensure the vector search endpoint is created and accessible.")


## 3. Initialize Multi-Agent System


In [None]:
# Import agent components
# These are project-local modules (not installed packages); the agent classes
# are imported here but not referenced directly in this cell.
from agents import (
    GoalClarificationAgent,
    QuestionGenerationAgent,
    InformationCompletenessScorer,
    PlanGenerationAgent,
    PlanQualityJudge,
    ExcelExportAgent
)
from orchestration import WorkflowManager, ConversationManager
from data_processing import LakehouseStorage

# Initialize Spark session
# getOrCreate() returns the already-running session when one exists.
spark = SparkSession.builder.appName("MigrationPlanningSystem").getOrCreate()

# Initialize lakehouse storage
# Bound to the catalog and schema named in CONFIG.
lakehouse_storage = LakehouseStorage(
    spark_session=spark,
    catalog=CONFIG["catalog_name"],
    schema=CONFIG["schema_name"]
)

# Initialize conversation manager
# Uses the lakehouse storage object above as its storage backend.
conversation_manager = ConversationManager(storage_backend=lakehouse_storage)

# Initialize workflow manager
# Receives the conversation manager plus the Vector Search endpoint and index
# names; the later test and export cells all go through this object.
workflow_manager = WorkflowManager(
    conversation_manager=conversation_manager,
    vector_search_endpoint=CONFIG["vector_search_endpoint"],
    vector_search_index=CONFIG["vector_search_index"]
)

print("Multi-agent system initialized successfully")


## 4. Test the System


In [None]:
# Smoke test, step 1: open a sample planning session and show its initial state
print("Testing the migration planning system...")

# Sample scenario: an Oracle data warehouse moving to Databricks on Azure
session = workflow_manager.start_planning_session(
    user_id="test_user",
    project_context="We need to migrate our Oracle data warehouse to Databricks on Azure. We have 50+ data pipelines, 2TB of data, and need to complete within 6 months.",
    initial_goals=["Migrate Oracle to Databricks", "Improve performance", "Reduce costs"],
)

# Single print call; sep puts each field on its own line
print(
    f"Session started: {session.session_id}",
    f"Current phase: {session.current_phase}",
    f"Goals: {session.goals}",
    sep="\n",
)


In [None]:
# Smoke test, step 2: send a goal statement through the workflow
print("\n=== Testing Goal Clarification ===")

response = workflow_manager.process_user_input(
    session_id=session.session_id,
    user_input="We want to migrate our Oracle data warehouse to Databricks for better performance and cost savings.",
    input_type="goal_statement",
)

# Missing keys fall back to placeholder text instead of raising
print(
    f"Response: {response.get('response', 'No response')}",
    f"Next action: {response.get('next_action', 'Unknown')}",
    f"Phase: {response.get('phase', 'Unknown')}",
    sep="\n",
)


In [None]:
# Smoke test, step 3: answer the information-gathering questions
print("\n=== Testing Information Gathering ===")

response = workflow_manager.process_user_input(
    session_id=session.session_id,
    user_input="Resource: We have 5 team members with mixed skills. Timeline: We need to complete in 6 months. Scope: 50+ pipelines, 2TB of data.",
    input_type="information_answer",
)

# The two scores are formatted as percentages; absent scores default to 0
print(
    f"Response: {response.get('response', 'No response')}",
    f"Completeness Score: {response.get('completeness_score', 0):.1%}",
    f"Quality Score: {response.get('quality_score', 0):.1%}",
    f"Ready for Planning: {response.get('ready_for_planning', False)}",
    f"Questions: {response.get('questions', [])}",
    sep="\n",
)


In [None]:
# Smoke test, step 4: request the migration plan
print("\n=== Testing Plan Generation ===")

response = workflow_manager.process_user_input(
    session_id=session.session_id,
    user_input="Generate the migration plan",
    input_type="plan_request",
)

# Only the first 200 characters of the plan overview are shown
print(
    f"Response: {response.get('response', 'No response')}",
    f"Plan Overview: {response.get('plan_overview', 'No plan')[:200]}...",
    f"Next Action: {response.get('next_action', 'Unknown')}",
    sep="\n",
)


In [None]:
# Smoke test, step 5: ask for an evaluation of the generated plan
print("\n=== Testing Plan Evaluation ===")

response = workflow_manager.process_user_input(
    session_id=session.session_id,
    user_input="Evaluate the generated plan",
    input_type="evaluation_request",
)

# Quality score is printed to one decimal place with a "/10" suffix;
# feedback is cut to its first 200 characters
print(
    f"Response: {response.get('response', 'No response')}",
    f"Quality Score: {response.get('quality_score', 0):.1f}/10",
    f"Grade: {response.get('grade', 'N/A')}",
    f"Feedback: {response.get('feedback', 'No feedback')[:200]}...",
    sep="\n",
)


In [None]:
# Smoke test, step 6: export the session's plan in Excel format
print("\n=== Testing Excel Export ===")

export_result = workflow_manager.export_plan(
    session_id=session.session_id,
    format_type="excel",
)

# Report the outcome; missing keys fall back to placeholder values
print(
    f"Export Success: {export_result.get('success', False)}",
    f"File Path: {export_result.get('file_path', 'No path')}",
    f"Format: {export_result.get('format', 'Unknown')}",
    sep="\n",
)


## 5. MLflow Model Deployment


In [None]:
# Configure MLflow for Unity Catalog
mlflow.set_registry_uri("databricks-uc")


# Create a wrapper class for MLflow deployment
class MigrationPlanningAgentWrapper(mlflow.pyfunc.PythonModel):
    """MLflow pyfunc wrapper for the Migration Planning Agent System.

    ``load_context`` configures DSPy and builds the conversation and workflow
    managers; ``predict`` forwards a request to
    ``WorkflowManager.process_user_input``.

    Inputs accepted by ``predict``:
      * dict containing ``"messages"`` (chat format): the most recent message
        whose role is ``"user"`` is processed, and the reply is wrapped in a
        ``"choices"`` structure.
      * dict without ``"messages"`` (direct format): ``"user_input"`` is
        processed and the workflow response is returned as-is.
      * str: processed under the default session and input type.
    Anything else yields ``{"error": "Invalid input format"}``.
    """

    def __init__(self):
        # Both managers are created in load_context(), not at construction.
        self.workflow_manager = None
        self.conversation_manager = None

    def load_context(self, context):
        """Build the agent system when the model is loaded.

        Args:
            context: MLflow model context. Not read by this implementation.

        Raises:
            Exception: any failure during setup is printed and re-raised.
        """
        try:
            # Reconfigure DSPy
            # NOTE(review): `dbutils` and `CONFIG` are notebook globals; confirm
            # they are available wherever this model is loaded (for example a
            # serving container) before relying on this path.
            notebook_ctx = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
            api_key = notebook_ctx.apiToken().get()
            workspace_url = notebook_ctx.browserHostName().get()

            lm = dspy.LM(
                model=CONFIG["llm_model"],
                api_key=api_key,
                api_base=f"https://{workspace_url}/serving-endpoints",
                model_type="chat"
            )
            dspy.configure(lm=lm)

            # Initialize components (imported here so they resolve at load time)
            from orchestration import ConversationManager, WorkflowManager
            from data_processing import LakehouseStorage

            spark = SparkSession.builder.appName("MigrationPlanningAgent").getOrCreate()
            lakehouse_storage = LakehouseStorage(
                spark_session=spark,
                catalog=CONFIG["catalog_name"],
                schema=CONFIG["schema_name"]
            )

            self.conversation_manager = ConversationManager(storage_backend=lakehouse_storage)
            self.workflow_manager = WorkflowManager(
                conversation_manager=self.conversation_manager,
                vector_search_endpoint=CONFIG["vector_search_endpoint"],
                vector_search_index=CONFIG["vector_search_index"]
            )

            print("Migration Planning Agent loaded successfully")

        except Exception as e:
            print(f"Error loading agent: {str(e)}")
            # Bare raise keeps the original traceback intact.
            raise

    @staticmethod
    def _latest_user_message(messages):
        """Return the content of the most recent "user" message, or ""."""
        # A chat request carries the whole history; the turn to answer is the
        # last user message, not the first one.
        for message in reversed(list(messages)):
            if message.get("role") == "user":
                return message.get("content", "")
        return ""

    def _process(self, session_id, user_input, input_type):
        """Forward one request to the workflow manager and return its response."""
        if self.workflow_manager is None:
            raise RuntimeError(
                "Agent is not loaded; load_context() must run before predict()"
            )
        return self.workflow_manager.process_user_input(
            session_id=session_id,
            user_input=user_input,
            input_type=input_type
        )

    def predict(self, model_input, params=None):
        """Handle one serving request.

        Args:
            model_input: a dict (chat or direct format) or a plain string; see
                the class docstring for the accepted shapes.
            params: accepted for the pyfunc interface; not used.

        Returns:
            dict: a chat-style payload with ``choices``, ``session_id``,
            ``phase`` and ``next_action`` for chat input; the raw workflow
            response for direct or string input; ``{"error": ...}`` for
            unsupported input or when processing raises.
        """
        try:
            # Handle different input formats
            if isinstance(model_input, dict):
                session_id = model_input.get("session_id", "default_session")
                input_type = model_input.get("input_type", "message")

                if "messages" in model_input:
                    # Chat format
                    user_message = self._latest_user_message(model_input["messages"])
                    response = self._process(session_id, user_message, input_type)

                    return {
                        "choices": [
                            {
                                "message": {
                                    "role": "assistant",
                                    "content": response.get("response", "No response")
                                }
                            }
                        ],
                        "session_id": session_id,
                        "phase": response.get("phase", "unknown"),
                        "next_action": response.get("next_action", "continue")
                    }

                # Direct format
                return self._process(
                    session_id,
                    model_input.get("user_input", ""),
                    input_type
                )

            # String input
            if isinstance(model_input, str):
                return self._process("default_session", model_input, "message")

            return {"error": "Invalid input format"}

        except Exception as e:
            # Errors are reported in the payload instead of propagating to the caller.
            return {"error": f"Prediction failed: {str(e)}"}


print("MLflow wrapper class defined successfully")


In [None]:
# Log the agent wrapper to MLflow, then register the logged model in Unity Catalog
print("Deploying Migration Planning Agent System...")

# Chat-format request stored with the model as its input example
input_example = {
    "messages": [
        {
            "role": "user",
            "content": "We want to migrate our Oracle data warehouse to Databricks. Can you help us create a migration plan?",
        },
    ],
    "session_id": "test_session",
    "input_type": "goal_statement",
}

# Log the wrapper inside a fresh MLflow run
with mlflow.start_run() as run:
    # NOTE(review): confirm the installed MLflow version's pyfunc.log_model
    # accepts the `env_vars` argument passed below.
    logged_model_info = mlflow.pyfunc.log_model(
        python_model=MigrationPlanningAgentWrapper(),
        artifact_path="migration-planning-agent",
        input_example=input_example,
        pip_requirements=[
            "dspy-ai>=2.4.0",
            "databricks-vectorsearch>=0.1.0",
            "openpyxl>=3.1.0",
            "markitdown>=0.1.0",
        ],
        env_vars={
            "VECTOR_SEARCH_ENDPOINT": CONFIG["vector_search_endpoint"],
            "VECTOR_SEARCH_INDEX": CONFIG["vector_search_index"],
            "CATALOG_NAME": CONFIG["catalog_name"],
            "SCHEMA_NAME": CONFIG["schema_name"],
        },
    )

    print(f"✅ Agent logged to MLflow: {logged_model_info.model_uri}")

# Three-level Unity Catalog name: <catalog>.<schema>.<model>
model_name = f"{CONFIG['catalog_name']}.{CONFIG['schema_name']}.migration-planning-agent"
uc_model_info = mlflow.register_model(
    model_uri=logged_model_info.model_uri,
    name=model_name,
)

# Registration report, one line per item
print(
    f"✅ Agent registered in Unity Catalog: {uc_model_info.name}",
    f"   Version: {uc_model_info.version}",
    "✅ Agent ready for deployment!",
    f"📊 Model URI: {logged_model_info.model_uri}",
    f"🏷️ Registered as: {uc_model_info.name}",
    "📈 Next step: Deploy via Databricks Model Serving UI",
    sep="\n",
)


## 6. System Summary


In [None]:
# Final report: what was registered, which resources it uses, and what to do next.
# Emitted with a single print call; sep puts each entry on its own line.
print(
    "\n" + "="*60,
    "🎯 AGENTIC MIGRATION PLANNING SYSTEM - DEPLOYMENT COMPLETE",
    "="*60,
    f"📊 Model Registered: {uc_model_info.name}",
    f"🔢 Version: {uc_model_info.version}",
    f"🔗 Model URI: {logged_model_info.model_uri}",
    f"📈 Vector Search Endpoint: {CONFIG['vector_search_endpoint']}",
    f"🗂️ Vector Search Index: {CONFIG['vector_search_index']}",
    f"💾 Storage: {CONFIG['catalog_name']}.{CONFIG['schema_name']}",
    "\n🚀 NEXT STEPS:",
    "1. Deploy the model via Databricks Model Serving UI",
    "2. Test the deployed endpoint with sample requests",
    "3. Monitor performance and usage analytics",
    "4. Scale based on usage patterns",
    "\n✨ The system is ready for production use!",
    "="*60,
    sep="\n",
)
