In [None]:
# 🚀 PRODUCTION SERVING ENDPOINT - NEW WORKFLOW (CONFIG-DRIVEN)

from databricks.sdk import WorkspaceClient
from databricks.sdk.service.serving import (
    EndpointCoreConfigInput,
    ServedEntityInput
)
import mlflow
from mlflow.tracking import MlflowClient
import time
import sys
import yaml
import traceback
import requests
from typing import Optional, Dict, Tuple
from datetime import datetime

print("=" * 80)
print("üöÄ PRODUCTION SERVING ENDPOINT (NEW WORKFLOW)")
print("=" * 80)

# ✅ LOAD PIPELINE CONFIGURATION
#
# Parses pipeline_config.yml into the module-level ``pipeline_cfg`` mapping.
# Any failure here terminates the run with exit code 1.

print("\nüìã Step 1: Loading configuration from pipeline_config.yml...")

try:
    import os

    # Prefer the Repos checkout; fall back to the plain workspace copy.
    config_path = "/Workspace/Repos/vipultak7171@gmail.com/ml-credit-risk/dev_env/pipeline_config.yml"

    if not os.path.exists(config_path):
        config_path = "/Workspace/ml-credit-risk/dev_env/pipeline_config.yml"

    # Explicit encoding so parsing does not depend on the process locale.
    with open(config_path, "r", encoding="utf-8") as f:
        pipeline_cfg = yaml.safe_load(f)

    # safe_load yields None for an empty document (or a scalar/list for other
    # shapes); fail here with a clear message instead of a later TypeError
    # when the configuration is indexed by key.
    if not isinstance(pipeline_cfg, dict):
        raise ValueError(
            f"{config_path} must contain a YAML mapping at the top level, "
            f"got {type(pipeline_cfg).__name__}"
        )

    print(f"‚úÖ Configuration loaded successfully from: {config_path}")

except FileNotFoundError:
    print("‚ùå ERROR: pipeline_config.yml not found!")
    print("üí° Please ensure pipeline_config.yml is in the notebook directory")
    sys.exit(1)
except Exception as e:
    print(f"‚ùå ERROR loading configuration: {e}")
    traceback.print_exc()
    sys.exit(1)

# ✅ CONFIGURATION CLASS

class Config:
    """Deployment settings derived from the parsed ``pipeline_config.yml``.

    Reads the module-level ``pipeline_cfg`` mapping. The ``model``,
    ``aliases`` and ``notifications`` sections are indexed directly (a
    missing key raises ``KeyError``); every ``serving`` entry falls back to
    a default. A summary is printed once construction finishes.
    """

    def __init__(self):
        model_section = pipeline_cfg["model"]
        model_type = model_section["type"]
        catalog = model_section["catalog"]
        schema = model_section["schema"]
        base_name = model_section["base_name"]

        # Fully qualified model name: <catalog>.<schema>.<base>_<type>
        self.MODEL_NAME = f"{catalog}.{schema}.{base_name}_{model_type}"
        self.MODEL_TYPE = model_type

        # The endpoint name swaps underscores in the base name for hyphens.
        endpoint_prefix = base_name.replace('_', '-')
        self.ENDPOINT_NAME = f"{endpoint_prefix}-{model_type}-prod"

        alias_section = pipeline_cfg["aliases"]
        self.PRODUCTION_ALIAS = alias_section["production"]
        self.STAGING_ALIAS = alias_section["staging"]

        # Optional serving settings: (attribute, config key, default).
        serving_section = pipeline_cfg.get("serving", {})
        serving_defaults = (
            ("WORKLOAD_SIZE", "workload_size", "Small"),
            ("SCALE_TO_ZERO", "scale_to_zero_enabled", True),
            ("MIN_PROVISIONED_THROUGHPUT", "min_provisioned_throughput", 0),
            ("MAX_PROVISIONED_THROUGHPUT", "max_provisioned_throughput", 0),
            ("WORKLOAD_TYPE", "workload_type", "CPU"),
            ("ENVIRONMENT_VARS", "environment_vars", {}),
            ("DEPLOYMENT_TIMEOUT", "deployment_timeout", 1800),
            ("STATUS_CHECK_INTERVAL", "status_check_interval", 15),
        )
        for attribute, key, fallback in serving_defaults:
            setattr(self, attribute, serving_section.get(key, fallback))

        # SLACK_ENABLED must be set before the webhook lookup, which reads it.
        self.SLACK_ENABLED = pipeline_cfg["notifications"]["enabled"]
        self.SLACK_WEBHOOK_URL = self._get_slack_webhook()

        print("\nüìä Configuration Summary:")
        summary_rows = (
            ("Model Type", self.MODEL_TYPE.upper()),
            ("Model Name", self.MODEL_NAME),
            ("Endpoint Name", self.ENDPOINT_NAME),
            ("Production Alias", f"@{self.PRODUCTION_ALIAS}"),
            ("Workload Size", self.WORKLOAD_SIZE),
            ("Workload Type", self.WORKLOAD_TYPE),
            ("Scale to Zero", self.SCALE_TO_ZERO),
            ("Deployment Timeout", f"{self.DEPLOYMENT_TIMEOUT}s"),
            ("Slack", 'ENABLED' if self.SLACK_WEBHOOK_URL else 'DISABLED'),
        )
        for label, shown in summary_rows:
            print(f"   {label}: {shown}")

    def _get_slack_webhook(self) -> Optional[str]:
        """Look up the Slack webhook URL in the known secret scopes.

        Returns:
            The first non-blank ``SLACK_WEBHOOK_URL`` secret found, or
            ``None`` when notifications are disabled or no scope yields one.
        """
        if not self.SLACK_ENABLED:
            print("   ‚ÑπÔ∏è  Slack notifications disabled in config")
            return None

        try:
            for scope in ("shared-scope", "dev-scope", "prod-scope", "ml-scope"):
                try:
                    candidate = dbutils.secrets.get(scope, "SLACK_WEBHOOK_URL")
                    if not (candidate and candidate.strip()):
                        continue
                    print(f"   ‚úÖ Slack webhook found in scope '{scope}'")
                    return candidate
                except Exception:
                    # Any failure on this scope just moves on to the next one.
                    continue

            print("   ‚ÑπÔ∏è  No Slack webhook found in secrets")
            return None

        except Exception as e:
            print(f"   ‚ö†Ô∏è  Could not access secrets: {e}")
            return None

# Module-wide settings object; the notifier and the step functions below read
# their settings from this instance. Constructing it prints the summary.
config = Config()

print("=" * 80)

# 📢 SLACK NOTIFICATION HELPER

class SlackNotifier:
    """Best-effort Slack webhook client for deployment status messages.

    With no usable webhook URL the notifier is disabled: messages are echoed
    to stdout and ``send`` returns ``False``. Delivery problems are printed
    rather than raised.
    """

    # Message level -> leading icon; unknown levels use the "info" icon.
    _LEVEL_ICONS = {
        "info": "‚ÑπÔ∏è",
        "success": "‚úÖ",
        "warning": "‚ö†Ô∏è",
        "error": "‚ùå",
        "rocket": "üöÄ",
        "gear": "‚öôÔ∏è"
    }

    def __init__(self, webhook_url: Optional[str]):
        self.webhook_url = webhook_url
        # Enabled only for a URL that is neither None nor blank.
        self.enabled = False if webhook_url is None else webhook_url.strip() != ""

    def send(self, message: str, level: str = "info", extra_fields: Optional[Dict] = None) -> bool:
        """Post ``message`` to Slack, optionally followed by bullet fields.

        Args:
            message: Headline text, rendered in bold.
            level: Key selecting the leading icon.
            extra_fields: Optional mapping rendered as one bullet per entry.

        Returns:
            ``True`` only when Slack answered with HTTP 200.
        """
        if not self.enabled:
            print(f"üì¢ [SLACK DISABLED] {message}")
            return False

        icon = self._LEVEL_ICONS.get(level, '‚ÑπÔ∏è')
        text = f"{icon} *{message}*"

        if extra_fields:
            bullets = "".join(
                f"\n‚Ä¢ *{key}:* {value}" for key, value in extra_fields.items()
            )
            text = text + "\n" + bullets

        payload = {
            "text": text,
            "username": "ML Pipeline Bot",
            "icon_emoji": ":rocket:"
        }

        try:
            response = requests.post(self.webhook_url, json=payload, timeout=5)

            if response.status_code != 200:
                print(f"‚ö†Ô∏è  Slack error: {response.status_code}")
                return False

            print(f"üì¢ Slack notification sent successfully")
            return True

        except Exception as e:
            print(f"‚ùå Slack notification failed: {e}")
            return False

    def send_deployment_started(self, endpoint_name: str, model_version: int) -> bool:
        """Announce that a deployment of ``model_version`` has been submitted."""
        return self.send(
            "Serving Endpoint Deployment Started",
            level="gear",
            extra_fields={
                "Endpoint": endpoint_name,
                "Model": config.MODEL_NAME,
                "Version": f"v{model_version}",
                "Workload Size": config.WORKLOAD_SIZE,
                "Status": "Deploying..."
            }
        )

    def send_deployment_success(self, endpoint_name: str, model_version: int, deployment_time: float) -> bool:
        """Announce that the endpoint is ready, including the elapsed seconds."""
        return self.send(
            "Serving Endpoint Ready for Production",
            level="rocket",
            extra_fields={
                "Endpoint": endpoint_name,
                "Model": config.MODEL_NAME,
                "Version": f"v{model_version}",
                "Status": "READY üéâ",
                "Deployment Time": f"{deployment_time:.1f}s",
                "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }
        )

    def send_deployment_failed(self, endpoint_name: str, reason: str) -> bool:
        """Announce a failed deployment together with its ``reason``."""
        return self.send(
            "Serving Endpoint Deployment Failed",
            level="error",
            extra_fields={
                "Endpoint": endpoint_name,
                "Model": config.MODEL_NAME,
                "Reason": reason,
                "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }
        )

# Notifier used by every step below (echoes to stdout when no webhook is set)
slack = SlackNotifier(webhook_url=config.SLACK_WEBHOOK_URL)

# ✅ INITIALIZE CLIENTS
# Creates the workspace client `w` and the MLflow `client` used by the steps
# below; any failure is reported to Slack and ends the run with exit code 1.

print("\nüîß Step 2: Initializing Databricks clients...")

try:
    w = WorkspaceClient()
    mlflow.set_tracking_uri("databricks")
    mlflow.set_registry_uri("databricks-uc")
    client = MlflowClient()
except Exception as e:
    print(f"‚ùå Failed to initialize clients: {e}")
    slack.send_deployment_failed(config.ENDPOINT_NAME, f"Client initialization failed: {e}")
    traceback.print_exc()
    sys.exit(1)
else:
    print("‚úÖ WorkspaceClient initialized")
    print("‚úÖ MLflow client initialized")

# Announce the pipeline start
slack.send(
    "Serving Endpoint Pipeline Started",
    level="info",
    extra_fields={"Model": config.MODEL_NAME, "Endpoint": config.ENDPOINT_NAME},
)

# 📋 STEP 1: GET PRODUCTION MODEL VERSION

def get_production_model_version() -> Optional[int]:
    """Resolve the model version currently carrying the production alias.

    Looks up ``config.MODEL_NAME`` by ``config.PRODUCTION_ALIAS`` through the
    module-level MLflow ``client`` and prints the version, run id, status and
    any ``metric_``-prefixed tags.

    Returns:
        The version number as an ``int``, or ``None`` when the lookup fails
        for any reason (the error is printed, not raised).
    """
    print(f"\n{'='*70}")
    print("üìã STEP 1: Finding Production Model")
    print(f"{'='*70}")

    try:
        print(f"üîç Looking for: {config.MODEL_NAME}@{config.PRODUCTION_ALIAS}")

        # Get model version by production alias
        prod_mv = client.get_model_version_by_alias(
            config.MODEL_NAME,
            config.PRODUCTION_ALIAS
        )

        version = int(prod_mv.version)

        print(f"‚úÖ Production model found:")
        print(f"   Version: v{version}")
        print(f"   Run ID: {prod_mv.run_id}")
        print(f"   Status: {prod_mv.status}")

        # Strip only the leading "metric_" marker. str.replace would also
        # remove later occurrences inside the metric name itself.
        metric_prefix = "metric_"
        metric_tags = {
            key[len(metric_prefix):]: value
            for key, value in (prod_mv.tags or {}).items()
            if key.startswith(metric_prefix)
        }

        # Print the header only when there is something to list under it.
        if metric_tags:
            print(f"\nüìä Model Metadata:")
            for metric_name, value in metric_tags.items():
                print(f"   {metric_name}: {value}")

        return version

    except Exception as e:
        print(f"‚ùå No production model found: {e}")
        print(f"üí° Please run production_promotion.py first")
        return None

# 📋 STEP 2: CHECK IF ENDPOINT EXISTS

def check_endpoint_exists(endpoint_name: str) -> bool:
    """Report whether a serving endpoint named ``endpoint_name`` exists.

    Only a "not found" response counts as absence. Any other failure
    (authentication, permissions, network, server error) propagates to the
    caller, because treating it as "does not exist" would send the pipeline
    down the create path against an endpoint that may well exist.

    Args:
        endpoint_name: Name of the serving endpoint to look up.

    Returns:
        ``True`` if the endpoint was fetched, ``False`` if the workspace
        reported it as not found.

    Raises:
        Exception: Any lookup error other than ``NotFound``.
    """
    # Local import keeps this block self-contained; the class comes from the
    # databricks-sdk package this file already depends on.
    from databricks.sdk.errors import NotFound

    print(f"\n{'='*70}")
    print("üìã STEP 2: Checking Endpoint Status")
    print(f"{'='*70}")

    try:
        endpoint = w.serving_endpoints.get(name=endpoint_name)
    except NotFound:
        print(f"‚ÑπÔ∏è  Endpoint does not exist: {endpoint_name}")
        print(f"   Will create new endpoint")
        return False

    print(f"‚úÖ Endpoint exists: {endpoint_name}")
    print(f"   Current state: {endpoint.state}")
    return True

# 📋 STEP 3: DEPLOY OR UPDATE ENDPOINT

def deploy_endpoint(endpoint_name: str, model_name: str, version: int, exists: bool) -> bool:
    """Submit a create or update request for the serving endpoint.

    Args:
        endpoint_name: Serving endpoint to create or reconfigure.
        model_name: Fully qualified model name to serve.
        version: Model version to serve.
        exists: ``True`` to update the existing endpoint's config, ``False``
            to create a new endpoint.

    Returns:
        ``True`` once the request was submitted (a "started" Slack message is
        sent); ``False`` if submission raised (a "failed" message is sent and
        the traceback printed).
    """
    rule = "=" * 70
    print("\n" + rule)
    print("üìã STEP 3: Deploying Serving Endpoint")
    print(rule)

    # Entity description shared by the create and update paths
    entity = ServedEntityInput(
        entity_name=model_name,
        entity_version=str(version),
        workload_size=config.WORKLOAD_SIZE,
        scale_to_zero_enabled=config.SCALE_TO_ZERO
    )

    # Throughput bounds are applied only when configured above zero
    throughput_bounds = {
        "min_provisioned_throughput": config.MIN_PROVISIONED_THROUGHPUT,
        "max_provisioned_throughput": config.MAX_PROVISIONED_THROUGHPUT,
    }
    for attribute, amount in throughput_bounds.items():
        if amount > 0:
            setattr(entity, attribute, amount)

    # Environment variables are applied only when some are configured
    if config.ENVIRONMENT_VARS:
        entity.environment_vars = config.ENVIRONMENT_VARS

    try:
        if not exists:
            # Create path: brand-new endpoint
            print(f"\nüÜï Creating new endpoint: {endpoint_name}")
            print(f"   Model version: v{version}")

            w.serving_endpoints.create(
                name=endpoint_name,
                config=EndpointCoreConfigInput(
                    name=endpoint_name,
                    served_entities=[entity]
                )
            )

            print(f"‚úÖ Creation request submitted successfully")
        else:
            # Update path: swap the served entity on the existing endpoint
            print(f"\nüîÑ Updating existing endpoint: {endpoint_name}")
            print(f"   New model version: v{version}")

            w.serving_endpoints.update_config(
                name=endpoint_name,
                served_entities=[entity]
            )

            print(f"‚úÖ Update request submitted successfully")

        slack.send_deployment_started(endpoint_name, version)
        return True

    except Exception as e:
        print(f"‚ùå Deployment failed: {e}")
        slack.send_deployment_failed(endpoint_name, str(e))
        traceback.print_exc()
        return False

# 📋 STEP 4: WAIT FOR ENDPOINT READY

def _endpoint_state_name(value) -> str:
    """Return the bare member name of an endpoint-state field.

    Accepts an enum member (its ``.value`` is used), a plain string, or a
    missing value (reported as ``"UNKNOWN"``).
    """
    if not value:
        return "UNKNOWN"
    raw = getattr(value, "value", value)
    # Tolerate "EnumClass.MEMBER"-style strings by keeping the member part.
    return str(raw).rsplit(".", 1)[-1]


def wait_for_endpoint_ready(endpoint_name: str, model_version: int) -> Tuple[bool, float]:
    """Poll the serving endpoint until it is ready, fails, or times out.

    State names are compared exactly. A substring test such as
    ``"READY" in str(state.ready)`` is also true for ``NOT_READY``, which
    would make the readiness check meaningless.

    The timeout is measured against a monotonic clock, so time spent inside
    the status call counts toward ``config.DEPLOYMENT_TIMEOUT``.

    Args:
        endpoint_name: Serving endpoint to poll.
        model_version: Version being deployed (used in the success message).

    Returns:
        ``(True, seconds_waited)`` when the endpoint is ``READY`` with no
        update in flight; ``(False, 0.0)`` when the update failed, was
        cancelled, or the timeout elapsed.
    """
    print(f"\n{'='*70}")
    print("üìã STEP 4: Waiting for Endpoint Ready Status")
    print(f"{'='*70}")
    print(f"‚è≥ This may take several minutes (up to {config.DEPLOYMENT_TIMEOUT/60:.0f} min)...")

    timeout = config.DEPLOYMENT_TIMEOUT
    interval = config.STATUS_CHECK_INTERVAL
    start_time = time.monotonic()

    while time.monotonic() - start_time < timeout:
        state = None
        try:
            state = w.serving_endpoints.get(name=endpoint_name).state
        except Exception as e:
            # A failed status call is treated as transient; poll again.
            print(f"‚ö†Ô∏è  Error checking status: {e}")

        if state:
            config_update = _endpoint_state_name(state.config_update)
            ready = _endpoint_state_name(state.ready)

            # Check for success
            if config_update == "NOT_UPDATING" and ready == "READY":
                deployment_time = time.monotonic() - start_time
                print(f"\n{'='*70}")
                print("‚úÖ ENDPOINT READY FOR PRODUCTION!")
                print(f"{'='*70}")
                print(f"   Deployment Time: {deployment_time:.1f}s")

                slack.send_deployment_success(endpoint_name, model_version, deployment_time)

                return True, deployment_time

            # Check for failure; a cancelled update will not become ready
            # either, so report it now instead of polling until the timeout.
            if "FAILED" in config_update or "CANCELED" in config_update:
                print(f"\n‚ùå Endpoint deployment FAILED")
                print(f"   Config Update Status: {config_update}")

                slack.send_deployment_failed(
                    endpoint_name,
                    f"Deployment failed with status: {config_update}"
                )

                return False, 0.0

            # Show progress
            elapsed = time.monotonic() - start_time
            print(f"‚è≥ {elapsed:.0f}s | Update: {config_update} | Ready: {ready}")

        # Never sleep past the deadline, and never pass a negative duration.
        remaining = timeout - (time.monotonic() - start_time)
        if remaining <= 0:
            break
        time.sleep(max(0.0, min(interval, remaining)))

    print(f"\n‚ùå Timeout: Endpoint not ready after {config.DEPLOYMENT_TIMEOUT}s")

    slack.send_deployment_failed(
        endpoint_name,
        f"Timeout: Endpoint not ready after {config.DEPLOYMENT_TIMEOUT}s"
    )

    return False, 0.0

# 📋 STEP 5: VERIFY ENDPOINT

def verify_endpoint(endpoint_name: str) -> bool:
    """Fetch the endpoint and print its details and served entities.

    Args:
        endpoint_name: Serving endpoint to inspect.

    Returns:
        ``True`` when the endpoint was fetched and printed, ``False`` when
        anything raised along the way (the error is printed).
    """
    rule = "=" * 70
    print("\n" + rule)
    print("üìã STEP 5: Verifying Endpoint Configuration")
    print(rule)

    try:
        ep = w.serving_endpoints.get(name=endpoint_name)

        print(f"‚úÖ Endpoint Details:")
        print(f"   Name: {ep.name}")
        print(f"   State: {ep.state}")

        # Optional fields are listed only when they hold a value.
        optional_fields = (
            ("Creator", "creator"),
            ("Creation Time", "creation_timestamp"),
            ("Last Updated", "last_updated_timestamp"),
        )
        for label, attribute in optional_fields:
            shown = getattr(ep, attribute)
            if shown:
                print(f"   {label}: {shown}")

        served = ep.config.served_entities if ep.config else None
        if served:
            print(f"\n   Served Entities:")
            for item in served:
                print(f"     ‚Ä¢ Model: {item.entity_name}")
                print(f"       Version: {item.entity_version}")
                print(f"       Workload: {item.workload_size}")
                print(f"       Scale to Zero: {item.scale_to_zero_enabled}")

        return True

    except Exception as e:
        print(f"‚ö†Ô∏è  Could not verify endpoint: {e}")
        return False

# 🎬 MAIN EXECUTION

def main():
    """Run the serving endpoint deployment pipeline end to end.

    Steps: resolve the production model version, check whether the endpoint
    exists, create or update it, wait until it is ready, print its
    configuration, then publish task values for downstream workflow tasks.

    Always terminates through ``sys.exit``: code 0 on success, code 1 on any
    failed step or unexpected exception. ``SystemExit`` is not an
    ``Exception`` subclass, so the explicit exits below pass through the
    outer handler untouched.
    """
    try:
        print("\n" + "="*80)
        print("üé¨ STARTING SERVING ENDPOINT DEPLOYMENT")
        print("="*80 + "\n")

        # Step 1: Get production model version
        version = get_production_model_version()
        # The helper signals failure with None; test for that explicitly.
        if version is None:
            error_msg = "No production model found"
            print(f"\n‚ùå {error_msg}")
            print(f"üí° Run production_promotion.py first to promote a model")

            slack.send_deployment_failed(
                config.ENDPOINT_NAME,
                error_msg
            )

            sys.exit(1)

        # Step 2: Check if endpoint exists
        exists = check_endpoint_exists(config.ENDPOINT_NAME)

        # Step 3: Deploy or update endpoint (sends its own Slack message on failure)
        if not deploy_endpoint(config.ENDPOINT_NAME, config.MODEL_NAME, version, exists):
            error_msg = "Deployment trigger failed"
            print(f"\n‚ùå {error_msg}")
            sys.exit(1)

        # Step 4: Wait for endpoint to be ready (sends its own Slack message on failure)
        ready, deployment_time = wait_for_endpoint_ready(config.ENDPOINT_NAME, version)

        if not ready:
            error_msg = "Endpoint failed to become ready"
            print(f"\n‚ùå {error_msg}")
            sys.exit(1)

        # Step 5: Verify endpoint configuration (informational; result not checked)
        verify_endpoint(config.ENDPOINT_NAME)

        # Success summary
        print("\n" + "="*80)
        print("‚úÖ‚úÖ SERVING ENDPOINT DEPLOYMENT COMPLETE ‚úÖ‚úÖ")
        print("="*80)
        print(f"\nüìä Deployment Summary:")
        print(f"   Endpoint Name: {config.ENDPOINT_NAME}")
        print(f"   Model: {config.MODEL_NAME}")
        print(f"   Model Type: {config.MODEL_TYPE.upper()}")
        print(f"   Version: v{version}")
        print(f"   Workload Size: {config.WORKLOAD_SIZE}")
        print(f"   Status: READY ‚úÖ")
        print(f"   Deployment Time: {deployment_time:.1f}s")

        print(f"\nüìå Next Steps:")
        print(f"   1. Run batch inference script")
        print(f"   2. Monitor endpoint performance")
        print(f"   3. Update API/application to use endpoint")

        print(f"\nüîó Endpoint URL:")
        print(f"   https://<workspace-url>/serving-endpoints/{config.ENDPOINT_NAME}/invocations")
        print("="*80 + "\n")

        # Save task values for workflow (best effort)
        try:
            dbutils.jobs.taskValues.set(key="endpoint_name", value=config.ENDPOINT_NAME)
            dbutils.jobs.taskValues.set(key="endpoint_version", value=version)
            dbutils.jobs.taskValues.set(key="deployment_time", value=deployment_time)
            dbutils.jobs.taskValues.set(key="deployment_status", value="SUCCESS")
            print("‚úÖ Task values saved for workflow")
        except Exception as task_err:
            # Was a bare `except:`, which also traps SystemExit and
            # KeyboardInterrupt and hid the actual reason.
            print("‚ÑπÔ∏è  Not running in workflow - skipping task values")
            print(f"   Reason: {task_err}")

        sys.exit(0)

    except Exception as e:
        print("\n" + "="*80)
        print("‚ùå SERVING ENDPOINT DEPLOYMENT FAILED")
        print("="*80)
        print(f"Error: {str(e)}")
        print("="*80 + "\n")

        slack.send_deployment_failed(
            config.ENDPOINT_NAME,
            str(e)
        )

        traceback.print_exc()
        sys.exit(1)

# ✅ EXECUTE
# Run the pipeline only when this file is executed as the main module.
# main() always ends by calling sys.exit.

if __name__ == "__main__":
    main()